Commit f8fc59f5a59ff320ccbaa3ac0433760678f6e587 - fastml

+33

-0

Makefile less more

	0	.PHONY: all libs semphy programs clean install
	1
	2	all: libs programs
	3
	4	debug: libs.debug
	5
	6	%: libs.% programs.%
	7	echo $@
	8
	9	libs: libs.all
	10
	11	programs: programs.all
	12
	13	programs.all: libs
	14	programs.debug: libs.debug
	15
	16	semphy: programs.semphy
	17
	18	install: programs.install
	19
	20	programs.install programs.all semphy: libs
	21
	22	clean: libs.clean programs.clean
	23
	24	libs.%:
	25	+cd libs;make $(*)
	26
	27	programs.%:
	28	+cd programs;make $(*)
	29
	30	tags: libs//.cpp libs//.h programs//.h programs//.cpp
	31	etags --members --language=c++ $^
	32

+88

-0

README less more

	0	FastML - program for computing maximum likelihood
	1	ancestral sequence reconstruction
	2
	3	The FastML program is a bioinformatics tool for the reconstruction of ancestral sequences based on the phylogenetic relations between homologous sequences.
	4	The program runs several algorithms that reconstruct the ancestral sequences with emphasis on an accurate reconstruction of both indels and characters.
	5
	6	URL: http://fastml.tau.ac.il/
	7
	8	Authors: Haim Ashkenazy, Osnat Penn, Adi Doron-Faigenboim, Ofir Cohen, Gina Cannarozzi, Oren Zomer and Tal Pupko
	9
	10	When using the FastML algorithm please cite:
	11	[1] Ashkenazy H, Penn O, Doron-Faigenboim A, Cohen O, Cannarozzi G, Zomer O, Pupko T. 2012
	12	FastML: a web server for probabilistic reconstruction of ancestral sequences
	13	Nucleic Acids Res. 40(Web Server issue):W580-4.
	14
	15	[2] Pupko T, Pe'er I, Hasegawa M, Graur D, Friedman N. 2002
	16	A branch-and-bound algorithm for the inference of ancestral amino-acid sequences when the replacement rate varies among sites: Application to the evolution of five gene families.
	17	Bioinformatics 18(8): 1116-1123. [pdf] [abs]
	18
	19	[3] Pupko T, Pe'er I, Shamir R, Graur D. 2000.
	20	A fast algorithm for joint reconstruction of ancestral amino-acid sequences.
	21	Mol. Biol. Evol. 17(6): 890-896. [pdf] [abs]
	22
	23	[4] Pupko, T. and Pe'er I. 2000.
	24	Maximum likelihood reconstruction of ancestral amino-acid sequences.
	25	Currents in Computational Molecular Biology. Ed. Miyano, S., Shamir, R, and Takagi, T. pp. 184-185. Universal Academy Press, Tokyo, Japan. [pdf]
	26
	27	Installation
	28	============
	29
	30	1. Unpack the archive by typing:
	31	% tar -xzf FastML.v3.1.tgz
	32
	33	2. Compile the package by typing:
	34	% cd FastML.v3.1
	35	% make
	36	(Running `make' takes a while)
	37
	38	3A. FastML uses Perl:
	39	Type "perl -v" and check that Perl is installed.
	40	If it's not installed, download and install it from: http://www.perl.org/
	41
	42	3B. To reconstruct the ML tree during a FastML run, RAxML and BioPerl should be installed on your system.
	43
	44	RAxML: Type "which raxmlHPC" and check that the program is found
	45	If it's not installed, download and install RAxML from: http://sco.h-its.org/exelixis/web/software/raxml/index.html
	46	BioPerl: Type "perl -e 'use Bio::SeqIO'" to check that BioPerl is installed.
	47	If it's not installed, download and install it from: http://www.bioperl.org/
	48
	49
	50	Usage
	51	=====
	52
	53	Run the Perl script: FastML.v3.1/www/fastml/FastML_Wrapper.pl
	54	(Note that you cannot move this script out of its directory, because it uses relative paths to other files in other directories. Sorry)
	55	FastML uses flags in the command line arguments: (for help, type: "perl FastML_Wrapper.pl")
	56
	57	USAGE: perl FastML_Wrapper.pl --MSA_File MSA_File --seqType [AA|NUC|CODON] --outDir OUTDIR
	58
	59	Required parameters:
	60	--MSA_File Input multiple sequence alignment in FASTA format
	61	--seqType Sequence type may be either of: nuc (nucleotides), aa (amino acids),
	62	or codon (nucleotides that will be treated as whole codons)
	63	--outDir FULL PATH of the output directory where all output files will be created
	64	(NOTE: EACH RUN must have its UNIQUE outDir.
	65	If the outDir does not exist, it will be created automatically)
	66	Optional parameters:
	67	--Tree <phylogenetic tree>
	68	--TreeAlg <NJ | RAxML> - How to reconstruct the tree when a tree is not provided by the user; default=NJ
	69	--SubMatrix <JTT | LG | mtREV | cpREV | WAG | DAYHOFF > amino acid options, the default is JTT.
	70	<JC_Nuc | T92 | HKY | GTR> nucleotide options, the default is JC_Nuc.
	71	<yang | empiriCodon> codon options, the default is yang.
	72	--OptimizeBL <yes | no> default: yes
	73	--UseGamma <yes | no> default: yes
	74	--Alpha <user-provided alpha> (relevant only when UseGamma==yes)
	75	user alpha parameter of the gamma distribution [if alpha is not given, alpha and branches will be evaluated from the data]
	76	--jointReconstruction <yes | no> default: yes
	77	--indelReconstruction <PARSIMONY|ML|BOTH> - which method is used for indel reconstruction
	78	--indelCutOff <Cutoff for indel vs Char> default = 0.5
	79
	80	EXAMPLE:
	81	> perl FastML.v3.1/www/fastml/FastML_Wrapper.pl --MSA_File MSA.aln --outDir /home/MSA.FastML --seqType aa --Tree tree.newick
	82	Will reconstruct ancestral sequences (both "joint" and "marginal") based on the protein MSA in "MSA.aln" and the tree in "tree.newick", and output all results to the directory "MSA.FastML" in the home directory.
	83
	84	Copyrights
	85	==========
	86	* To modify the code, or use parts of it for other purposes, permission should be requested. Please contact Tal Pupko: talp@post.tau.ac.il
	87	* Please note that the use of the FastML program is for academic use only

+29

-0

libs/Makefile less more

	0	# $Id: Makefile 942 2006-10-18 12:28:12Z ninio $
	1
	2	# There might be a need for a split (as done in programs/Makefile) because of a bug in make 3.80.1 - see
	3	# http://www.cygwin.com/ml/cygwin/2004-09/msg01659.html
	4
	5	LIBS= phylogeny
	6
	7	# all has to be the FIRST task!
	8	TASKS= all clean test depend debug All install doubleRep
	9	.PHONY: $(TASKS) $(LIBS)
	10
	11	define TASKS_template
	12	$(1): $$(addsuffix .$(1),$(LIBS))
	13	endef
	14
	15	$(foreach task,$(TASKS),$(eval $(call TASKS_template,$(task))))
	16
	17	define LIB_template
	18	$(1).%:
	19	+cd $(1) && make $$(*)
	20	endef
	21
	22	$(foreach lib,$(LIBS),$(eval $(call LIB_template,$(lib))))
	23
	24
	25
	26	$(LIBS):
	27	+cd $@ && make
	28

+82

-0

libs/phylogeny/.project less more

	0	<?xml version="1.0" encoding="UTF-8"?>
	1	<projectDescription>
	2	<name>phylogeny</name>
	3	<comment></comment>
	4	<projects>
	5	</projects>
	6	<buildSpec>
	7	<buildCommand>
	8	<name>org.eclipse.cdt.managedbuilder.core.genmakebuilder</name>
	9	<triggers>clean,full,incremental,</triggers>
	10	<arguments>
	11	<dictionary>
	12	<key>?name?</key>
	13	<value></value>
	14	</dictionary>
	15	<dictionary>
	16	<key>org.eclipse.cdt.make.core.append_environment</key>
	17	<value>true</value>
	18	</dictionary>
	19	<dictionary>
	20	<key>org.eclipse.cdt.make.core.autoBuildTarget</key>
	21	<value>all</value>
	22	</dictionary>
	23	<dictionary>
	24	<key>org.eclipse.cdt.make.core.buildArguments</key>
	25	<value></value>
	26	</dictionary>
	27	<dictionary>
	28	<key>org.eclipse.cdt.make.core.buildCommand</key>
	29	<value>make</value>
	30	</dictionary>
	31	<dictionary>
	32	<key>org.eclipse.cdt.make.core.buildLocation</key>
	33	<value>${workspace_loc:/phylogeny/Debug}</value>
	34	</dictionary>
	35	<dictionary>
	36	<key>org.eclipse.cdt.make.core.cleanBuildTarget</key>
	37	<value>clean</value>
	38	</dictionary>
	39	<dictionary>
	40	<key>org.eclipse.cdt.make.core.contents</key>
	41	<value>org.eclipse.cdt.make.core.activeConfigSettings</value>
	42	</dictionary>
	43	<dictionary>
	44	<key>org.eclipse.cdt.make.core.enableAutoBuild</key>
	45	<value>false</value>
	46	</dictionary>
	47	<dictionary>
	48	<key>org.eclipse.cdt.make.core.enableCleanBuild</key>
	49	<value>true</value>
	50	</dictionary>
	51	<dictionary>
	52	<key>org.eclipse.cdt.make.core.enableFullBuild</key>
	53	<value>true</value>
	54	</dictionary>
	55	<dictionary>
	56	<key>org.eclipse.cdt.make.core.fullBuildTarget</key>
	57	<value>all</value>
	58	</dictionary>
	59	<dictionary>
	60	<key>org.eclipse.cdt.make.core.stopOnError</key>
	61	<value>true</value>
	62	</dictionary>
	63	<dictionary>
	64	<key>org.eclipse.cdt.make.core.useDefaultBuildCmd</key>
	65	<value>true</value>
	66	</dictionary>
	67	</arguments>
	68	</buildCommand>
	69	<buildCommand>
	70	<name>org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder</name>
	71	<arguments>
	72	</arguments>
	73	</buildCommand>
	74	</buildSpec>
	75	<natures>
	76	<nature>org.eclipse.cdt.core.ccnature</nature>
	77	<nature>org.eclipse.cdt.managedbuilder.core.ScannerConfigNature</nature>
	78	<nature>org.eclipse.cdt.managedbuilder.core.managedBuildNature</nature>
	79	<nature>org.eclipse.cdt.core.cnature</nature>
	80	</natures>
	81	</projectDescription>

+25

-0

libs/phylogeny/AddLog.cpp less more

	0	// $Id: AddLog.cpp 962 2006-11-07 15:13:34Z privmane $
	1
	2	// version 1.00
	3	// last modified 3 Nov 2002
	4
	5	#include "AddLog.h"
	6	#include <cmath>
	7
	8	const int tAddLog_Precompute::G_LOGADD = 500;
	9	const int tAddLog_Precompute::D_LOGADD = 50;
	10
	11	tAddLog_Precompute AddLogData;
	12
	13	int tAddLog_Precompute::d_logadd;
	14
	15	tAddLog_Precompute::tAddLog_Precompute(){
	16	d_logadd = int(D_LOGADDlog(10.0)G_LOGADD);
	17	logaddf = new double [d_logadd+1];
	18	for (int i=0; i<= d_logadd; i++)
	19	logaddf[i] = log(1.0+exp(-static_cast<double>(i)/G_LOGADD));
	20	}
	21
	22	tAddLog_Precompute::~tAddLog_Precompute(){
	23	delete [] logaddf;
	24	}

+67

-0

libs/phylogeny/AddLog.h less more

	0	// $Id: AddLog.h 962 2006-11-07 15:13:34Z privmane $
	1
	2	// version 1.00
	3	// last modified 2 Nov 2002
	4
	5	#ifndef __AddLog_h
	6	#define __AddLog_h
	7
	8	#include <iostream>
	9	using namespace std;
	10
	// Table-driven evaluation of log(exp(x) + exp(y)) for two values that are
	// already in log form. The table itself is filled by the constructor.
	class tAddLog_Precompute {
	public:

		tAddLog_Precompute();
		~tAddLog_Precompute();

		// Returns (an interpolated approximation of) log(exp(x) + exp(y)).
		double AddLog( double x, double y );

	private:
		static const int D_LOGADD; // = 50; // y/x < 1e-D discard
		static const int G_LOGADD;// = 500; // step function look-up every 1/G
		static int d_logadd;       // index of the last table step; set by the constructor

		double *logaddf;           // table of d_logadd+1 entries, owned by this object
	};

	// The single shared table instance.
	extern tAddLog_Precompute AddLogData;

	// Convenience wrapper that forwards to the shared table.
	inline
	double
	AddLog(double x, double y ){
		return AddLogData.AddLog(x, y);
	}

	inline double
	tAddLog_Precompute::AddLog(double x, double y ){
		// Make x the larger of the two, so the result is x + log(1 + exp(y-x)).
		if (x < y) {
			double dummy = x;
			x = y;
			y = dummy;
		}

	#ifdef notdef
		return x + log(1 + exp(y-x));
	#endif

		// Position of (x-y) in the table, in (fractional) table steps.
		double z = (x-y)*G_LOGADD;
		// Beyond the table the smaller term is discarded. Testing z before the
		// conversion to int also keeps infinite or NaN differences away from the
		// (undefined) out-of-range double-to-int cast.
		if (!(z < d_logadd)) return x;
		int i = int(z);
		// Linear interpolation between the two neighbouring table entries.
		x += ((i+1-z)*logaddf[i] + (z-i)*logaddf[i+1]);
		return x;
	}
	52
	53	#endif
	54
	55
	56	/*
	57	Folks,
	58
	59	In many of our program we use the AddLog procedure that compute the sum of
	60	two numbers in log form. Gill spent some time investigating faster versions
	61	of this procedure, which gave him 3-4 fold speedup on his program. Attached
	62	is my re-packaging of his solution. I think it will be useful in some of the
	63	code we use.
	64
	65	-Nir
	66	*/

+112

-0

libs/phylogeny/C_evalParamUSSRV.cpp less more

	0	// $Id: C_evalParamUSSRV.cpp 1915 2007-04-04 15:56:24Z privmane $
	1	#include "C_evalParamUSSRV.h"
	2
	3	// *********************
	4	// * USSRV *
	5	// *********************
	6
	7	MDOUBLE C_evalParamUSSRV::operator() (MDOUBLE param) {
	8
	9	setParam(param);
	10	MDOUBLE res = likelihoodComputation2USSRV::getTreeLikelihoodAllPosAlphTheSame(_et,_sc,_baseSc,*_pModel,_weights);
	11	print(param,res);
	12	return -res;
	13	}
	14
	15	void C_evalAlphaUSSRV::setParam(MDOUBLE alpha)
	16	{
	17	if (_pModel->noOfCategor() == 1)
	18	errorMsg::reportError(" one category when trying to optimize alpha");
	19	_pModel->updateAlpha(alpha);
	20	}
	21
	22	void C_evalAlphaUSSRV::print(MDOUBLE alpha,MDOUBLE res) {
	23	LOG(5,<<" with Alpha = "<<alpha<<" logL = " <<res<<endl);
	24	}
	25
	26
	27	void C_evalNuUSSRV::setParam(MDOUBLE Nu)
	28	{
	29	_pModel->updateNu(Nu);
	30	}
	31
	32	void C_evalNuUSSRV::print(MDOUBLE nu,MDOUBLE res) {
	33	LOG(5,<<" with Nu = "<<nu<<" logL = " <<res<<endl);
	34	}
	35
	36	void C_evalFUSSRV::setParam(MDOUBLE f)
	37	{
	38	_pModel->updateF(f);
	39	}
	40
	41	void C_evalFUSSRV::print(MDOUBLE f,MDOUBLE res) {
	42	LOG(5,<<" with F = "<<f<<" logL = " <<res<<endl);
	43	}
	44
	45
	46	// *********************
	47	// * SSRV *
	48	// *********************
	49
	50	MDOUBLE C_evalParamSSRV::operator() (MDOUBLE param) {
	51
	52	setParam(param);
	53	MDOUBLE res = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(_et,_sc,_ssrvSp,_weights);
	54	print(param,res);
	55	return -res;
	56	}
	57
	58	void C_evalAlphaSSRV::setParam(MDOUBLE alpha)
	59	{
	60	if (alpha<0)
	61	errorMsg::reportError("ERROR in C_evalAlphaSSRV::setParam, alpha is < 0 ");
	62
	63	replacementModelSSRV* pMulRM = static_cast<replacementModelSSRV*>(_ssrvSp.getPijAccelerator()->getReplacementModel());
	64	gammaDistribution* gammaDist = static_cast<gammaDistribution*>(pMulRM->getDistribution());
	65	gammaDist->setAlpha(alpha);
	66	pMulRM->updateQ();
	67	}
	68
	69	void C_evalAlphaSSRV::print(MDOUBLE alpha,MDOUBLE res) {
	70	LOG(5,<<" with Alpha = "<<alpha<<" logL = " <<res<<endl);
	71	}
	72
	73
	74	void C_evalNuSSRV::setParam(MDOUBLE Nu)
	75	{
	76	if (Nu<0)
	77	errorMsg::reportError("C_evalNuSSRV::setParam, nu is < 0 ");
	78
	79	static_cast<replacementModelSSRV*>(_ssrvSp.getPijAccelerator()->getReplacementModel())->setRateOfRate(Nu);
	80	}
	81
	82	void C_evalNuSSRV::print(MDOUBLE nu,MDOUBLE res) {
	83	LOG(5,<<" with Nu = "<<nu<<" logL = " <<res<<endl);
	84	}
	85
	86	void C_evalTrTvSSRV::setParam(MDOUBLE TrTv)
	87	{
	88	replacementModelSSRV* pMulRM = static_cast<replacementModelSSRV*>(_ssrvSp.getPijAccelerator()->getReplacementModel());
	89	static_cast<tamura92*>(pMulRM->getBaseRM())->changeTrTv(TrTv);
	90	pMulRM->updateQ();
	91	}
	92
	93	void C_evalTrTvSSRV::print(MDOUBLE TrTv,MDOUBLE res) {
	94	LOG(5,<<" with TrTv = "<<TrTv<<" logL = " <<res<<endl);
	95	}
	96
	97	void C_evalThetaSSRV::setParam(MDOUBLE Theta)
	98	{
	99	replacementModelSSRV* pMulRM = static_cast<replacementModelSSRV*>(_ssrvSp.getPijAccelerator()->getReplacementModel());
	100	static_cast<tamura92*>(pMulRM->getBaseRM())->changeTheta(Theta);
	101	pMulRM->updateFreq();
	102	pMulRM->updateQ();
	103	}
	104
	105	void C_evalThetaSSRV::print(MDOUBLE Theta,MDOUBLE res) {
	106	LOG(5,<<" with Theta = "<<Theta<<" logL = " <<res<<endl);
	107	}
	108
	109
	110
	111

+177

-0

libs/phylogeny/C_evalParamUSSRV.h less more

	0	// $Id: C_evalParamUSSRV.h 1915 2007-04-04 15:56:24Z privmane $
	1	#ifndef ___C_EVAL_PARAM_USSRV
	2	#define ___C_EVAL_PARAM_USSRV
	3
	4	#include "definitions.h"
	5
	6	#include "likelihoodComputation.h"
	7	#include "likelihoodComputation2USSRV.h"
	8	#include "sequenceContainer.h"
	9	#include "stochasticProcess.h"
	10	#include "gammaDistribution.h"
	11	#include "tree.h"
	12	#include "replacementModelSSRV.h"
	13	#include "tamura92.h"
	14	#include "stochasticProcessSSRV.h"
	15	#include "ussrvModel.h"
	16	#include "logFile.h"
	17
	18	// *********************
	19	// * USSRV *
	20	// *********************
	21
	22	class C_evalParamUSSRV {
	23	public:
	24	C_evalParamUSSRV(const tree& et,
	25	const sequenceContainer& sc,
	26	const sequenceContainer& baseSc,
	27	ussrvModel* pModel,
	28	const Vdouble* weights = NULL)
	29	: _et(et),_sc(sc),_baseSc(baseSc),_pModel(pModel),_weights(weights){}
	30
	31	MDOUBLE operator() (MDOUBLE param) ;
	32	virtual ~C_evalParamUSSRV(){}
	33
	34	protected:
	35	const tree& _et;
	36	const sequenceContainer& _sc;
	37	const sequenceContainer& _baseSc;
	38	ussrvModel* _pModel;
	39	const Vdouble * _weights;
	40
	41
	42	protected:
	43	virtual void setParam(MDOUBLE param) = 0;
	44	virtual void print(MDOUBLE param,MDOUBLE res) =0;
	45	};
	46
	47
	48	class C_evalAlphaUSSRV : public C_evalParamUSSRV {
	49	public:
	50	C_evalAlphaUSSRV(const tree& et,
	51	const sequenceContainer& sc,
	52	const sequenceContainer& baseSc,
	53	ussrvModel* pModel,
	54	const Vdouble *weights = NULL)
	55	: C_evalParamUSSRV(et,sc,baseSc,pModel,weights)
	56	{}
	57
	58	protected:
	59	virtual void setParam(MDOUBLE alpha);
	60	virtual void print(MDOUBLE alpha,MDOUBLE res);
	61	};
	62
	63
	64
	65	class C_evalNuUSSRV : public C_evalParamUSSRV{
	66	public:
	67	C_evalNuUSSRV( const tree& et,
	68	const sequenceContainer& sc,
	69	const sequenceContainer& baseSc,
	70	ussrvModel* pModel,
	71	const Vdouble * weights = NULL)
	72	: C_evalParamUSSRV(et,sc,baseSc,pModel,weights){}
	73
	74	protected:
	75	virtual void setParam(MDOUBLE Nu);
	76	virtual void print(MDOUBLE nu,MDOUBLE res);
	77	};
	78
	79	class C_evalFUSSRV : public C_evalParamUSSRV{
	80	public:
	81	C_evalFUSSRV( const tree& et,
	82	const sequenceContainer& sc,
	83	const sequenceContainer& baseSc,
	84	ussrvModel* pModel,
	85	const Vdouble * weights = NULL)
	86	: C_evalParamUSSRV(et,sc,baseSc,pModel,weights){}
	87
	88	protected:
	89	virtual void setParam(MDOUBLE F);
	90	virtual void print(MDOUBLE f,MDOUBLE res);
	91	};
	92
	93	// *********************
	94	// * SSRV *
	95	// *********************
	96
	97	class C_evalParamSSRV {
	98	public:
	99	C_evalParamSSRV(const tree& et,
	100	const sequenceContainer& sc,
	101	stochasticProcessSSRV& ssrvSp,
	102	const Vdouble* weights = NULL)
	103	: _et(et),_sc(sc),_ssrvSp(ssrvSp),_weights(weights){}
	104
	105	MDOUBLE operator() (MDOUBLE param) ;
	106	virtual ~C_evalParamSSRV(){}
	107
	108	protected:
	109	const tree& _et;
	110	const sequenceContainer& _sc;
	111	stochasticProcessSSRV& _ssrvSp;
	112	const Vdouble * _weights;
	113
	114
	115	protected:
	116	virtual void setParam(MDOUBLE param) = 0;
	117	virtual void print(MDOUBLE param,MDOUBLE res) =0;
	118	};
	119
	120
	121	class C_evalAlphaSSRV : public C_evalParamSSRV {
	122	public:
	123	C_evalAlphaSSRV(const tree& et,
	124	const sequenceContainer& sc,
	125	stochasticProcessSSRV& ssrvSp,
	126	const Vdouble *weights = NULL)
	127	: C_evalParamSSRV(et,sc,ssrvSp,weights)
	128	{}
	129
	130	protected:
	131	virtual void setParam(MDOUBLE alpha);
	132	virtual void print(MDOUBLE alpha,MDOUBLE res);
	133	};
	134
	135
	136
	137	class C_evalNuSSRV : public C_evalParamSSRV{
	138	public:
	139	C_evalNuSSRV( const tree& et,
	140	const sequenceContainer& sc,
	141	stochasticProcessSSRV& ssrvSp,
	142	const Vdouble * weights = NULL)
	143	: C_evalParamSSRV(et,sc,ssrvSp,weights){}
	144
	145	protected:
	146	virtual void setParam(MDOUBLE Nu);
	147	virtual void print(MDOUBLE nu,MDOUBLE res);
	148	};
	149
	150	class C_evalTrTvSSRV : public C_evalParamSSRV{
	151	public:
	152	C_evalTrTvSSRV(const tree& et,
	153	const sequenceContainer& sc,
	154	stochasticProcessSSRV& ssrvSp,
	155	const Vdouble * weights = NULL)
	156	: C_evalParamSSRV(et,sc,ssrvSp,weights){}
	157
	158	protected:
	159	virtual void setParam(MDOUBLE TrTv);
	160	virtual void print(MDOUBLE TrTv,MDOUBLE res);
	161	};
	162
	163	class C_evalThetaSSRV : public C_evalParamSSRV{
	164	public:
	165	C_evalThetaSSRV(const tree& et,
	166	const sequenceContainer& sc,
	167	stochasticProcessSSRV& ssrvSp,
	168	const Vdouble * weights = NULL)
	169	: C_evalParamSSRV(et,sc,ssrvSp,weights){}
	170
	171	protected:
	172	virtual void setParam(MDOUBLE Theta);
	173	virtual void print(MDOUBLE Theta,MDOUBLE res);
	174	};
	175
	176	#endif

+52

-0

libs/phylogeny/ConversionUtils.cpp less more

	0	#include "ConversionUtils.h"
	1	#include "someUtil.h"
	2	#include "errorMsg.h"
	3
	4	#include <cmath>
	5
	6	using namespace std;
	7
	// Appends the decimal representation of inValue to ioString.
	void appendIntToString (std::string& ioString, const int inValue) {
		std::ostringstream o;
		o << ioString<< inValue;
		ioString = o.str();
	}

	// Returns the decimal representation of x.
	std::string appendInt2string(const int x)
	{
		std::string res;
		appendIntToString(res, x);
		return res;
	}

	// Formats x with at most `lenght` digits after the decimal point.
	// The fraction is truncated (not rounded); trailing zeros and a trailing
	// decimal point are removed, so 2.0 becomes "2" and 1.50 becomes "1.5".
	// The integer part must fit in an int.
	std::string appendDouble2string(const double x, const int lenght){

		// first getting the integer part (truncated toward zero):
		const int theIntegerPart = static_cast<int>(x);
		const double theRemainingPart = std::fabs(x-theIntegerPart);
		const int integerRepresentingTheRemainingPart = static_cast<int>(theRemainingPart*std::pow(10.0,lenght));
		std::string part1, part2;
		appendIntToString(part1, theIntegerPart);
		appendIntToString(part2, integerRepresentingTheRemainingPart);
		// left-pad the fraction so that e.g. 0.05 does not turn into 0.5
		while (static_cast<int>(part2.length())<lenght){
			part2.insert(0, "0");
		}

		std::string result = part1;
		result += ".";
		result += part2;

		// removing 0 from the end (the loop stops at the decimal point)
		std::string::size_type i = result.length()-1;
		while (i>0 && result[i]=='0'){
			result.erase(i);
			i--;
		}

		// removing "." if this is the last character in the string.
		if (result[result.length()-1]=='.')
			result.erase(result.length()-1);

		// For -1 < x < 0 the integer part is 0 and carries no sign, so the minus
		// sign has to be restored by hand (unless the value was truncated to 0).
		if (x < 0 && theIntegerPart == 0 && result != "0")
			result.insert(0, "-");

		return result;
	}
	51

+51

-0

libs/phylogeny/ConversionUtils.h less more

	0	//utility class that converts between data types
	1	#ifndef ___ConversionUtils_h
	2	#define ___ConversionUtils_h
	3
	4	#include <sstream>
	5	#include <string>
	6	#include "definitions.h"
	7
	8	using namespace std;
	9
	10	//a function that turns an integer to string
	11
	12	void appendIntToString (string& ioString, const int inValue);
	13	string appendDouble2string(const double x, int const howManyDigitsAfterTheDot=5);
	14	string appendInt2string(const int x);
	15
	16
	17	// Trims spaces at the left side of a string
	// Returns str without its leading spaces and tabs.
	// A string made only of spaces/tabs yields the empty string.
	static inline std::string trim_left(const std::string& str )
	{
		// find_first_not_of returns npos when there is nothing but blanks;
		// keep the result as size_type so that npos is compared correctly.
		const std::string::size_type first = str.find_first_not_of(" \t");
		if (first == std::string::npos)
			return std::string();
		return str.substr( first ) ;
	}


	////
	// Returns str without its trailing spaces and tabs.
	// A string made only of spaces/tabs yields the empty string.
	static inline std::string trim_right(const std::string& str )
	{
		const std::string::size_type last = str.find_last_not_of(" \t");
		if (last == std::string::npos)
			return std::string();
		return str.substr(0, last + 1);
	}

	////
	// Trims spaces and tabs at both sides of a string
	static inline std::string trim(const std::string& str )
	{
		return trim_left(trim_right(str));
	}
	43
	44
	45	#endif
	46
	47
	48
	49
	50

+178

-0

libs/phylogeny/GLaguer.cpp less more

	0	// $Id: GLaguer.cpp 962 2006-11-07 15:13:34Z privmane $
	1	#include "definitions.h"
	2	#include "GLaguer.h"
	3
	4	#include "errorMsg.h"
	5	#include "gammaUtilities.h"
	6
	7
	8
	9	GLaguer::GLaguer(const int pointsNum, const MDOUBLE alf, Vdouble & points, Vdouble & weights)
	10	{
	11	gaulag(_points, _weights, alf, pointsNum);
	12
	13	weights = _weights;
	14	points = _points;
	15	}
	16
	17
	18	//Input: alf = the alpha parameter of the Laguerre polynomials
	19	// pointsNum = the polynom order
	20	//Output: the abscissas and weights are stored in the vecotrs x and w, respectively.
	21	//Discreption: given alf, the alpha parameter of the Laguerre polynomials, the function returns the abscissas and weights
	22	// of the n-point Guass-Laguerre quadrature formula.
	23	// The smallest abscissa is stored in x[0], the largest in x[pointsNum - 1].
	24	void GLaguer::gaulag(Vdouble &x, Vdouble &w, const MDOUBLE alf, const int pointsNum)
	25	{
	26	x.resize(pointsNum, 0.0);
	27	w.resize(pointsNum, 0.0);
	28	const int MAXIT=10000;
	29	const MDOUBLE EPS=1.0e-6;
	30	int i,its,j;
	31	MDOUBLE ai,p1,p2,p3,pp,z=0.0,z1;
	32
	33	int n= x.size();
	34	for (i=0;i<n;i++) {
	35	//loops over the desired roots
	36	if (i == 0) { //initial guess for the smallest root
	37	z=(1.0+alf)(3.0+0.92alf)/(1.0+2.4n+1.8alf);
	38	} else if (i == 1) {//initial guess for the second smallest root
	39	z += (15.0+6.25alf)/(1.0+0.9alf+2.5*n);
	40	} else { //initial guess for the other roots
	41	ai=i-1;
	42	z += ((1.0+2.55ai)/(1.9ai)+1.26aialf/
	43	(1.0+3.5ai))(z-x[i-2])/(1.0+0.3*alf);
	44	}
	45	for (its=0;its<MAXIT;its++) { //refinement by Newton's method
	46	p1=1.0;
	47	p2=0.0;
	48	for (j=0;j<n;j++) { //Loop up the recurrence relation to get the Laguerre polynomial evaluated at z.
	49	p3=p2;
	50	p2=p1;
	51	p1=((2j+1+alf-z)p2-(j+alf)*p3)/(j+1);
	52	}
	53	//p1 is now the desired Laguerre polynomial. We next compute pp, its derivative,
	54	//by a standard relation involving also p2, the polynomial of one lower order.
	55	pp=(np1-(n+alf)p2)/z;
	56	z1=z;
	57	z=z1-p1/pp; //Newton's formula
	58	if (fabs(z-z1) <= EPS)
	59	break;
	60	}
	61	if (its >= MAXIT)
	62	errorMsg::reportError("too many iterations in gaulag");
	63	x[i]=z;
	64	w[i] = -exp(gammln(alf+n)-gammln(MDOUBLE(n)))/(ppnp2);
	65	}
	66	}
	67
	68
	69	void GLaguer::GetPhylipLaguer(const int categs, MDOUBLE alpha, Vdouble & points, Vdouble & weights)
	70	{
	71	/* calculate rates and probabilities to approximate Gamma distribution
	72	of rates with "categs" categories and shape parameter "alpha" using
	73	rates and weights from Generalized Laguerre quadrature */
	74
	75	points.resize(categs, 0.0);
	76	weights.resize(categs, 0.0);
	77	long i;
	78	raterootarray lgroot; /* roots of GLaguerre polynomials */
	79	double f, x, xi, y;
	80
	81	alpha = alpha - 1.0;
	82	lgroot[1][1] = 1.0+alpha;
	83	for (i = 2; i <= categs; i++)
	84	{
	85	cerr<<lgroot[i][1]<<"\t";
	86	lgr(i, alpha, lgroot); /* get roots for L^(a)_n */
	87	cerr<<lgroot[i][1]<<endl;
	88	}
	89	/* here get weights */
	90	/* Gamma weights are (1+a)(1+a/2) ... (1+a/n)x_i/((n+1)^2 [L_{n+1}^a(x_i)]^2) /
	91	f = 1;
	92	for (i = 1; i <= categs; i++)
	93	f *= (1.0+alpha/i);
	94	for (i = 1; i <= categs; i++) {
	95	xi = lgroot[categs][i];
	96	y = glaguerre(categs+1, alpha, xi);
	97	x = fxi/((categs+1)(categs+1)yy);
	98	points[i-1] = xi/(1.0+alpha);
	99	weights[i-1] = x;
	100	}
	101	}
	102
	103
	104	void GLaguer::lgr(long m, double alpha, raterootarray lgroot)
	105	{ /* For use by initgammacat. Get roots of m-th Generalized Laguerre
	106	polynomial, given roots of (m-1)-th, these are to be
	107	stored in lgroot[m][] */
	108	long i;
	109	double upper, lower, x, y;
	110	bool dwn; /* is function declining in this interval? */
	111
	112	if (m == 1) {
	113	lgroot[1][1] = 1.0+alpha;
	114	} else {
	115	dwn = true;
	116	for (i=1; i<=m; i++) {
	117	if (i < m) {
	118	if (i == 1)
	119	lower = 0.0;
	120	else
	121	lower = lgroot[m-1][i-1];
	122	upper = lgroot[m-1][i];
	123	}
	124	else { /* i == m, must search above */
	125	lower = lgroot[m-1][i-1];
	126	x = lgroot[m-1][m-1];
	127	do {
	128	x = 2.0*x;
	129	y = glaguerre(m, alpha,x);
	130	} while ((dwn && (y > 0.0)) \|\| ((!dwn) && (y < 0.0)));
	131	upper = x;
	132	}
	133	while (upper-lower > 0.000000001) {
	134	x = (upper+lower)/2.0;
	135	if (glaguerre(m, alpha, x) > 0.0) {
	136	if (dwn)
	137	lower = x;
	138	else
	139	upper = x;
	140	}
	141	else {
	142	if (dwn)
	143	upper = x;
	144	else
	145	lower = x;
	146	}
	147	}
	148	lgroot[m][i] = (lower+upper)/2.0;
	149	dwn = !dwn; // switch for next one
	150	}
	151	}
	152	} /* lgr */
	153
	154
	155	double GLaguer::glaguerre(long m, double b, double x)
	156	{ /* Generalized Laguerre polynomial computed recursively.
	157	For use by initgammacat */
	158	long i;
	159	double gln, glnm1, glnp1; /* L_n, L_(n-1), L_(n+1) */
	160
	161	if (m == 0)
	162	return 1.0;
	163	else {
	164	if (m == 1)
	165	return 1.0 + b - x;
	166	else {
	167	gln = 1.0+b-x;
	168	glnm1 = 1.0;
	169	for (i=2; i <= m; i++) {
	170	glnp1 = ((2(i-1)+b+1.0-x)gln - (i-1+b)*glnm1)/i;
	171	glnm1 = gln;
	172	gln = glnp1;
	173	}
	174	return gln;
	175	}
	176	}
	177	} /* glaguerre */

+30

-0

libs/phylogeny/GLaguer.h less more

	0	// $Id: GLaguer.h 962 2006-11-07 15:13:34Z privmane $
	1	#ifndef ___GLAGUER
	2	#define ___GLAGUER
	3
	4	#include "definitions.h"
	5	#include <vector>
	6	using namespace std;
	7
	8	typedef double raterootarray[35][35];
	9
	10	class GLaguer
	11	{
	12	public:
	13	explicit GLaguer(const int pointsNum, const MDOUBLE alpha, Vdouble & points, Vdouble & weights);
	14
	15	void GetPhylipLaguer(const int pointsNum, MDOUBLE alf, Vdouble & points, Vdouble & weights);
	16
	17	private:
	18	void gaulag(Vdouble &x, Vdouble &w, const MDOUBLE alf, const int pointsNum);
	19
	20	void lgr(long m, double alpha, raterootarray lgroot);
	21	double glaguerre(long m, double b, double x);
	22
	23
	24	private:
	25	Vdouble _points;
	26	Vdouble _weights;
	27	};
	28
	29	#endif

+156

-0

libs/phylogeny/GamMixtureOptimizer.cpp less more

	0	#include "GamMixtureOptimizer.h"
	1	#include "someUtil.h"
	2	#include "optGammaMixtureEM.h"
	3	#include "optGammaMixtureLS.h"
	4
	5	#include <fstream>
	6	#include <algorithm>
	7	#include <ctime>
	8	using namespace std;
	9
	10
	11
	12	GamMixtureOptimizer::GamMixtureOptimizer(stochasticProcess* pSp, const sequenceContainer& sc, const tree& inTree, unObservableData* unObservableData_p)
	13	{
	14	_pSc = &sc;
	15	_pTree = &inTree;
	16	_pSp = pSp;
	17	_unObservableData_p = unObservableData_p;
	18	_tolOptSpecific = 0.001;
	19
	20	}
	21
	22
	23	GamMixtureOptimizer::~GamMixtureOptimizer()
	24	{
	25	}
	26
	27
	28	///////////////////////////////////////////////////////////////////////////////////////////////////////////
	29	//findBestParamManyStarts: Finds the best gammaMixture from many starting points.
	30	//The function starts form few starting points.
	31	//For each point it tries to optimize the likellihood doing only a small number of iterations.
	32	//It then picks the best points (highest likelihood) and continue the maximization for these points only.
	33	//This can be repeated a number of times, each cycle with a different optimization algorithm.
	34	//The best gammaMixture is stored in _sp and the best likelihood is returned.
	35	//input Parameters:
	36	//pointsNum: a vector with the number of points to peformed the current cycle of optimization.
	37	//iterNum: the number of iterations to perform in each cycle.
	38	//OptAlgs: the optimization algorithm to be performed in each cycle.
	39	//tol = for determining convergence in the maximization process.
	40	MDOUBLE GamMixtureOptimizer::findBestParamManyStarts(const Vint pointsNum, const Vint iterNum, const vector<OptimAlg> OptAlgs, const Vdouble tols, const Vdouble * pWeights, ofstream* pOutF/= NULL/)
	41	{
	42	//make sure that the number of points in each cycle is not bigger than the previous cycle.
	43	int i;
	44	for (i = 0; i < pointsNum.size()-1; ++i)
	45	{
	46	if (pointsNum[i] < pointsNum[i+1])
	47	errorMsg::reportError("input error in GamMixtureOptimizer::findBestParamManyStarts()");
	48	}
	49
	50	//create starting distributions
	51	vector<mixtureDistribution*> distVec;
	52	const mixtureDistribution * pMixture = getMixtureDist();
	53	for (i = 0; i < pointsNum[0]; ++i)
	54	{
	55	//the first distribution will be the current one
	56	if (i == 0)
	57	distVec.push_back(new mixtureDistribution(*pMixture));
	58	else
	59	distVec.push_back(new mixtureDistribution(pMixture->getComponentsNum(), pMixture->categoriesForOneComponent(), LAGUERRE, 15, 15));
	60	}
	61
	62	//make a small number of iterations for all random starts
	63	int numOfOptCycles = pointsNum.size();
	64	Vdouble likelihoodVec;
	65	for (i = 0; i < numOfOptCycles; ++i)
	66	{
	67	if (i != 0)
	68	{
	69	vector<mixtureDistribution*> tmpDistVec(0);
	70	//sort results and continue optimization only with the best (pointsNum[i]) points
	71	Vdouble sortedL = likelihoodVec;
	72	sort(sortedL.begin(),sortedL.end());
	73	MDOUBLE threshold = sortedL[sortedL.size()- pointsNum[i]];
	74	for (int j = 0; j < likelihoodVec.size(); ++j)
	75	{
	76	if (likelihoodVec[j] >= threshold)
	77	tmpDistVec.push_back(distVec[j]);
	78	else
	79	delete distVec[j];
	80	}
	81	distVec.clear();
	82	distVec = tmpDistVec;
	83	}
	84
	85	likelihoodVec.clear();
	86	likelihoodVec.resize(pointsNum[i]);
	87	int c;
	88	for (c = 0; c < pointsNum[i]; ++c)
	89	{
	90	cerr <<"optimizing point " <<c<<endl;
	91	MDOUBLE ll = optimizeParam(distVec[c], iterNum[i], OptAlgs[i], tols[i], pWeights, pOutF);
	92	cerr<<"pointi: "<<c<<" likelihood = "<<ll<<endl;
	93	likelihoodVec[c] = ll;
	94	}
	95	}
	96
	97	Vdouble sortedL = likelihoodVec;
	98	sort(sortedL.begin(),sortedL.end());
	99	MDOUBLE bestL = sortedL[likelihoodVec.size() - 1];
	100	for (i = 0; i < likelihoodVec.size(); ++i)
	101	{
	102	if (bestL == likelihoodVec[i])
	103	{
	104	_pSp->setDistribution(distVec[i]);
	105	}
	106	delete distVec[i];
	107	}
	108	distVec.clear();
	109	return bestL;
	110	}
	111
	112	MDOUBLE GamMixtureOptimizer::findBestParam(const OptimAlg alg, const int maxIterations, const MDOUBLE tol, const Vdouble * pWeights, ofstream* pOutF)
	113	{
	114	mixtureDistribution* pInDistribution = static_cast<mixtureDistribution*>(_pSp->distr());
	115	return optimizeParam(pInDistribution, maxIterations, alg, tol, pWeights, pOutF);
	116	}
	117
	118
	119	MDOUBLE GamMixtureOptimizer::optimizeParam(mixtureDistribution* pInDistribution, const int maxIterations, const OptimAlg alg, const MDOUBLE tol, const Vdouble * pWeights, ofstream* pOutF)
	120	{
	121	MDOUBLE res = 0.0;
	122	switch (alg)
	123	{
	124	case EM: {
	125	optGammaMixtureEM emOpt(_pSp, _pSc, *_pTree);
	126	res = emOpt.optimizeParam(pInDistribution, maxIterations, tol, _tolOptSpecific, pOutF);
	127	break;
	128	}
	129	case ONE_DIM: {
	130	optGammaMixtureLS lsOpt(_pSp, _pSc, _pTree,MAXIMUM_ALPHA_PARAM,MAXIMUM_BETA_PARAM,_unObservableData_p);
	131	res = lsOpt.optimizeParam(pInDistribution, maxIterations, tol, pWeights, optGammaMixtureLS::ONE_DIM);
	132	MDOUBLE resRecompute = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(_pTree,_pSc,*_pSp,pWeights,_unObservableData_p);
	133	if(!DEQUAL(res,resRecompute)){
	134	LOGnOUT(3,<<"--- error: different likelihood after GamMixtureOptimizer::optimizeParam,diff= "<<res-resRecompute <<"\n");
	135	}
	136	break;
	137	}
	138	//case TX_CONJUGATE_DERIVATIVES:
	139	// {
	140	// txGamMixtureOptimizer txOpt(_pSp, _pSc, _pTree);
	141	// txOpt.setOptimizationParameters(tol, _tolOptSpecific, _tolOptSpecific, _tolOptSpecific);
	142	// res = txOpt.optimizeParam(pInDistribution, maxIterations, pWeights, alg, pOutF);
	143	// break;
	144	// }
	145	//case NR_CONJUGATE_DERIVATIVES:
	146	// {
	147	// optGammaMixtureLS opt(_pSp, _pSc, _pTree);
	148	// res = opt.optimizeParam(pInDistribution, maxIterations, tol, pWeights, optGammaMixtureLS::CONJUGATE_DERIVATIVES, pOutF);
	149	// break;
	150	// }
	151	default:
	152	errorMsg::reportError("unknown optimization algorithm in GamMixtureOptimizer::optimizeParam()");
	153	}
	154	return res;
	155	}

+52

-0

libs/phylogeny/GamMixtureOptimizer.h less more

	0	#ifndef __GAMMIXTURE_OPTIMIZER
	1	#define __GAMMIXTURE_OPTIMIZER
	2	/************************************************************
	3	GamMixtureOptimizer class is used to find the best Gamma mixture parameters.
	4	The parameters to be optimized are the alpha and beta of each component and the components' probabilities.
	5	The optimizer can choose between several optimization algorithms (EM, ConjugateDerivatives, etc).
	6	The interface to the optimizer is the functions:
	7	1. findBestParam() = given a gammaMixture - finds the best parameters.
	8	2. findBestParamManyStarts() - finds the best parameters but starts from many initial points.
	9	3. SetOptAlg() - choose the optimization algorithm to be used.
	10	************************************************************/
	11	#include "definitions.h"
	12	#include "stochasticProcess.h"
	13	#include "sequenceContainer.h"
	14	#include "tree.h"
	15	#include "mixtureDistribution.h"
	16	#include "unObservableData.h"
	17
	18
	19
	20	class GamMixtureOptimizer{
	21	public:
	22	enum OptimAlg {EM, ONE_DIM, TX_CONJUGATE_DERIVATIVES, NR_CONJUGATE_DERIVATIVES};
	23	public:
	24
	25	explicit GamMixtureOptimizer(stochasticProcess* cur_sp, const sequenceContainer& sc, const tree& inTree, unObservableData* unObservableData_p = NULL);
	26	virtual ~GamMixtureOptimizer();
	27
	28	const stochasticProcess* getSp() const {return _pSp;}
	29	const mixtureDistribution* getMixtureDist() const {return static_cast<mixtureDistribution*>(_pSp->distr());}
	30
	31	MDOUBLE findBestParamManyStarts(const Vint pointsNum, const Vint iterNum, const vector<OptimAlg> OptAlgs, const Vdouble tols, const Vdouble * pWeights, ofstream* pOutF = NULL);
	32	//return the logLikelihood. the final distribution is stored in the stochasticProcess
	33	MDOUBLE findBestParam(const OptimAlg alg, const int maxIterations, const MDOUBLE tol, const Vdouble * pWeights, ofstream* pOutF=NULL);
	34
	35	void setTolOptSpecific(const MDOUBLE tol) {_tolOptSpecific = tol;}
	36
	37	private:
	38	MDOUBLE optimizeParam(mixtureDistribution* pInDistribution, const int maxIterations, const OptimAlg alg, const MDOUBLE tol, const Vdouble * pWeights, ofstream* pOutF);
	39
	40
	41	private:
	42	stochasticProcess* _pSp;
	43	const sequenceContainer* _pSc;
	44	const tree* _pTree;
	45	unObservableData* _unObservableData_p;
	46
	47	MDOUBLE _tolOptSpecific; //tolerance specific to the optimization algorithm
	48	};
	49
	50	#endif
	51

+24

-0

libs/phylogeny/HIVb.dat.q less more

	0	""
	1	"0.16315391 "
	2	"0.0026528488 0.15680618 "
	3	"0.77200021 0.0026528488 9.3704985 "
	4	"0.065662251 0.18661252 0.045663061 0.0026528488 "
	5	"0.029241185 1.8153444 0.35657046 0.0026528488 0.0026528488 "
	6	"0.7859595 0.039751241 0.042054709 5.6172481 0.0026528488 1.3583647 "
	7	"1.1329574 1.9384101 0.17158679 1.5057888 0.47638319 0.032849536 2.0839453 "
	8	"0.044971782 4.796584 4.0566567 1.0170492 0.12737547 3.7434084 0.063530422 0.0026528488 "
	9	"0.0026528488 0.35934906 0.3610872 0.0093800488 0.0026528488 0.0026528488 0.0032315889 0.0026528488 0.054707578 "
	10	"0.11420832 0.37215595 0.0026528488 0.0046480457 0.068855751 0.79296833 0.0026528488 0.0026528488 0.92409864 3.1615537 "
	11	"0.0026528488 10.850151 4.1938515 0.0026528488 0.0026528488 3.4738365 2.4484839 0.27680089 0.0026528488 0.17101271 0.04324117 "
	12	"0.009902713 1.3338205 0.0026528488 0.0026528488 0.0026528488 0.1611213 0.093268326 0.0026528488 0.0026528488 5.9458299 2.8224242 0.68043448 "
	13	"0.0074953058 0.0026528488 0.0026528488 0.0026528488 4.9333171 0.0026528488 0.0026528488 0.15469345 0.077228672 1.803067 4.5230222 0.018180397 0.099760378 "
	14	"1.1259592 0.68101281 0.0039239772 0.018180397 0.0026528488 2.3727663 0.0063788279 0.0026528488 1.3015831 0.021784823 1.1022958 0.016652568 0.0026528488 0.0026528488 "
	15	"1.3085601 1.8459052 6.9741802 0.28026286 2.4900381 0.061711098 0.0026528488 2.324113 0.20307398 0.64624988 0.49218621 0.26746605 0.0026528488 0.50747511 2.8532025 "
	16	"8.4457685 1.5220348 3.6538588 0.14576024 0.39260517 0.12924096 0.15374532 0.19610654 0.37755025 4.5693569 0.023221606 2.4785142 2.6211525 0.0074953058 1.0686577 4.7385556 "
	17	"0.0026528488 0.52597396 0.0026528488 0.0026528488 1.3968681 0.014142867 0.0026528488 0.64556544 0.036884095 0.0026528488 0.39731344 0.0026528488 0.047262092 0.44002431 0.023584144 0.013196755 0.0026528488 "
	18	"0.0026528488 0.0052623288 0.93601524 0.35795048 4.0213579 0.059971891 0.042054709 0.0026528488 9.9186301 0.078613459 0.059416384 0.0026528488 0.0026528488 8.13894 0.016149535 0.34382193 0.056055755 0.67924601 "
	19	"4.0399067 0.043106352 0.014142867 0.55599996 0.22285362 0.011097026 0.54567507 0.50571521 0.0026528488 9.4117238 0.74829436 0.14104083 3.6361006 0.38374731 0.0026528488 0.039751241 0.37629386 0.0026528488 0.021784823 "
	20
	21
	22
	23	"0.060490222 0.066039665 0.044127815 0.042109048 0.020075899 0.053606488 0.071567447 0.072308239 0.022293943 0.069730629 0.098851122 0.056968211 0.019768318 0.028809447 0.046025282 0.05060433 0.053636813 0.033011601 0.028350243 0.061625237 "

+23

-0

libs/phylogeny/HIVw.dat.q less more

	0	""
	1	"0.021810606 "
	2	"0.18082842 0.046923924 "
	3	"1.2987859 0.019752881 8.6119047 "
	4	"0.049094712 0.83857481 0.017714543 0.0014641764 "
	5	"0.0014641764 3.1258994 0.10016958 0.0014641764 0.0014641764 "
	6	"1.6291158 0.0073686726 0.059013922 3.5501299 0.0014641764 0.93899388 "
	7	"0.54716271 3.9350911 0.017714543 3.0445791 0.014343013 0.017714543 4.3281346 "
	8	"0.0014641764 2.0041793 2.5180202 0.67873067 0.0014641764 5.4310694 0.0014641764 0.0014641764 "
	9	"0.0014641764 0.39260132 0.28903662 0.042497426 0.0014641764 0.010022346 0.011435569 0.0014641764 0.0014641764 "
	10	"0.046923924 0.17182315 0.0014641764 0.0014641764 0.0014641764 0.8464345 0.038021439 0.014343013 0.51650871 2.6655214 "
	11	"0.17358807 11.681111 3.1232346 0.26188639 0.0014641764 3.8275035 7.0170946 0.081825497 0.065612672 0.23938727 0.0014641764 "
	12	"0.0014641764 0.96240899 0.059013922 0.0014641764 0.0014641764 0.0014641764 0.0014641764 0.014343013 0.0014641764 5.0679244 3.3336075 1.1993479 "
	13	"0.17509295 0.0014641764 0.0014641764 0.0014641764 0.1062872 0.0014641764 0.0014641764 0.0014641764 0.0014641764 0.43423957 2.1926949 0.0014641764 0.0014641764 "
	14	"0.29570799 0.11851717 0.10098366 0.0014641764 0.0014641764 0.89168927 0.0014641764 0.0014641764 4.0834122 0.0014641764 2.8788489 0.032776467 0.0014641764 0.010022346 "
	15	"2.5166849 2.4452448 4.2665807 0.12529865 0.32854654 0.046923924 0.0014641764 1.838906 0.21235155 0.21672475 1.7991682 0.0014641764 0.11495981 1.2531563 4.1726098 "
	16	"7.0696878 0.27181058 1.3300754 0.18460189 0.0014641764 0.059472209 0.13433613 0.014343013 0.28099302 2.7419485 0.0014641764 1.185403 2.170826 0.033533153 1.2700295 1.856807 "
	17	"0.0014641764 1.7469498 0.0014641764 0.0014641764 1.6102836 0.012981329 0.0014641764 0.82749392 0.0014641764 0.0014641764 0.40127511 0.0014641764 0.0014641764 0.0014641764 0.0014641764 0.32257563 0.0014641764 "
	18	"0.0014641764 0.0014641764 1.4831375 0.66811539 2.4446914 0.0014641764 0.0014641764 0.0014641764 13.906425 0.033533153 0.0014641764 0.0014641764 0.16960961 1.2086132 0.0014641764 0.27325689 0.14366733 0.0014641764 "
	19	"7.2650675 0.081825497 0.021810606 0.85445233 0.0014641764 0.0014641764 0.64409704 0.81883185 0.24231504 7.2690793 0.86487141 0.037501949 4.3246792 0.66766443 0.0014641764 0.25261054 0.0014641764 0.0014641764 0.39673909 "
	20
	21
	22	"0.0377494 0.057321 0.0891129 0.0342034 0.0240105 0.0437824 0.0618606 0.0838496 0.0156076 0.0983641 0.0577867 0.0641682 0.0158419 0.0422741 0.0458601 0.0550846 0.0813774 0.019597 0.0205847 0.0515639 "

+67

-0

libs/phylogeny/KH_calculation.cpp less more

	0	#include "KH_calculation.h"
	1
	2	namespace KH_calculation {
	3
	// Standard normal cumulative distribution function, Phi(z).
	//
	// Uses the Abramowitz & Stegun 7.1.26 polynomial approximation of erf():
	//   erf(x) ~= 1 - (a1*t + a2*t^2 + a3*t^3 + a4*t^4 + a5*t^5) * exp(-x^2),
	//   t = 1 / (1 + p*x),  x >= 0
	// and the identity Phi(z) = 0.5 * (1 + erf(z / sqrt(2))). Negative z is
	// handled through the symmetry erf(-x) = -erf(x).
	double get_phi (double z)
	{
		// constants of A&S formula 7.1.26
		const double a1 = 0.254829592;
		const double a2 = -0.284496736;
		const double a3 = 1.421413741;
		const double a4 = -1.453152027;
		const double a5 = 1.061405429;
		const double p = 0.3275911;

		// Save the sign of z; the approximation is evaluated for |z| only.
		const int sign = (z < 0) ? -1 : 1;
		z = fabs(z)/sqrt(2.0);

		// A&S formula 7.1.26, polynomial evaluated in Horner form
		const double t = 1.0/(1.0 + p*z);
		const double y = 1.0 - (((((a5*t + a4)*t) + a3)*t + a2)*t + a1)*t*exp(-z*z);

		return 0.5*(1.0 + sign*y);
	}
	28
	29
	30	double calc_p_value_kh (const Vdouble & LogLikePerPositionA, const Vdouble & LogLikePerPositionB)
	31	{
	32	//calc esteemated variance of delta of KH (Assessing the Uncertainty in Phylogenetic Inference, Nielsen, pg 484)
	33	//delta(X) = LL(A) - LL(B)
	34	//H0: E(delta(X)) <= 0 ---> tree B is either better or equal to tree A
	35	//H1: E(delta(X)) > 0 ---> tree A is better than tree B
	36	int num_pos = LogLikePerPositionA.size();
	37	double varDeltaX = 0;
	38	double sum_diffs = 0;
	39	double avg_diff = 0;
	40	for (int i=0; i < num_pos; ++i)
	41	{
	42	sum_diffs += (LogLikePerPositionA[i] - LogLikePerPositionB[i]);
	43	}
	44	avg_diff = sum_diffs / num_pos;
	45
	46	double sum_squares = 0;
	47	double sqr_diff = 0;
	48	for (int i=0; i < num_pos; ++i)
	49	{
	50	sqr_diff = pow (LogLikePerPositionA[i] - LogLikePerPositionB[i] - avg_diff, 2);
	51	sum_squares += sqr_diff;
	52	}
	53	varDeltaX = (num_pos / (num_pos - 1)) * sum_squares;
	54	//end calc esteemated variance of delta of KH (Assessing the Uncertainty in Phylogenetic Inference, Nielsen, pg 484)
	55
	56	//obtain the standard test statistic, z:
	57	double stdDeltaX = sqrt (varDeltaX);
	58	double z = sum_diffs / stdDeltaX; //let's hope stdDeltaX is not a zero
	59
	60	double phi_of_z = get_phi (z);
	61	double p_value = 1 - phi_of_z; //one-sided test to see if A is better than B
	62
	63	return p_value;
	64	}
	65
	66	};⏎

+16

-0

libs/phylogeny/KH_calculation.h less more

	0	// Kishino-Hasegawa Test 2013 02 27 Eli Levy Karin
	1
	2	#ifndef ___KH_CALCULATION
	3	#define ___KH_CALCULATION
	4
	5	#include "math.h"
	6	#include <cmath>
	7	#include "definitions.h"
	8
	9	namespace KH_calculation {
	10	double calc_p_value_kh (const Vdouble & LogLikePerPositionA, const Vdouble & LogLikePerPositionB);
	11
	12	double get_phi (double z);
	13	};
	14
	15	#endif

+23

-0

libs/phylogeny/LG.dat.q less more

	0	" 0.425093 "
	1	" 0.276818 0.751878 "
	2	" 0.395144 0.123954 5.076149 "
	3	" 2.489084 0.534551 0.528768 0.062556 "
	4	" 0.969894 2.807908 1.695752 0.523386 0.084808 "
	5	" 1.038545 0.363970 0.541712 5.243870 0.003499 4.128591 "
	6	" 2.066040 0.390192 1.437645 0.844926 0.569265 0.267959 0.348847 "
	7	" 0.358858 2.426601 4.509238 0.927114 0.640543 4.813505 0.423881 0.311484 "
	8	" 0.149830 0.126991 0.191503 0.010690 0.320627 0.072854 0.044265 0.008705 0.108882 "
	9	" 0.395337 0.301848 0.068427 0.015076 0.594007 0.582457 0.069673 0.044261 0.366317 4.145067 "
	10	" 0.536518 6.326067 2.145078 0.282959 0.013266 3.234294 1.807177 0.296636 0.697264 0.159069 0.137500 "
	11	" 1.124035 0.484133 0.371004 0.025548 0.893680 1.672569 0.173735 0.139538 0.442472 4.273607 6.312358 0.656604 "
	12	" 0.253701 0.052722 0.089525 0.017416 1.105251 0.035855 0.018811 0.089586 0.682139 1.112727 2.592692 0.023918 1.798853 "
	13	" 1.177651 0.332533 0.161787 0.394456 0.075382 0.624294 0.419409 0.196961 0.508851 0.078281 0.249060 0.390322 0.099849 0.094464 "
	14	" 4.727182 0.858151 4.008358 1.240275 2.784478 1.223828 0.611973 1.739990 0.990012 0.064105 0.182287 0.748683 0.346960 0.361819 1.338132 "
	15	" 2.139501 0.578987 2.000679 0.425860 1.143480 1.080136 0.604545 0.129836 0.584262 1.033739 0.302936 1.136863 2.020366 0.165001 0.571468 6.472279 "
	16	" 0.180717 0.593607 0.045376 0.029890 0.670128 0.236199 0.077852 0.268491 0.597054 0.111660 0.619632 0.049906 0.696175 2.457121 0.095131 0.248862 0.140825 "
	17	" 0.218959 0.314440 0.612025 0.135107 1.165532 0.257336 0.120037 0.054679 5.306834 0.232523 0.299648 0.131932 0.481306 7.803902 0.089613 0.400547 0.245841 3.151815 "
	18	" 2.547870 0.170887 0.083688 0.037967 1.959291 0.210332 0.245034 0.076701 0.119013 10.649107 1.702745 0.185202 1.898718 0.654683 0.296501 0.098369 2.188158 0.189510 0.249313 "
	19
	20	" 0.079066 0.055941 0.041977 0.053052 0.012937 0.040767 0.071586 0.057337 0.022355 0.062157 "
	21	" 0.099081 0.064600 0.022951 0.042302 0.044040 0.061197 0.053287 0.012066 0.034155 0.069147 "
	22	" Si Quang Le and Olivier Gascuel (LG) matrix "

+239

-0

libs/phylogeny/Makefile less more

	0	#! /usr/local/bin/gmake
	1	# $Id: Makefile 11759 2013-09-24 13:45:40Z elilevy $
	2	# makefile for yaep5
	3
	4
	5
	6	# use LOGREP=t or DOUBLEREP=t to activate logRep or doubleRep respectively (or setenv DOUBLEREP in the shell)
	7	#DOUBLEREP=t
	8	#LOGREP=t
	9
	10
	11	Libsources= AddLog.cpp NNiProp.cpp NNiSep.cpp Nni.cpp aaJC.cpp \
	12	allTrees.cpp allTreesSeparateModel.cpp alphabet.cpp amino.cpp \
	13	bestAlpha.cpp bestAlphaManyTrees.cpp bestHKYparam.cpp bootstrap.cpp \
	14	bblEM.cpp bblEMfixRoot.cpp bblEMProprtional.cpp bblEMProportionalEB.cpp bblLSProportionalEB.cpp bblEMSeperate.cpp \
	15	chebyshevAccelerator.cpp clustalFormat.cpp codon.cpp codonJC.cpp \
	16	computeCounts.cpp computeDownAlg.cpp computeMarginalAlg.cpp \
	17	computePijComponent.cpp computeUpAlg.cpp computeUpAlgFactors.cpp \
	18	computeSubstitutionCounts.cpp \
	19	computePosteriorExpectationOfSubstitutions.cpp \
	20	computePosteriorExpectationOfSubstitutions_nonReversibleSp.cpp \
	21	ConversionUtils.cpp countTableComponent.cpp datMatrixHolder.cpp distanceTable.cpp \
	22	distribution.cpp errorMsg.cpp evaluateCharacterFreq.cpp \
	23	fastStartTree.cpp fastaFormat.cpp findRateOfGene.cpp \
	24	fromCountTableComponentToDistance.cpp fromCountTableComponentToDistancefixRoot.cpp \
	25	fromCountTableComponentToDistanceProp.cpp fromCountTableComponentToDistancePropEB.cpp fromQtoPt.cpp \
	26	generalGammaDistributionFixedCategories.cpp gammaDistribution.cpp gammaUtilities.cpp \
	27	generalGammaDistribution.cpp getRandomWeights.cpp goldmanYangModel.cpp \
	28	granthamChemicalDistances.cpp hky.cpp simulateWithDependence.cpp KH_calculation.cpp likeDist.cpp likeDistfixRoot.cpp \
	29	likeDistProp.cpp likeDistPropEB.cpp likelihoodComputation.cpp \
	30	likelihoodComputationFactors.cpp logFile.cpp maseFormat.cpp \
	31	molphyFormat.cpp nexusFormat.cpp nj.cpp njConstrain.cpp \
	32	nucJC.cpp nucleotide.cpp numRec.cpp Parameters.cpp phylipFormat.cpp \
	33	pijAccelerator.cpp readDatMatrix.cpp readTree.cpp recognizeFormat.cpp \
	34	replacementModel.cpp searchStatus.cpp seqContainerTreeMap.cpp \
	35	sequence.cpp sequenceContainer.cpp simulateTree.cpp \
	36	siteSpecificRate.cpp someUtil.cpp split.cpp splitMap.cpp \
	37	splitTreeUtil.cpp stochasticProcess.cpp suffStatComponent.cpp \
	38	talRandom.cpp tree.cpp treeIt.cpp treeUtil.cpp uniDistribution.cpp \
	39	uniformDistribution.cpp cmdline2EvolObjs.cpp \
	40	generalGammaDistributionLaguerre.cpp gammaDistributionLaguerre.cpp GLaguer.cpp \
	41	givenRatesMLDistance.cpp distanceBasedSeqs2Tree.cpp \
	42	posteriorDistance.cpp pairwiseGammaDistance.cpp doubleRep.cpp \
	43	logRep.cpp indel.cpp indelModel.cpp mulAlphabet.cpp \
	44	replacementModelSSRV.cpp stochasticProcessSSRV.cpp bestAlphaAndNu.cpp \
	45	C_evalParamUSSRV.cpp matrixUtils.cpp betaOmegaDistribution.cpp \
	46	betaUtilities.cpp betaDistribution.cpp geneticCodeHolder.cpp \
	47	samplingSequences.cpp bblEM2USSRV.cpp bestParamUSSRV.cpp \
	48	likeDist2USSRV.cpp ussrvModel.cpp likelihoodComputation2USSRV.cpp \
	49	fromCountTableComponentToDistance2USSRV.cpp normalDist.cpp \
	50	tamura92.cpp bestTamura92param.cpp phylipSequentialFormat.cpp \
	51	simulateCodonsJumps.cpp \
	52	simulateJumpsAbstract.cpp \
	53	ssrvDistanceSeqs2Tree.cpp multipleStochasticProcess.cpp distributionPlusInvariant.cpp\
	54	extremeValDistribution.cpp \
	55	gammaDistributionFixedCategories.cpp generalGammaDistributionPlusInvariant.cpp gammaDistributionPlusInvariant.cpp \
	56	distributionPlusCategory.cpp simulateJumps.cpp computeJumps.cpp seqeuncesFilter.cpp \
	57	optGammaMixtureLS.cpp mixtureDistribution.cpp suffStatGammaMixture.cpp GamMixtureOptimizer.cpp optGammaMixtureEM.cpp gainLossAlphabet.cpp \
	58	wYangModel.cpp codonUtils.cpp likelihoodComputation2Codon.cpp likeDist2Codon.cpp unObservableData.cpp likelihoodComputationGL.cpp \
	59	threeStateModel.cpp threeStateAlphabet.cpp oneTwoMoreModel.cpp betaDistributionFixedCategories.cpp betaDistributionFixedCategoriesWithOmegaUniform.cpp \
	60	bblEM2codon.cpp bestAlphaAndK.cpp fromCountTableComponentToDistance2Codon.cpp\
	61	gtrModel.cpp bestGtrModelParams.cpp simulateRateShiftJumps.cpp integerAlphabet.cpp
	62
	63	# do not use: fromInstructionFile.cpp, simulateSequnce.cpp split.save.cpp
	64
	65
	66	# LibCsources= cmdline.c
	67	# LibCsources += getopt.c getopt1.c
	68
	69	EXEC =
	70	#TEST_EXEC_SUB = split_test splitMap_test bootstrap_test
	71	TEST_EXEC = $(addprefix tests/,$(TEST_EXEC_SUB))
	72	LIB = libEvolTree.a
	73	DEBUGLIB = $(LIB:.a=Debug.a)
	74	DOUBLEREPLIB = $(LIB:.a=DoubleRep.a)
	75
	76
	77	#CC=g++
	78	CXX=g++
	79	CC=$(CXX)
	80
	81	#requires 2.13, but may work with 2.11
	82	GENGETOPT = gengetopt
	83	# osX/tiger
	84	#GENGETOPT = /opt/local/bin/gengetopt
	85
	86	.SECONDARY: semphy_cmdline.c semphy_cmdline.h
	87
	88	#LDFLAGS=
	89
	90	CPPFLAGS= -O3 -Wall -Wno-sign-compare -I. -DLOG -ftemplate-depth-32
	91	CPPFLAGSDEBUG= -g -Wall -Wno-sign-compare -I. -DLOG -ftemplate-depth-32 -DVERBOS
	92	#CPPFLAGSDOU= $(CPPFLAGS)
	93	#-pg
	94
	95
	96	#CPPFLAGS+= -I/usr/include/g++-v3
	97	#CPPFLAGS+= -DLOG -DLOGCLS -DMEMCHK
	98
	99	# sources
	100	sources= $(Libsources) $(LibCsources) $(addsuffix .cpp,$(EXEC) $(TEST_EXEC))
	101
	102	.PHONY: tests lib test debug %.debug
	103	.PHONY: dat DOUBLEREP doubleRep
	104
	105	all: lib $(EXEC)
	106
	107	test: all tests
	108	+cd tests; make -k test
	109
	110	#ifdef DOUBLEREP
	111	#CPPFLAGS+= -DLOGREP
	112	#CPPFLAGSDEBUG += -DLOGREP
	113	#LDFLAGSDEBUG += -DLOGREP
	114	#endif
	115
	116	ifdef DOUBLEREP
	117	CPPFLAGS+= -DDOUBLEREP
	118	CPPFLAGSDEBUG += -DDOUBLEREP
	119	LDFLAGSDEBUG += -DDOUBLEREP
	120	endif
	121
	122	debug: CPPFLAGS = -g -Wall -Wno-sign-compare -I. -DLOG -ftemplate-depth-32
	123	debug: $(DEBUGLIB)
	124	pl:
	125	@echo "lib ="$(LIB)
	126	@echo "debug="$(DEBUGLIB)
	127	#debug: all
	128	# cp libEvolTree.a libEvolTreeDebug.a
	129
	130	# <<<<<<< Makefile
	131	# %.debug: CPPFLAGS = -g -Wall -Wno-sign-compare -I. -DLOG -ftemplate-depth-25
	132	# % debug: LIB = libEvolTreeDebug.a
	133	# %.debug: %
	134	# @echo "made \""$(*)"\" in debug mode"
	135
	136	# =======
	137	#>>>>>>> 2.34
	138
	139	lib: $(LIB)
	140
	141	$(LIB): $(Libsources:.cpp=.o) $(LibCsources:.c=.o)
	142	ar rv $@ $?
	143	ranlib $@
	144
	145	tags: .cpp .h
	146	etags --members --language=c++ $^
	147
	148	$(EXEC) $(TEST_EXEC): $(LIB)
	149	tests: $(TEST_EXEC)
	150
	151	-include make.dep
	152
	153	install:
	154	cd ../fast; make -f Makefile.lib install_do
	155
	156
	157	clean:
	158	-rm -f $(LIB) $(DEBUGLIB) $(DOUBLEREPLIB) $(EXEC) $(TEST_EXEC) *.o
	159
	160
	161	ifneq ($(wildcard make.dep), make.dep)
	162	make.dep: depend
	163	endif
	164
	165	depend makedep: _make.dep
	166	@mv -f _make.dep make.dep
	167
	168	_make.dep: $(sources)
	169	@echo making depend
	170	# $(SHELL) -ec '$(CC) -MM $(CPPFLAGS) $^ \| sed '\''s/$$$\.o[ :]/\1.o $@ : /g'\'' > $@ ; [ -s $@ ] \|\| rm -f $@'
	171	@$(SHELL) -ec '$(CC) -MM $(CPPFLAGS) $^ \| sed "s/$^[^.]*$\.o/\1.o \1.debug.o/g" > $@'
	172	_fast:
	173	cd ../fast;make -f Makefile.lib -k all
	174
	175	fast.% _fast.%:
	176	cd ../fast;make -f Makefile.lib -k $(*)
	177
	178
	179	simulateSequnce: simulateSequnce_cmdline.o
	180
	181
	182	evolObjsTest.ggo: evolObjs.header evolObjs.args
	183	cat $^ > $@
	184
	185
	186	# commandline (gengetopts)
	187	%_cmdline.h %_cmdline.c: %.ggo
	188	$(GENGETOPT) -i$< -F$(*)_cmdline
	189
	190	%.dat.q: %.dat
	191	awk 'BEGIN{RS="[\n\r]+";};{print "\" "$$0" \"\r"}' $< > $@
	192	# cat $@
	193
	194	DAT = cpREV45.dat.q dayhoff.dat.q jones.dat.q mtREV24.dat.q wag.dat.q HIVb.dat.q HIVw.dat.q
	195
	196	dat: $(DAT)
	197
	198	cleandat:
	199	rm $(DAT)
	200
	201	datMatrixHolder.o: $(DAT)
	202
	203	.PRECIOUS: $(DAT)
	204
	205	debug: LIB = $(DEBUGLIB)
	206
	207	%.debug: CPPFLAGS = $(CPPFLAGSDEBUG)
	208	%.debug: %
	209	@echo "made \""$(*)"\" in debug mode"
	210
	211
	212	%.debug.o: %.c
	213	$(CC) -c $(CPPFLAGSDEBUG) $(CFLAGS) $< -o $@
	214
	215	%.debug.o: %.cpp
	216	$(CXX) -c $(CPPFLAGSDEBUG) $(CXXFLAGS) $< -o $@
	217
	218	$(DEBUGLIB): $(Libsources:.cpp=.debug.o) $(LibCsources:.c=.debug.o)
	219	ar rv $@ $?
	220	ranlib $@
	221
	222	#doubleRep: LOGREP=t
	223	#doubleRep: CPPFLAGS+= -DLOGREP
	224	doubleRep: DOUBLEREP=t
	225	doubleRep: CPPFLAGS+= -DDOUBLEREP
	226	doubleRep: $(DOUBLEREPLIB)
	227
	228	%.doubleRep.o: %.c
	229	$(CC) -c $(CPPFLAGS) $(CFLAGS) $< -o $@
	230
	231	%.doubleRep.o: %.cpp
	232	$(CXX) -c $(CPPFLAGS) $(CXXFLAGS) $< -o $@
	233
	234	$(DOUBLEREPLIB): $(Libsources:.cpp=.doubleRep.o) $(LibCsources:.c=.doubleRep.o)
	235	ar rv $@ $?
	236	ranlib $@
	237
	238	# DO NOT DELETE

+139

-0

libs/phylogeny/NNiProp.cpp less more

	0	// $Id: NNiProp.cpp 962 2006-11-07 15:13:34Z privmane $
	1
	2	#include "definitions.h"
	3	#include "treeIt.h"
	4	#include "treeUtil.h"
	5	#include "NNiProp.h"
	6	#include "bblEM.h"
	7	#include "bblEMProportional.h"
	8	#include "logFile.h"
	9	#include <algorithm>
	10	#include <iostream>
	11	#include <iomanip>
	12	using namespace std;
	13
	14	NNiProp::NNiProp(vector<sequenceContainer>& sc,
	15	vector<stochasticProcess>& sp,
	16	const vector<Vdouble > weights,
	17	vector<char>* nodeNotToSwap):_nodeNotToSwap(nodeNotToSwap),
	18	_sc(sc),_sp(sp),_weights(weights) {
	19	_bestScore = VERYSMALL;
	20	_treeEvaluated =-1;
	21	_out = NULL;
	22
	23	}
	24
	25	void NNiProp::setOfstream(ostream* out) {
	26	_out = out;
	27	}
	28
	29	tree NNiProp::NNIstep(tree et) {
	30	et.create_names_to_internal_nodes();
	31	_bestScore = evalTree(et);
	32	_bestTree = et;
	33	treeIterTopDown tIt(et);
	34	for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
	35	if (mynode->isLeaf() \|\| mynode->isRoot()) continue; // swaping only internal nodes
	36
	37	if (_nodeNotToSwap) {
	38	if ((*_nodeNotToSwap)[mynode->id()]) {
	39	continue;
	40	}
	41	}
	42	tree newT1 = NNIswap1(et,mynode);
	43	tree newT2 = NNIswap2(et,mynode);
	44	MDOUBLE treeScore1 = evalTree(newT1);
	45	if (treeScore1 > _bestScore) {
	46	_bestTree = newT1;
	47	_bestScore = treeScore1;
	48	LOG(5,<<"new Best Tree: "<<_bestScore<<endl);
	49	if (_out) (*_out)<<"new Best Tree: "<<_bestScore<<endl;
	50	_bestTree.output(*_out);
	51
	52	}
	53	MDOUBLE treeScore2 = evalTree(newT2);
	54	if (treeScore2 > _bestScore) {
	55	_bestTree = newT2;
	56	_bestScore = treeScore2;
	57	LOG(5,<<"new Best Tree: "<<_bestScore<<endl);
	58	if (_out) (*_out)<<"new Best Tree: "<<_bestScore<<endl;
	59	_bestTree.output(*_out);
	60	}
	61	}
	62	return _bestTree;
	63	}
	64
	65	tree NNiProp::NNIswap1(tree et,tree::nodeP mynode) {
	66	tree::nodeP mynodeInNewTree = et.findNodeByName(mynode->name());
	67	#ifdef VERBOS
	68	LOG(5,<<"b4 swap1"<<endl);
	69	LOGDO(5,et.output(myLog::LogFile()));
	70	#endif
	71
	72	tree::nodeP fatherNode = mynodeInNewTree->father();
	73	tree::nodeP nodeToSwap1 = mynodeInNewTree->father()->getSon(0);
	74	// it might be me
	75	if (nodeToSwap1 == mynodeInNewTree) nodeToSwap1 = mynodeInNewTree->father()->getSon(1);
	76	tree::nodeP nodeToSwap2 = mynodeInNewTree->getSon(0);
	77
	78	et.removeNodeFromSonListOfItsFather(nodeToSwap1);
	79	et.removeNodeFromSonListOfItsFather(nodeToSwap2);
	80	nodeToSwap2->setFather(fatherNode);
	81	fatherNode->setSon(nodeToSwap2);
	82	nodeToSwap1->setFather(mynodeInNewTree);
	83	mynodeInNewTree->setSon(nodeToSwap1);
	84	#ifdef VERBOS
	85	LOG(5,<<"after swap1"<<endl);
	86	LOGDO(5,et.output(myLog::LogFile()));
	87	#endif
	88
	89	return et;
	90	}
	91
	92	tree NNiProp::NNIswap2(tree et,tree::nodeP mynode) {
	93	#ifdef VERBOS
	94	LOG(5,<<"b4 swap2"<<endl);
	95	LOGDO(5,et.output(myLog::LogFile()));
	96	#endif
	97	tree::nodeP mynodeInNewTree = et.findNodeByName(mynode->name());
	98
	99
	100	tree::nodeP fatherNode = mynodeInNewTree->father();
	101	tree::nodeP nodeToSwap1 = mynodeInNewTree->father()->getSon(0);
	102	// it might be me
	103	if (nodeToSwap1 == mynodeInNewTree) nodeToSwap1 = mynodeInNewTree->father()->getSon(1);
	104	tree::nodeP nodeToSwap2 = mynodeInNewTree->getSon(1);
	105	et.removeNodeFromSonListOfItsFather(nodeToSwap1);
	106	et.removeNodeFromSonListOfItsFather(nodeToSwap2);
	107	nodeToSwap2->setFather(fatherNode);
	108	fatherNode->setSon(nodeToSwap2);
	109	nodeToSwap1->setFather(mynodeInNewTree);
	110	mynodeInNewTree->setSon(nodeToSwap1);
	111	#ifdef VERBOS
	112	LOG(5,<<"after swap2"<<endl);
	113	LOGDO(5,et.output(myLog::LogFile()));
	114	#endif
	115	return et;
	116
	117	}
	118
	119	MDOUBLE NNiProp::evalTree(tree& et) {
	120	#ifdef VERBOS
	121	LOG(5,<<"b4 bbl in alltrees"<<endl);
	122	LOGDO(5,et.output(myLog::LogFile()));
	123	#endif
	124	bblEMProportional bblEMprop1(et,_sc,_sp,_weights);
	125	MDOUBLE res = bblEMprop1.getTreeLikelihood();
	126	// MDOUBLE res = 12;
	127	_treeEvaluated++;
	128	// cerr.precision(5);
	129	_out->precision(5);
	130
	131	if (_treeEvaluated) LOG(5,<<"tree: "<<_treeEvaluated<< "score = "<<res<<endl);
	132	if ((_out)&&(_treeEvaluated)) (*_out)<<"tree: "<<_treeEvaluated<< "score = "<<res<<endl;
	133	return res;
	134	}
	135
	136
	137
	138

+39

-0

libs/phylogeny/NNiProp.h less more

	0	// $Id: NNiProp.h 962 2006-11-07 15:13:34Z privmane $
	1
	2	#ifndef ___NNI_PROP
	3	#define ___NNI_PROP
	4	#include "definitions.h"
	5	#include "tree.h"
	6	#include "sequenceContainer.h"
	7	#include "definitions.h"
	8	#include "stochasticProcess.h"
	9	#include <vector>
	10	using namespace std;
	11
	12	class NNiProp {
	13	public:
	14	explicit NNiProp(vector<sequenceContainer>& sc,
	15	vector<stochasticProcess>& sp,
	16	const vector<Vdouble > weights,
	17	vector<char>* nodeNotToSwap);
	18
	19	tree NNIstep(tree et);
	20	MDOUBLE bestScore(){ return _bestScore;}
	21	void setOfstream(ostream* out);
	22	private:
	23	ostream* _out;
	24	vector<char> * _nodeNotToSwap;
	25	private:
	26	tree _bestTree;
	27	MDOUBLE _bestScore;
	28	vector<sequenceContainer>& _sc;
	29	vector<stochasticProcess>& _sp;
	30	const vector<Vdouble > _weights;
	31
	32	MDOUBLE evalTree(tree& et);
	33	tree NNIswap1(tree et,tree::nodeP mynode);
	34	tree NNIswap2(tree et,tree::nodeP mynode);
	35	int _treeEvaluated;
	36
	37	};
	38	#endif

+174

-0

libs/phylogeny/NNiSep.cpp less more

	0	// $Id: NNiSep.cpp 962 2006-11-07 15:13:34Z privmane $
	1
	2	#include "definitions.h"
	3	#include "treeIt.h"
	4	#include "treeUtil.h"
	5	#include "NNiSep.h"
	6	#include "bblEM.h"
	7	#include "logFile.h"
	8	#include "bblEMSeperate.h"
	9
	10	#include <algorithm>
	11	#include <iostream>
	12	#include <iomanip>
	13
	14	using namespace std;
	15
	16	NNiSep::NNiSep(vector<sequenceContainer>& sc,
	17	vector<stochasticProcess>& sp,
	18	const vector<Vdouble > weights,
	19	vector<char>* nodeNotToSwap): _nodeNotToSwap(nodeNotToSwap),
	20	_sc(sc),_sp(sp),_weights(weights) {
	21	_bestTrees.resize(sc.size());
	22	_bestScore=VERYSMALL;
	23	_treeEvaluated =-1;
	24
	25	}
	26
	27	void NNiSep::setOfstream(ostream* out) {
	28	_out = out;
	29	}
	30
	31
	32	vector<tree> NNiSep::NNIstep(vector<tree> et) {
	33	const int nGene = et.size();
	34	int z;
	35	for (z=0; z < nGene; ++z) {
	36	et[z].create_names_to_internal_nodes();
	37	}
	38	_bestTrees = et;
	39	_bestScore = evalTrees(_bestTrees);
	40
	41	treeIterTopDown tIt(et[0]);
	42
	43	vector<tree::nodeP> mynode(nGene);
	44	mynode[0] = tIt.first();
	45	for (z=1; z < nGene; ++z ) {
	46	mynode[z] = et[z].findNodeByName(mynode[0]->name());
	47	}
	48
	49	while (mynode[0] != tIt.end()) {
	50	bool haveToBeChecked = true;
	51	if ((mynode[0]->isLeaf() \|\| mynode[0]->isRoot())) haveToBeChecked = false;
	52	if (_nodeNotToSwap) {
	53	if ((*_nodeNotToSwap)[mynode[0]->id()]) {
	54	haveToBeChecked = false;
	55	}
	56	}
	57
	58	if (haveToBeChecked) { // swaping only internal nodes that are not "fixed"
	59	for (z=1; z < nGene; ++z ) {
	60	mynode[z] = et[z].findNodeByName(mynode[0]->name());
	61	}
	62
	63	vector<tree> newT1;
	64	vector<tree> newT2;
	65
	66	for (z=0; z < nGene; ++z ) {
	67	newT1.push_back(NNIswap1(et[z],mynode[z]));
	68	newT2.push_back(NNIswap2(et[z],mynode[z]));
	69	}
	70	MDOUBLE treeScore1 = evalTrees(newT1);
	71	if (treeScore1 > _bestScore) {
	72	_bestTrees = newT1;
	73	_bestScore = treeScore1;
	74	LOG(5,<<"new Best Trees: "<<_bestScore<<endl);
	75	if (_out) (*_out)<<"new Best Tree: "<<_bestScore<<endl;
	76	if (_out) (*_out)<<"tree topology (of gene 1 in case of many genes): "<<endl;
	77	_bestTrees[0].output(*_out);
	78	}
	79	MDOUBLE treeScore2 = evalTrees(newT2);
	80	if (treeScore2 > _bestScore) {
	81	_bestTrees = newT2;
	82	_bestScore = treeScore2;
	83	LOG(5,<<"new Best Trees: "<<_bestScore<<endl);
	84	if (_out) (*_out)<<"new Best Tree: "<<_bestScore<<endl;
	85	if (_out) (*_out)<<"tree topology (of gene 1 in case of many genes): "<<endl;
	86	_bestTrees[0].output(*_out);
	87	}
	88	}
	89	//nextloop:
	90	mynode[0] = tIt.next();
	91	}
	92	return _bestTrees;
	93	}
	94
	95	tree NNiSep::NNIswap1(tree et,tree::nodeP mynode) {
	96	tree::nodeP mynodeInNewTree = et.findNodeByName(mynode->name());
	97	#ifdef VERBOS
	98	LOG(5,<<"b4 swap1"<<endl);
	99	LOGDO(5,et.output(myLog::LogFile()));
	100	#endif
	101
	102	tree::nodeP fatherNode = mynodeInNewTree->father();
	103	tree::nodeP nodeToSwap1 = mynodeInNewTree->father()->getSon(0);
	104	// it might be me
	105	if (nodeToSwap1 == mynodeInNewTree) nodeToSwap1 = mynodeInNewTree->father()->getSon(1);
	106	tree::nodeP nodeToSwap2 = mynodeInNewTree->getSon(0);
	107
	108	et.removeNodeFromSonListOfItsFather(nodeToSwap1);
	109	et.removeNodeFromSonListOfItsFather(nodeToSwap2);
	110	nodeToSwap2->setFather(fatherNode);
	111	fatherNode->setSon(nodeToSwap2);
	112	nodeToSwap1->setFather(mynodeInNewTree);
	113	mynodeInNewTree->setSon(nodeToSwap1);
	114	#ifdef VERBOS
	115	LOG(5,<<"after swap1"<<endl);
	116	LOGDO(5,et.output(myLog::LogFile()));
	117	#endif
	118
	119	return et;
	120	}
	121
	122	tree NNiSep::NNIswap2(tree et,tree::nodeP mynode) {
	123	#ifdef VERBOS
	124	LOG(5,<<"b4 swap2"<<endl);
	125	LOGDO(5,et.output(myLog::LogFile()));
	126	#endif
	127	tree::nodeP mynodeInNewTree = et.findNodeByName(mynode->name());
	128
	129
	130	tree::nodeP fatherNode = mynodeInNewTree->father();
	131	tree::nodeP nodeToSwap1 = mynodeInNewTree->father()->getSon(0);
	132	// it might be me
	133	if (nodeToSwap1 == mynodeInNewTree) nodeToSwap1 = mynodeInNewTree->father()->getSon(1);
	134	tree::nodeP nodeToSwap2 = mynodeInNewTree->getSon(1);
	135	et.removeNodeFromSonListOfItsFather(nodeToSwap1);
	136	et.removeNodeFromSonListOfItsFather(nodeToSwap2);
	137	nodeToSwap2->setFather(fatherNode);
	138	fatherNode->setSon(nodeToSwap2);
	139	nodeToSwap1->setFather(mynodeInNewTree);
	140	mynodeInNewTree->setSon(nodeToSwap1);
	141	#ifdef VERBOS
	142	LOG(5,<<"after swap2"<<endl);
	143	LOGDO(5,et.output(myLog::LogFile()));
	144	#endif
	145	return et;
	146
	147	}
	148
	149
	150
	151
	152
	153	MDOUBLE NNiSep::evalTrees(vector<tree>& et) {
	154	#ifdef VERBOS
	155	LOG(5,<<"b4 bbl in alltrees"<<endl);
	156	for (vector<tree>::const_iterator i=et.begin();i!=et.end();++i)
	157	LOGDO(5,i->output(myLog::LogFile()));
	158	#endif
	159	bblEMSeperate bblemsep1(et,_sc,_sp,_weights);
	160	MDOUBLE res = bblemsep1.getTreeLikelihood();
	161	_treeEvaluated++;
	162	LOG(5,.precision(5));
	163	_out->precision(5);
	164
	165
	166	if (_treeEvaluated) LOG(5,<<"tree: "<<_treeEvaluated<< "score = "<<res<<endl);
	167	if ((_out)&&(_treeEvaluated)) (*_out)<<"tree: "<<_treeEvaluated<< "score = "<<res<<endl;
	168	return res;
	169	}
	170
	171
	172
	173

+40

-0

libs/phylogeny/NNiSep.h less more

	0	// $Id: NNiSep.h 962 2006-11-07 15:13:34Z privmane $
	1
	2	#ifndef ___NNI_SEP
	3	#define ___NNI_SEP
	4
	5	#include "definitions.h"
	6	#include "tree.h"
	7	#include "sequenceContainer.h"
	8	#include "definitions.h"
	9	#include "stochasticProcess.h"
	10	#include <vector>
	11	using namespace std;
	12
	13	class NNiSep {
	14	public:
	15	explicit NNiSep(vector<sequenceContainer>& sc,
	16	vector<stochasticProcess>& sp,
	17	const vector<Vdouble > weights,
	18	vector<char>* nodeNotToSwap);
	19
	20	vector<tree> NNIstep(vector<tree> et);
	21	MDOUBLE bestScore(){ return _bestScore;}
	22	void setOfstream(ostream* out);
	23
	24	private:
	25	vector<char>* _nodeNotToSwap;
	26	vector<tree> _bestTrees;
	27	MDOUBLE _bestScore;
	28	vector<sequenceContainer>& _sc;
	29	vector<stochasticProcess>& _sp;
	30	const vector<Vdouble > _weights;
	31
	32	MDOUBLE evalTrees(vector<tree>& et);
	33	tree NNIswap1(tree et,tree::nodeP mynode);
	34	tree NNIswap2(tree et,tree::nodeP mynode);
	35	int _treeEvaluated;
	36	ostream* _out;
	37
	38	};
	39	#endif

+119

-0

libs/phylogeny/Nni.cpp less more

	0	// $Id: Nni.cpp 962 2006-11-07 15:13:34Z privmane $
	1
	2	// version 1.00
	3	// last modified 3 Nov 2002
	4	#include "definitions.h"
	5	#include "treeUtil.h"
	6	#include "treeIt.h"
	7	#include "Nni.h"
	8	#include "bblEM.h"
	9	#include "logFile.h"
	10	#include <algorithm>
	11	#include <iostream>
	12	using namespace std;
	13
	14	NNI::NNI(const sequenceContainer& sc,
	15	const stochasticProcess& sp,
	16	const Vdouble * weights): _sc(sc),_sp(sp),_weights(weights) {
	17	_bestScore = VERYSMALL;
	18	}
	19
	20
	21	tree NNI::NNIstep(tree et) {
	22	et.create_names_to_internal_nodes();
	23	treeIterTopDown tIt(et);
	24	for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
	25	if (mynode->isLeaf() \|\| mynode->isRoot()) continue; // swaping only internal nodes
	26	tree newT1 = NNIswap1(et,mynode);
	27	tree newT2 = NNIswap2(et,mynode);
	28	MDOUBLE treeScore1 = evalTree(newT1,_sc);
	29	MDOUBLE treeScore2 = evalTree(newT2,_sc);
	30	if (treeScore1 > _bestScore) {
	31	_bestTree = newT1;
	32	_bestScore = treeScore1;
	33	LOG(5,<<"new Best Tree: "<<_bestScore<<endl);
	34	LOGDO(5,et.output(myLog::LogFile()));
	35	}
	36	if (treeScore2 > _bestScore) {
	37	_bestTree = newT2;
	38	_bestScore = treeScore2;
	39	LOG(5,<<"new Best Tree: "<<_bestScore<<endl);
	40	LOGDO(5,et.output(myLog::LogFile()));
	41	}
	42	}
	43	return _bestTree;
	44	}
	45
	46	tree NNI::NNIswap1(tree et,tree::nodeP mynode) {
	47	tree::nodeP mynodeInNewTree = et.findNodeByName(mynode->name());
	48	#ifdef VERBOS
	49	LOG(5,<<"b4 swap1"<<endl);
	50	LOGDO(5,et.output(myLog::LogFile()));
	51	#endif
	52
	53	tree::nodeP fatherNode = mynodeInNewTree->father();
	54	tree::nodeP nodeToSwap1 = mynodeInNewTree->father()->getSon(0);
	55	// it might be me
	56	if (nodeToSwap1 == mynodeInNewTree)
	57	nodeToSwap1 = mynodeInNewTree->father()->getSon(1);
	58	tree::nodeP nodeToSwap2 = mynodeInNewTree->getSon(0);
	59
	60	et.removeNodeFromSonListOfItsFather(nodeToSwap1);
	61	et.removeNodeFromSonListOfItsFather(nodeToSwap2);
	62	nodeToSwap2->setFather(fatherNode);
	63	fatherNode->setSon(nodeToSwap2);
	64	nodeToSwap1->setFather(mynodeInNewTree);
	65	mynodeInNewTree->setSon(nodeToSwap1);
	66	#ifdef VERBOS
	67	LOG(5,<<"after swap1"<<endl);
	68	LOGDO(5,et.output(myLog::LogFile()));
	69	#endif
	70
	71	return et;
	72	}
	73
	74	tree NNI::NNIswap2(tree et,tree::nodeP mynode) {
	75	#ifdef VERBOS
	76	LOG(5,<<"b4 swap2"<<endl);
	77	LOGDO(5,et.output(myLog::LogFile()));
	78	#endif
	79	tree::nodeP mynodeInNewTree = et.findNodeByName(mynode->name());
	80
	81
	82	tree::nodeP fatherNode = mynodeInNewTree->father();
	83	tree::nodeP nodeToSwap1 = mynodeInNewTree->father()->getSon(0);
	84	// it might be me
	85	if (nodeToSwap1 == mynodeInNewTree)
	86	nodeToSwap1 = mynodeInNewTree->father()->getSon(1);
	87	tree::nodeP nodeToSwap2 = mynodeInNewTree->getSon(1);
	88	et.removeNodeFromSonListOfItsFather(nodeToSwap1);
	89	et.removeNodeFromSonListOfItsFather(nodeToSwap2);
	90	nodeToSwap2->setFather(fatherNode);
	91	fatherNode->setSon(nodeToSwap2);
	92	nodeToSwap1->setFather(mynodeInNewTree);
	93	mynodeInNewTree->setSon(nodeToSwap1);
	94	#ifdef VERBOS
	95	LOG(5,<<"after swap2"<<endl);
	96	LOGDO(5,et.output(myLog::LogFile()));
	97	#endif //VERBOS
	98	return et;
	99
	100	}
	101
	102
	103
	104
	105
	106	MDOUBLE NNI::evalTree(tree& et,const sequenceContainer& sc) {
	107	#ifdef VERBOS
	108	LOG(5,<<"b4 bbl in alltrees"<<endl);
	109	LOGDO(5,et.output(myLog::LogFile()));
	110	#endif
	111	bblEM bblEM1(et,sc,_sp,_weights);
	112	MDOUBLE res = bblEM1.getTreeLikelihood();
	113	return res;
	114	}
	115
	116
	117
	118

+32

-0

libs/phylogeny/Nni.h less more

	0	// $Id: Nni.h 962 2006-11-07 15:13:34Z privmane $
	1
	2	#ifndef ___NNI
	3	#define ___NNI
	4
	5	#include "definitions.h"
	6	#include "tree.h"
	7	#include "sequenceContainer.h"
	8	#include "stochasticProcess.h"
	9	#include <vector>
	10	using namespace std;
	11
	12	class NNI {
	13	public:
	14	explicit NNI(const sequenceContainer& sc,
	15	const stochasticProcess& sp,
	16	const Vdouble * weights);
	17
	18	tree NNIstep(tree et);
	19	MDOUBLE bestScore(){ return _bestScore;}
	20
	21	private:
	22	tree _bestTree;
	23	MDOUBLE _bestScore;
	24	const sequenceContainer& _sc;
	25	const stochasticProcess& _sp;
	26	const Vdouble * _weights;
	27	MDOUBLE evalTree(tree& et,const sequenceContainer& sd);
	28	tree NNIswap1(tree et,tree::nodeP mynode);
	29	tree NNIswap2(tree et,tree::nodeP mynode);
	30	};
	31	#endif

+360

-0

libs/phylogeny/Parameters.cpp less more

	0	#include <iostream>
	1	#include <sstream>
	2	#include <vector>
	3	#include "Parameters.h"
	4	#include "ConversionUtils.h"
	5	#include <stdio.h>
	6	#include <cstdlib>
	7	using namespace std;
	8
	9	typedef Parameters::ParamType ParamType;
	10
	11	class Parameter
	12	{
	13	public:
	14
	15	Parameter();
	16	Parameter(const string& name, const int val);
	17	Parameter(const string& name, const float val);
	18	Parameter(const string& name, const string& val);
	19	Parameter(const Parameter& param);
	20
	21	void dump(FILE* outputFile) const;
	22
	23	~Parameter() {}
	24	const string& paramLabel() const;
	25	ParamType paramType() const;
	26
	27	int intValue() const;
	28	float floatValue() const;
	29	const string& stringValue() const;
	30
	31	Parameter& operator=(const Parameter& param);
	32
	33	friend bool operator<(const Parameter& p, const Parameter& q);
	34	friend ostream& operator<<(ostream& out, const Parameter& p);
	35
	36	private:
	37	string paramName;
	38	ParamType type;
	39	union {
	40	int i;
	41	float f;
	42	};
	43	string s;
	44	};
	45
	46	typedef vector<Parameter> ParamList;
	47
	48	static ParamList paramList;
	49
	50	Parameter::Parameter() : paramName(), type(Parameters::Undef)
	51	{}
	52
	53	Parameter::Parameter(const string& name, const int val)
	54	{
	55	paramName = name;
	56	i = val;
	57	type = Parameters::Int;
	58	}
	59
	60	Parameter::Parameter(const string& name, const float val)
	61	{
	62	paramName = name;
	63	f = val;
	64	type = Parameters::Float;
	65	}
	66
	67	Parameter::Parameter(const string& name, const string& val)
	68	{
	69	paramName = name;
	70	s = val;
	71	type = Parameters::Str;
	72	}
	73	Parameter::Parameter(const Parameter& param)
	74	{
	75	paramName = param.paramName;
	76	type = param.type;
	77	if (type == Parameters::Int)
	78	i = param.i;
	79	else
	80	f = param.f;
	81	s = param.s;
	82	}
	83
	84
	85	const string& Parameter::paramLabel() const
	86	{
	87	return paramName;
	88	}
	89
	90	ParamType Parameter::paramType() const
	91	{
	92	return type;
	93	}
	94
	95	int Parameter::intValue() const
	96	{
	97	return i;
	98	}
	99
	100	float Parameter::floatValue() const
	101	{
	102	return f;
	103	}
	104
	105	const string& Parameter::stringValue() const
	106	{
	107	return s;
	108	}
	109
	110	Parameter& Parameter::operator=(const Parameter& param)
	111	{
	112	paramName = param.paramName;
	113	type = param.type;
	114	if (type == Parameters::Int)
	115	i = param.i;
	116	else
	117	f = param.f;
	118	s = param.s;
	119	return *this;
	120	}
	121
	122	bool operator<(const Parameter& p, const Parameter& q)
	123	{
	124	return (p.paramName < q.paramName);
	125	}
	126
	127	ostream& operator<<(ostream& out, const Parameter& p) {
	128	switch(p.type) {
	129	case Parameters::Int:
	130	return out << p.paramName << '\t' << "(Int)" << '\t' << p.i;
	131	case Parameters::Float:
	132	return out << p.paramName << '\t' << "(Float)" << '\t' << p.f;
	133	case Parameters::Str:
	134	return out << p.paramName << '\t' << "(Str)" << '\t' << p.s;
	135	case Parameters::Undef:
	136	break;
	137	}
	138	return out << '\n';
	139	}
	140
	141
	142	void Parameter::dump(FILE* outputFile) const {
	143	switch(type) {
	144	case Parameters::Int:
	145	fprintf(outputFile, "%s = %d", paramName.c_str(), i);
	146	case Parameters::Float:
	147	fprintf(outputFile, "%s = %f", paramName.c_str(), f);
	148	case Parameters::Str:
	149	fprintf(outputFile, "%s = %s", paramName.c_str(), s.c_str());
	150	case Parameters::Undef:
	151	break;
	152	}
	153	}
	154
	155
	156	ParamList::iterator findInsertionPoint(ParamList& paramList,
	157	const string& paramName)
	158	{
	159	unsigned short start = 0;
	160	unsigned short stop = paramList.size();
	161	while (stop != start) {
	162	unsigned short pos = start + (stop-start)/2;
	163	int comp = paramName.compare(paramList[pos].paramLabel());
	164	if (comp == 0)
	165	stop = start = pos;
	166	else if (comp > 0)
	167	start = pos + 1;
	168	else
	169	stop = pos;
	170	}
	171
	172	ParamList::iterator it=paramList.begin();
	173	it+=stop;
	174	return it;
	175	}
	176
	177	Parameters::Parameters()
	178	{}
	179
	180	void Parameters::readParameters(istream& paramStream)
	181	{
	182	while (!paramStream.eof()) {
	183	string param;
	184	getline(paramStream, param);
	185	param = trim(param);
	186	string paramName = nextToken(param);
	187
	188	if (paramName.length() == 0) continue;
	189
	190	if (*(paramName.data()) == '#') continue;
	191
	192	updateParameter(paramName, param.c_str());
	193	}
	194	}
	195
	196
	197	bool Parameters::empty() {
	198	return paramList.empty();
	199	}
	200
	201	void Parameters::addParameter(const string& paramName, const int value)
	202	{
	203	ParamList::iterator pos = findInsertionPoint(paramList, paramName);
	204	if (pos != paramList.end() && (*pos).paramLabel() == paramName)
	205	(*pos) = Parameter(paramName, value);
	206	else
	207	paramList.insert(pos, Parameter(paramName, value));
	208	}
	209
	210	void Parameters::addParameter(const string& paramName, const double value)
	211	{
	212	ParamList::iterator pos = findInsertionPoint(paramList, paramName);
	213	if (pos != paramList.end() && (*pos).paramLabel() == paramName)
	214	(*pos) = Parameter(paramName, (float)value);
	215	else
	216	paramList.insert(pos, Parameter(paramName, (float)value));
	217	}
	218
	219	void Parameters::addParameter(const string& paramName, const string& value)
	220	{
	221	ParamList::iterator pos = findInsertionPoint(paramList, paramName);
	222	if (pos != paramList.end() && (*pos).paramLabel() == paramName)
	223	(*pos) = Parameter(paramName, value);
	224	else
	225	paramList.insert(pos, Parameter(paramName, value));
	226	}
	227
	228	void Parameters::updateParameter(const string& paramName,
	229	const char* const value)
	230	{
	231	ParamList::iterator pos = findInsertionPoint(paramList, paramName);
	232	if (pos != paramList.end() && (*pos).paramLabel() == paramName)
	233	switch ((*pos).paramType()) {
	234	case Int:
	235	(*pos) = Parameter(paramName, atoi(value));
	236	break;
	237	case Float:
	238	(*pos) = Parameter(paramName, (float)atof(value));
	239	break;
	240	case Str:
	241	(*pos) = Parameter(paramName, string(value));
	242	case Undef:
	243	(*pos) = Parameter(paramName, string(value));
	244	}
	245	else
	246	paramList.insert(pos, Parameter(paramName, string(value)));
	247	}
	248
	249
	250	ParamType Parameters::paramType(const string& paramName)
	251	{
	252	ParamList::iterator pos = findInsertionPoint(paramList, paramName);
	253	if (pos != paramList.end() && (*pos).paramLabel() == paramName)
	254	return (*pos).paramType();
	255	else
	256	return Undef;
	257	}
	258
	259
	260	int Parameters::getInt(const string& paramName, const int& defaultValue)
	261	{
	262	ParamList::iterator pos = findInsertionPoint(paramList, paramName);
	263	if (pos != paramList.end() && (*pos).paramLabel() == paramName)
	264	switch ((*pos).paramType()) {
	265	case Int:
	266	return (*pos).intValue();
	267	case Float:
	268	return (int)(*pos).floatValue();
	269	case Str:
	270	return atoi((*pos).stringValue().data());
	271	case Undef:
	272	break;
	273	}
	274	return defaultValue;
	275	}
	276
	277	float Parameters::getFloat(const string& paramName, const float& defaultValue)
	278	{
	279	ParamList::iterator pos = findInsertionPoint(paramList, paramName);
	280	if (pos != paramList.end() && (*pos).paramLabel() == paramName)
	281	switch ((*pos).paramType()) {
	282	case Float:
	283	return (*pos).floatValue();
	284	case Int:
	285	return (float)(*pos).intValue();
	286	case Str:
	287	return (float) atof((*pos).stringValue().data());
	288	case Undef:
	289	break;
	290	}
	291	return defaultValue;
	292	}
	293
	294	string Parameters::getString(const string& paramName,const string& defaultValue)
	295	{
	296	ParamList::iterator pos = findInsertionPoint(paramList, paramName);
	297	if (pos != paramList.end() && (*pos).paramLabel() == paramName)
	298	switch ((*pos).paramType()) {
	299	case Str:
	300	return (*pos).stringValue();
	301	case Float: {
	302	return appendDouble2string((*pos).floatValue());
	303	}
	304	case Int: {
	305	return appendInt2string((*pos).intValue());
	306	}
	307	case Undef:
	308	break;
	309	}
	310	return defaultValue;
	311	}
	312
	313	void Parameters::dump(ostream& out)
	314	{
	315	for (ParamList::iterator i=paramList.begin(); i != paramList.end(); ++i)
	316	out << *i << '\n';
	317	}
	318
	319	//void Parameters::dump(DebugStream& out, const unsigned int msgLevel)
	320	//{
	321	// for (ParamList::iterator i=paramList.begin(); i != paramList.end(); ++i)
	322	// out(msgLevel) << *i;
	323	//}
	324
	325	void Parameters::dump(FILE* outputFile) {
	326	for (ParamList::iterator i = paramList.begin() ; i != paramList.end() ; i++) {
	327	i->dump(outputFile);
	328	fprintf(outputFile, "\n");
	329	}
	330
	331	fprintf(outputFile, "\n");
	332	}
	333
	334	string Parameters::nextToken(string& str)
	335	{
	336	unsigned int start = 0;
	337	while (start < str.length() &&
	338	(str[start] == ' ' \|\| str[start] == '\t' \|\| str[start] == '\n'))
	339	++start;
	340
	341	if (start >= str.length()) {
	342	str = "";
	343	return "";
	344	}
	345
	346	unsigned int stop = start+1;
	347	while (stop < str.length() &&
	348	str[stop] != ' ' && str[stop] != '\t' && str[stop] != '\n')
	349	++stop;
	350
	351	unsigned int next = stop;
	352	while (next < str.length() &&
	353	(str[next] == ' ' \|\| str[next] == '\t' \|\| str[next] == '\n'))
	354	++next;
	355
	356	string result = str.substr((int)start, stop-start);
	357	str = str.substr((int)next);
	358	return result;
	359	}

+251

-0

libs/phylogeny/Parameters.h less more

	0	#ifndef _Parameters_h
	1	#define _Parameters_h
	2
	3	#include <iostream>
	4	#include <ostream>
	5	#include <string>
	6	#include <cstdlib>
	7	#include <cstdio>
	8	//#include "macros.h"
	9	//#include "DebugStream.h"
	10	//#include "StringUtils.h"
	11
	12	using std::string;
	13	using std::istream;
	14	using namespace std;
	15
	16	/*
	17	CLASS
	18	Parameters
	19
	20	A utility class used to manage program parameters. The class supports
	21	setting default values for parameters, reading values from a parameters
	22	file and accessing parameters values from other parts of the program.
	23
	24	KEYWORDS
	25	parameters
	26
	27	AUTHORS
	28	Meir Fuchs (mailto: meirfux@math.tau.ac.il)
	29
	30	Copyright: SAMBA group, Tel-Aviv Univ. Israel, 1997.
	31
	32	CHANGES LOG
	33	<UL>
	34	<LI>9.01.05 Dina:
	35	Bug fix: adding check to iterator end() to findInsertionPoint result
	36	to paramType, getInt, getString, getFloat functions
	37	</LI>
	38	<LI>17.05.04 Oranit Dror:
	39	Adding new methods: dump() and empty()
	40	</LI>
	41	</UL>
	42
	43	GOALS
	44	Aid in managing program parameters. The Parameters class's main goal is to
	45	relieve programmers from the need to rewrite specialized parameters reading
	46	code sections for each of the programs. The Parameters class holds integer,
	47	floating point or string values in static storage indexed using the
	48	parameter's name. Class also supplies method for parsing strings.
	49
	50	USAGE
	51	The following section covers several issues regarding the Parameters class
	52	and its usage. Users should understand the issues covered below before
	53	using the class.
	54
	55	USAGE: SETTING DEFAULT PARAMETERS
	56	Default parameters are set using the addParameter methods. Note that the
	57	type of the parameter is set according to the addParameter arguments. If
	58	a parameter is set using addParameter with an integer argument then
	59	subsequent updates (using updateParameter) to the same parameter will all
	60	be stored as integers. Therefore the following code should output a 0:
	61	EXAMPLE
	62	Parameters::addParameter("Dummy", 3);
	63	Parameters::updateParameter("Dummy", "This should set it to zero");
	64	cout << Parameters::getString("Dummy");
	65	END
	66
	67	Note also that when setting default values of float parameters always use
	68	a decimal point or else these parameters will be added as integers. For
	69	example:
	70	EXAMPLE
	71	Parameters::addParameter("CubeSize", 1.0); OK
	72	Parameters::addParameter("CubeSize", 1); Not OK. Integer parameter
	73	END
	74
	75	USAGE: READING PARAMETERS FROM FILE
	76	The readParameters method receives an input stream from which parameters are
	77	to be read. Files are structured so that each line specifies the value of a
	78	parameter. Each line gives the parameter name, a white space and then the
	79	parameter value. Lines whose first non white-space character is # are
	80	ignored. A basic schema for using the Parameters class is to set the default
	81	values using addParameter calls and then calling readParameters to read in
	82	parameters with other values or new parameters. The following example works
	83	as such using the Parameters::dump method to print all the parameters
	84	and their values:
	85	EXAMPLE
	86	Parameters::addParameter("CubeSize", 1.0);
	87	Parameters::addParameter("MinVote", 8);
	88	ifstream params("params");
	89	Parameters::readParameters(params);
	90	params.close();
	91	Parameters::dump(cout);
	92	END
	93	With the following parameters file:
	94	EXAMPLE
	95	CubeSize 0.5
	96	File pdb4hhb.ent
	97	END
	98	The following output should result:
	99	EXAMPLE
	100	CubeSize (Float) 0.5
	101	File (Str) pdb4hhb.ent
	102	MinVote (Int) 8
	103	END
	104
	105	USAGE: ACCESSING PARAMETERS VALUES
	106	Using the getInt, getFloat and getString methods one may access the
	107	parameters values. Note that a value will always be returned even if the
	108	parameter is not stored as the same type. The get methods attempt to
	109	convert the parameter type to the requested return type of the method.
	110	The following code should produce 3 1's as its output:
	111	EXAMPLE:
	112	Parameters::addParameter("MaxMix", 1); OK added an integer parameter
	113	cout << Parameters::getInt("MaxMix");
	114	cout << Parameters::getFloat("MaxMix");
	115	cout << Parameters::getString("MaxMix");
	116	END
	117	Also note that parameters names are case sensitive.
	118
	119	USAGE: SUBCLASSING AND PERFORMANCE
	120	The Parameters engine keeps the parameters in a sorted list. Although
	121	finding a parameter and its value in this list is considerably fast most
	122	users will not want this overhead of searching for the parameter using
	123	string comparisons inside their main loops, as part of a code which can be
	124	executed a great number of times.
	125	The idea is to subclass the Parameters class and hold the values which
	126	require direct and fast access in separate static variables. All parameters
	127	are accessed not through the getParameter methods but rather through
	128	specialized methods of the subclass. The following is an example of such an
	129	implementation. Notice the readParameters method.
	130	EXAMPLE:
	131	static int min_vote = 8; // Default values
	132	static float cube_size = 1.0;
	133
	134	class ProgParams : protected Parameters
	135	{
	136	int minVote() { return min_vote };
	137
	138	float cubeSize() { return cube_size };
	139
	140	// file name is not held in static variable. Don't care about parameter
	141	// access time.
	142	string fileName() { return getstring("FileName"); }
	143
	144	int readParameters(char* paramsfile) {
	145	addParameter("MinVote", min_vote);
	146	addParameter("CubeSize", cube_size);
	147
	148	ifstream params(paramsfile);
	149	Parameters::readParameters(params);
	150	params.close();
	151
	152	min_vote = getInt("MinVote");
	153	cube_size = getFloat("CubeSize");
	154	}
	155	}
	156	END
	157	*/
	158	class Parameters
	159	{
	160	public:
	161	//// Used by the paramType method. See below.
	162	enum ParamType { Undef, Int, Float, Str };
	163
	164	//// readParameters recieves an input stream and reads parameters off this
	165	// input stream. See the usage section for details of how a parameters
	166	// file may be structured.
	167	static void readParameters(istream& paramStream);
	168
	169	////
	170	// Returns true if no parameters are defined. <br>
	171	// Author: Oranit Dror (oranit@tau.ac.il)
	172	static bool empty();
	173
	174	// GROUP: Setting parameters
	175
	176	//// Adds an integer parameter. The integer value added will actually be
	177	// stored as an integer. Subsequent updates to the same parameter using
	178	// updateParameter will all be stored as integers.
	179	static void addParameter(const string& paramName, const int value);
	180
	181	//// Adds a float parameter. The float value added will actually be
	182	// stored as a float. Subsequent updates to the same parameter using
	183	// updateParameter will all be stored as floats.
	184	static void addParameter(const string& paramName, const double value);
	185
	186	//// Adds a string parameter. The string value added will actually be
	187	// stored as a string. Subsequent updates to the same parameter using
	188	// updateParameter will all be stored as strings.
	189	static void addParameter(const string& paramName, const string& value);
	190
	191	//// Update the parameter value without changing the parameter type. The
	192	// value parameter is converted to the parameter's type if this parameter
	193	// already exists. If the parameter is not yet listed then updateParameter
	194	// adds a new parameter of string type.
	195	static void updateParameter(const string& paramName,
	196	const char* const value);
	197
	198	// GROUP: Getting parameters values.
	199
	200	//// Returns the storage type of the given parameter. If a parameter
	201	// of the given name does not exist then Undef is returned. See enum
	202	// ParamType above for possible return values.
	203	static ParamType paramType(const string& paramName);
	204
	205	//// Gets the integer value of a given parameter. If parameter is not of
	206	// integer type then its value is converted to integer. If parameter does
	207	// not exist a 0 is returned.
	208	static int getInt(const string& paramName, const int& defaultValue=0);
	209
	210	//// Gets the float value of a given parameter. If parameter is not of
	211	// float type then its value is converted to float. If parameter does
	212	// not exist a 0 is returned.
	213	static float getFloat(const string& paramName, const float& defaultValue=0.0);
	214
	215	//// Gets the string value of a given parameter. If parameter is not of
	216	// string type then its value is converted to string. If parameter does
	217	// not exist an empty string is returned.
	218	static string getString(const string& paramName, const string& defaultValue=string());
	219
	220	// GROUP: Other methods
	221
	222
	223
	224	//// Output all listed parameters. Used for debugging.
	225	static void dump(ostream& out);
	226
	227	//// Output all listed parameters. Used for debugging.
	228	//static void dump(DebugStream& out, const unsigned int msgLevel);
	229
	230	////
	231	// Output all listed parameters. <br>
	232	// Author: Oranit Dror (oranit@tau.ac.il)
	233	static void dump(FILE* outputFile);
	234
	235	//// A utility method. nextToken recieves an argument string, finds the first
	236	// white-space delimited token in this string and returns it while cutting
	237	// this token off of the argument string (It it passed by reference). Tokens
	238	// are returned without any spaces. This method may be used repetitively to
	239	// tokenize a string.
	240	static string nextToken(string& str);
	241
	242	protected:
	243	//// Constructor is protected since all methods are static. No need to
	244	// actually form an instance of this class.
	245	Parameters();
	246	};
	247
	248	#endif
	249
	250

+7

-0

libs/phylogeny/aaJC.cpp less more

	0	// $Id: aaJC.cpp 962 2006-11-07 15:13:34Z privmane $
	1
	2	#include "aaJC.h"
	3	#include "errorMsg.h"
	4
	5
	6

+52

-0

libs/phylogeny/aaJC.h less more

	0	// $Id: aaJC.h 962 2006-11-07 15:13:34Z privmane $
	1
	2	#ifndef ___AA_JC
	3	#define ___AA_JC
	4
	5	#include "replacementModel.h"
	6	#include <cmath>
	7	using namespace std;
	8
	9	namespace aaDef {
	10	const MDOUBLE Alp = 20.0;
	11	const MDOUBLE odAl = 1.0/Alp; // one divided by alphabet
	12	const MDOUBLE om_odAl = 1.0-odAl; // one minus odAl;
	13	const MDOUBLE alDiv_omalp = Alp/(Alp-1.0);
	14	const MDOUBLE m_alDiv_omalp = -alDiv_omalp;
	15	}
	16
	17	class aaJC : public replacementModel {
	18	public:
	19
	20	virtual replacementModel* clone() const { return new aaJC(*this); }// see note down:
	21	// virtual aaJC* clone() const { return new aaJC(*this); }
	22	const int alphabetSize() const {return 20;}
	23
	24	explicit aaJC(){};
	25	const MDOUBLE Pij_t(const int i,const int j, const MDOUBLE d) const {
	26	//(wrong!) return ((i==j) ? 0.05+0.95exp(-20.0d): 0.05-0.05exp(-20.0d));
	27	return ((i==j) ? aaDef::odAl+aaDef::om_odAlexp(aaDef::m_alDiv_omalpd): aaDef::odAl-aaDef::odAlexp(aaDef::m_alDiv_omalpd));
	28
	29	}
	30
	31	const MDOUBLE dPij_dt(const int i,const int j, const MDOUBLE d) const{
	32	//(worng!)return ((i==j) ? -19.0exp(-20.0d): exp(-20.0*d));
	33	return ((i==j) ? -exp(aaDef::m_alDiv_omalpd): exp(aaDef::m_alDiv_omalpd)/(aaDef::Alp-1));
	34	}
	35	const MDOUBLE freq(const int i) const {return aaDef::odAl;};
	36
	37	const MDOUBLE d2Pij_dt2(const int i,const int j, const MDOUBLE d) const{
	38	//(wrong!) return ((i==j) ? 19.020.0exp(-20.0d): 0.0-20.0exp(-20.0*d));
	39	return ((i==j) ? aaDef::alDiv_omalpexp(aaDef::m_alDiv_omalpd): aaDef::m_alDiv_omalpexp(aaDef::m_alDiv_omalpd));
	40	}
	41
	42	};
	43
	44	#endif
	45
	46	// note: according to the new C++ rules, the clone function should be like this:
	47	// virtual aaJC* clone() const { return new aaJC(*this); }
	48	// however, not all compilers support it yet. Look at More Effective C++ page 126.
	49
	50
	51

+72

-0

libs/phylogeny/adrianCodon.dat.q less more

	0	" 634 "
	1	" 25105 560 "
	2	" 1209 37271 620 "
	3	" 1353 344 196 494 "
	4	" 112 2048 176 34 21460 "
	5	" 0 140 1656 380 71026 41523 "
	6	" 238 255 56 2967 35040 33972 43340 "
	7	" 8628 295 812 370 1546 65 0 23 "
	8	" 328 7142 272 370 715 4680 1286 876 707 "
	9	" 1192 289 7588 303 103 124 1929 82 52300 924 "
	10	" 509 0 304 10057 836 0 806 6124 1328 45060 1132 "
	11	" 607 43 47 105 5067 0 0 0 863 56 221 189 "
	12	" 0 301 43 0 0 2141 279 0 0 475 32 0 27331 "
	13	" 167 88 393 141 1487 366 3364 545 193 140 538 162 5087 1030 "
	14	" 34 0 42 421 0 0 346 3233 0 0 61 718 31469 35230 1626 "
	15	" 2841 308 69 647 711 76 0 346 1297 278 124 413 193 49 200 0 "
	16	" 195 2491 229 114 57 356 73 12 114 945 197 0 8 74 42 9 2449 "
	17	" 286 295 1514 350 199 128 640 63 66 257 565 175 42 15 241 41 31892 2201 "
	18	" 352 19 175 3379 195 32 0 441 246 85 129 1259 106 0 126 176 4155 62775 2262 "
	19	" 190 36 58 114 2112 0 0 0 0 51 81 158 201 0 114 51 2926 203 490 116 "
	20	" 37 204 30 71 0 1701 355 109 35 444 1 0 27 114 56 21 205 1284 335 79 21842 "
	21	" 81 99 218 95 183 0 4067 30 94 182 10 76 164 61 192 0 617 512 2569 361 57041 44793 "
	22	" 54 30 30 239 134 158 0 2062 10 30 35 370 101 0 70 141 263 0 183 1574 32490 33996 32457 "
	23	" 1891 0 623 93 0 147 671 0 46674 151 12628 0 11 0 0 134 8237 543 0 277 818 47 0 0 "
	24	" 701 549 1184 0 0 246 241 87 5836 1540 12311 0 6 41 48 0 452 5598 739 0 16 841 253 0 40388 "
	25	" 854 120 2602 57 54 69 359 0 13337 47 37725 91 0 31 105 0 0 660 5014 399 118 0 2656 0 82443 40802 "
	26	" 695 0 735 893 81 28 0 661 12916 0 6008 2384 89 35 60 56 1344 0 484 9142 0 0 0 1483 85032 87710 53112 "
	27	" 208 39 0 46 600 0 0 0 19 0 0 55 7884 0 1512 386 2427 200 95 0 3069 0 0 0 2011 0 15 0 "
	28	" 35 133 6 0 0 387 59 0 0 142 42 0 365 3634 769 272 79 813 191 114 0 1470 0 70 95 1012 0 0 17551 "
	29	" 0 15 74 0 97 91 378 52 27 44 46 8 876 732 2298 588 106 83 604 90 286 0 1947 0 0 70 707 0 33878 14863 "
	30	" 63 0 14 229 8 0 114 484 67 48 0 147 280 278 720 3849 349 0 160 1407 0 0 0 1951 0 3 43 1427 22703 32337 15002 "
	31	" 1304 155 0 389 408 75 0 79 444 170 0 236 197 11 45 0 2595 59 234 256 149 35 74 60 51 0 0 143 109 12 0 27 "
	32	" 120 2602 73 69 0 258 160 112 46 821 22 78 0 43 18 0 158 647 151 46 14 149 84 17 1 119 23 2 0 42 7 20 2320 "
	33	" 0 168 893 221 158 73 415 109 0 180 336 209 35 1 131 44 138 148 1538 143 107 83 168 39 1 91 217 0 0 26 55 14 23280 3052 "
	34	" 117 9 91 3406 173 5 0 311 55 62 40 1017 39 0 16 75 274 0 113 787 57 14 76 93 0 16 26 138 10 0 1 44 3660 28072 2533 "
	35	" 450 59 100 310 7741 0 0 0 225 220 182 557 1008 0 588 153 639 41 127 145 2469 39 211 190 150 0 48 78 625 0 97 61 1324 82 245 122 "
	36	" 28 466 94 52 0 6013 75 0 50 1265 106 0 0 452 240 0 47 248 183 0 14 2010 303 164 55 277 0 61 0 333 64 62 86 670 189 0 17008 "
	37	" 130 336 356 168 0 401 16072 0 103 537 357 370 656 161 817 0 379 93 512 228 0 428 10166 0 0 195 789 0 0 83 543 33 0 379 1907 42 47381 29661 "
	38	" 89 80 65 525 0 0 0 7268 98 211 0 1307 86 67 226 484 125 11 108 230 58 130 0 2312 0 0 130 252 29 17 64 381 133 0 145 799 30850 26704 28871 "
	39	" 285 65 23 253 446 24 106 6 2230 278 0 315 177 0 65 33 468 34 17 54 163 23 55 33 366 0 0 0 145 26 0 28 1661 180 0 104 2231 92 0 278 "
	40	" 28 1227 58 189 12 521 129 103 53 5470 0 0 37 87 33 21 31 344 83 26 64 236 268 30 0 941 162 1 0 80 13 28 0 1655 105 0 0 2186 744 149 19297 "
	41	" 27 356 299 139 176 0 843 160 0 684 3262 829 29 45 241 0 110 65 309 60 76 115 522 18 0 0 1073 0 0 2 82 0 0 67 1559 183 750 315 5134 73 44365 23295 "
	42	" 92 205 66 1727 96 190 0 728 0 13 0 7147 0 0 47 96 0 0 89 555 60 34 0 335 244 0 0 1432 18 0 18 105 59 0 63 2203 356 0 0 2632 28434 37047 23095 "
	43	" 318 54 33 115 3527 41 76 0 518 181 0 64 23970 0 1303 260 576 75 64 47 821 131 0 0 179 0 0 0 4505 11 14 274 764 39 80 51 6746 0 0 30 1310 0 179 0 "
	44	" 27 179 23 44 3 2249 0 308 11 354 78 34 330 12669 395 164 61 157 53 32 75 413 144 0 0 108 75 38 251 3338 87 38 51 294 54 0 0 4666 0 0 0 797 0 0 22326 "
	45	" 20 26 113 25 429 137 2071 322 0 22 220 58 3262 1931 2537 1548 21 38 128 48 121 44 321 57 0 41 121 25 34 50 1723 0 34 0 336 11 1230 167 5933 77 0 0 790 43 45141 19340 "
	46	" 76 42 6 207 135 150 294 2554 64 143 0 486 810 110 539 13791 171 2 57 142 0 0 134 537 0 31 5 200 0 58 22 3459 129 0 7 388 0 0 0 5346 0 31 0 1160 31707 35610 22203 "
	47	" 18 407 23 0 0 68 19 36 42 165 0 0 2 88 44 2 117 3381 122 0 0 99 45 6 0 290 36 0 0 266 30 38 18 159 13 0 1 58 142 23 0 80 5 0 3 144 35 0 "
	48	" 33 0 23 658 24 44 108 126 0 20 64 327 60 14 66 133 254 286 87 4548 15 0 8 90 141 9 0 754 142 1 81 288 33 0 33 255 39 24 0 112 18 0 29 130 37 14 20 166 53555 "
	49	" 277 164 108 290 6514 235 482 1018 165 446 8 1100 435 12 319 0 838 111 227 157 5890 0 507 0 340 0 64 3 320 0 0 0 245 73 90 109 6631 419 0 627 412 59 338 125 825 102 176 201 47 59 "
	50	" 51 577 50 66 169 4821 1421 355 54 2047 24 106 112 72 80 62 188 439 166 46 0 5279 0 0 0 368 104 23 0 445 72 0 30 239 87 0 264 4869 738 374 122 466 103 38 4 415 40 126 541 37 22923 "
	51	" 110 82 145 163 1203 0 14459 754 24 1451 151 763 183 30 477 38 0 233 599 0 273 0 12183 0 111 219 802 0 707 0 0 0 110 158 176 42 520 675 20335 0 0 499 1107 178 0 0 564 0 146 0 76141 40261 "
	52	" 112 181 54 602 1180 581 0 5578 112 651 68 1954 0 31 157 150 297 90 115 657 0 135 0 5714 0 0 0 679 0 0 41 578 87 3 74 288 631 521 937 5109 167 107 21 611 147 31 96 454 0 834 31553 32600 44414 "
	53	" 31 241 33 0 45 319 0 86 16 1649 42 219 33 130 46 2 125 576 73 12 0 102 64 24 110 1890 226 0 0 317 18 43 0 51 24 0 56 410 279 66 0 774 82 0 50 219 33 124 1297 0 172 1595 327 77 "
	54	" 20 14 49 22 39 18 92 0 48 39 549 56 30 14 95 20 79 42 201 130 28 0 142 1 270 84 1199 29 74 19 105 19 10 3 27 6 35 31 67 0 8 14 446 0 13 9 56 41 166 229 174 47 576 33 341 "
	55	" 43 8 28 397 156 20 280 403 108 352 75 2043 29 21 59 187 145 43 89 989 22 0 91 211 106 0 104 2711 123 20 35 305 27 19 7 86 180 46 200 423 72 80 88 1072 108 58 112 224 0 2135 495 0 187 2090 61046 387 "
	56	" 123 9 9 101 615 6 102 140 180 69 106 0 6752 231 1116 418 193 9 104 167 205 38 23 15 0 83 4 103 54777 7485 8703 8464 67 34 41 0 611 88 0 90 49 0 18 92 6666 153 0 364 159 265 4644 80 0 186 96 51 168 "
	57	" 12 70 0 0 23 155 11 48 3 70 0 0 70 742 186 61 38 346 27 46 25 170 6 0 0 117 17 14 220 2693 284 0 5 27 8 0 0 162 124 20 3 48 23 0 28 957 87 0 3979 750 23 924 0 0 574 154 61 1268 "
	58	" 59 45 80 75 192 81 637 163 28 0 74 99 1733 57 3345 832 131 42 198 55 181 6 226 149 53 0 94 204 14044 5603 27723 9664 97 0 37 34 172 50 1000 141 1 32 254 0 523 72 2117 491 102 84 1377 107 5207 276 111 744 201 47609 814 "
	59	" 9 17 12 85 47 14 107 170 21 21 26 101 326 48 262 910 73 74 23 359 0 0 55 201 17 0 20 146 234 0 387 2714 16 0 5 30 82 2 39 186 30 18 5 66 172 13 125 928 625 4904 160 0 206 991 125 212 787 1638 32469 1494 "
	60	" 0.0282483 0.0206292 0.0319075 0.0182494 0.0168831 0.0159757 0.0058938 0.0144022 0.0135116 0.0190724 0.0118542 0.0136325 0.0093705 0.0199714 0.0218874 0.0174818 "
	61	" 0.0136792 0.0143825 0.0337043 0.0116006 0.0177685 0.0150006 0.0058835 0.0176118 0.0061893 0.0087184 0.0084944 0.0054224 0.0080368 0.0173529 0.0373569 0.0150280 "
	62	" 0.0311168 0.0246045 0.0388972 0.0251865 0.0179100 0.0212765 0.0059683 0.0199671 0.0184506 0.0176209 0.0132786 0.0115579 0.0083782 0.0137699 0.0265260 0.0136025 "
	63	" 0.0159995 0.0132055 0.0133496 0.0159777 0.0043280 0.0171276 0.0119089 0.0124708 0.0109899 0.0085271 0.0195872 0.0141357 0.0190797 "
	64	" AAA AAC AAG AAT ACA ACC ACG ACT AGA AGC AGG AGT ATA ATC ATG ATT "
	65	" CAA CAC CAG CAT CCA CCC CCG CCT CGA CGC CGG CGT CTA CTC CTG CTT "
	66	" GAA GAC GAG GAT GCA GCC GCG GCT GGA GGC GGG GGT GTA GTC GTG GTT "
	67	" TAC TAT TCA TCC TCG TCT TGC TGG TGT TTA TTC TTG TTT "
	68	" S_ij = S_ji and PI_i based on the empirical codon matrix: "
	69	" A Schneider, GM Cannarozzi and GH Gonnet. Empirical codon "
	70	" substitution matrix. BMC Bioinformatics 6:134. 2005. "
	71

+134

-0

libs/phylogeny/allTrees.cpp less more

	0	// $Id: allTrees.cpp 962 2006-11-07 15:13:34Z privmane $
	1
	2	#include "definitions.h"
	3	#include "allTrees.h"
	4	#include "treeUtil.h"
	5	#include "treeIt.h"
	6	#include "bblEM.h"
	7	#include <algorithm>
	8	#include <iostream>
	9
	10	#include "someUtil.h"
	11
	12	using namespace std;
	13	#ifndef VERBOS
	14	#define VERBOS
	15	#endif
	16
	17
	18	allTrees::allTrees(bool keepAllTrees) : _keepAllTrees(keepAllTrees) {
	19	_bestScore = VERYSMALL;
	20	}
	21
	22	void get3seqTreeAndIdLeftVec(const sequenceContainer* sc,
	23	tree& starT,
	24	vector<int>& idList){
	25	sequenceContainer::constTaxaIterator tIt;
	26	sequenceContainer::constTaxaIterator tItEnd;
	27	tIt.begin(*sc);
	28	tItEnd.end(*sc);
	29	while(tIt != tItEnd) {
	30	idList.push_back(tIt->id());
	31	++tIt;
	32	}
	33	if (sc->numberOfSeqs()<3) errorMsg::reportError(" searching a tree for number of sequences < 3 ");
	34	starT.createRootNode();
	35	starT.createNode(starT.getRoot(),1);
	36	starT.createNode(starT.getRoot(),2);
	37	starT.createNode(starT.getRoot(),3);
	38
	39	const string nameOfSeq1 = (*sc)[idList[idList.size()-1]].name();
	40	const string nameOfSeq2 = (*sc)[idList[idList.size()-2]].name();
	41	const string nameOfSeq3 = (*sc)[idList[idList.size()-3]].name();
	42	idList.pop_back();
	43	idList.pop_back();
	44	idList.pop_back();
	45
	46	starT.getRoot()->getSon(0)->setName(nameOfSeq1);
	47	starT.getRoot()->getSon(1)->setName(nameOfSeq2);
	48	starT.getRoot()->getSon(2)->setName(nameOfSeq3);
	49	starT.createFlatLengthMatrix();
	50	}
	51
	52	void allTrees::recursiveFind( const sequenceContainer* sc,
	53	const stochasticProcess* sp,
	54	const Vdouble * weights,
	55	const int maxIterations,
	56	const MDOUBLE epsilon){
	57	tree starT;
	58	vector<int> ids;
	59	get3seqTreeAndIdLeftVec(sc,starT,ids);
	60	recursiveFind(starT,sp,sc,ids,weights,maxIterations,epsilon);
	61	}
	62
	63	tree getAnewTreeFrom(const tree& et, tree::nodeP & mynode,
	64	vector<int> & idLeft, const string& nameToAdd) {
	65	tree newT = et;
	66	tree::nodeP mynodeInNewTree = newT.findNodeByName(mynode->name());
	67	// int NameToAdd = idLeft[idLeft.size()-1];
	68	idLeft.pop_back();
	69	tree::nodeP fatherNode = mynodeInNewTree->father();
	70	tree::nodeP newInternalNode = newT.createNode(fatherNode, newT.getNodesNum());
	71	mynodeInNewTree->setFather(newInternalNode);
	72	newInternalNode->setSon(mynodeInNewTree);
	73
	74	fatherNode->removeSon(mynodeInNewTree);
	75	tree::nodeP newOTU= newT.createNode(newInternalNode, newT.getNodesNum());;
	76	//string nameX = (*sc)[NameToAdd].name();
	77	newOTU->setName(nameToAdd);
	78	newOTU->setDisToFather(tree::FLAT_LENGTH_VALUE);
	79	newInternalNode->setDisToFather(tree::FLAT_LENGTH_VALUE);
	80	newT.create_names_to_internal_nodes();
	81
	82	return newT;
	83	}
	84
	85	void allTrees::recursiveFind(tree et,
	86	const stochasticProcess& sp,
	87	const sequenceContainer& sc,
	88	vector<int> idLeft,
	89	const Vdouble * weights,
	90	const int maxIterations,
	91	const MDOUBLE epsilon) {
	92
	93	if (idLeft.empty()) {
	94	//static int k=1; k++;
	95	MDOUBLE treeScore = evalTree(et,sp,sc,maxIterations,epsilon,weights);
	96	if (_keepAllTrees) {
	97	_allPossibleTrees.push_back(et);
	98	_allPossibleScores.push_back(treeScore);
	99	}
	100	LOG(5,<<".");
	101	//LOG(5,<<"tree: "<<k<<" l= "<<treeScore<<endl);
	102	if (treeScore > _bestScore) {
	103	//LOG(5,<<"new Best score!"<<endl);
	104	_bestTree = et;
	105	_bestScore = treeScore;
	106	}
	107	} else {
	108	treeIterTopDown tIt(et);
	109	tree::nodeP mynode = tIt.first();
	110	mynode = tIt.next(); // skipping the root
	111	for (; mynode != tIt.end(); mynode = tIt.next()) {
	112	int NameToAdd = idLeft[idLeft.size()-1];
	113	tree newT = getAnewTreeFrom(et,mynode,idLeft,sc[NameToAdd].name());
	114	recursiveFind(newT,sp,sc,idLeft,weights,maxIterations,epsilon);
	115	idLeft.push_back(NameToAdd);
	116	}
	117	}
	118	}
	119
	120	MDOUBLE allTrees::evalTree( tree& et,
	121	const stochasticProcess& sp,
	122	const sequenceContainer& sc,
	123	const int maxIterations,
	124	const MDOUBLE epsilon,
	125	const Vdouble * weights) {
	126	bblEM bblEM1(et,sc,sp,weights,maxIterations,epsilon);
	127	MDOUBLE res =bblEM1.getTreeLikelihood();
	128	return res;
	129	}
	130
	131
	132
	133

+68

-0

libs/phylogeny/allTrees.h less more

	0	// $Id: allTrees.h 1731 2007-02-26 13:45:23Z itaymay $
	1
	2	#ifndef ___ALL_TREES
	3	#define ___ALL_TREES
	4
	5	#include "definitions.h"
	6	#include "tree.h"
	7	#include "sequenceContainer.h"
	8	#include "stochasticProcess.h"
	9	#include <vector>
	10	using namespace std;
	11
	12	void get3seqTreeAndIdLeftVec(const sequenceContainer* sc,
	13	tree& starT,
	14	vector<int>& idList);
	15
	16	tree getAnewTreeFrom( const tree& et,
	17	tree::nodeP & mynode,
	18	vector<int> & idLeft,
	19	const string& nameToAdd);
	20	class allTrees {
	21	public:
	22	explicit allTrees(bool keepAllTrees = false);
	23	MDOUBLE getBestScore() {return _bestScore;}
	24	tree getBestTree() {return _bestTree;}
	25
	26	void getAllTreesAndLikelihoods(vector<tree>& resTree,VdoubleRep & scores) {
	27	resTree = _allPossibleTrees;
	28	scores = _allPossibleScores;
	29	}
	30
	31	void recursiveFind( tree et,
	32	const stochasticProcess& sp,
	33	const sequenceContainer& sc,
	34	vector<int> idLeft,
	35	const Vdouble * weights = NULL,
	36	const int maxIterations=1000,
	37	const MDOUBLE epsilon=0.05);
	38
	39	void recursiveFind( const sequenceContainer* sc,
	40	const stochasticProcess* sp,
	41	const Vdouble * weights = NULL,
	42	const int maxIterations=1000,
	43	const MDOUBLE epsilon=0.05); // one tree.
	44
	45
	46
	47	private:
	48	tree _bestTree;
	49	MDOUBLE _bestScore;
	50	vector<tree> _allPossibleTrees;
	51	vector<doubleRep> _allPossibleScores;
	52	const bool _keepAllTrees;
	53
	54
	55	MDOUBLE evalTree(tree& et,
	56	const stochasticProcess& sp,
	57	const sequenceContainer& sc,
	58	const int maxIterations,
	59	const MDOUBLE epsilon,
	60	const Vdouble * weights = NULL);
	61
	62
	63
	64
	65	};
	66	#endif
	67

+83

-0

libs/phylogeny/allTreesSeparateModel.cpp less more

	0	// $Id: allTreesSeparateModel.cpp 962 2006-11-07 15:13:34Z privmane $
	1
	2	#include "definitions.h"
	3	#include "treeIt.h"
	4	#include "allTreesSeparateModel.h"
	5	#include "bblEMSeperate.h"
	6	#include <algorithm>
	7	#include <iostream>
	8
	9	#include "someUtil.h"
	10
	11	using namespace std;
	12	#ifndef VERBOS
	13	#define VERBOS
	14	#endif
	15
	16
	17	allTreesSeparateModel::allTreesSeparateModel(){
	18	_bestScore = VERYSMALL;
	19	}
	20
	21	void allTreesSeparateModel::recursiveFind( const vector<sequenceContainer>* sc,
	22	const vector<stochasticProcess>* sp,
	23	const vector<Vdouble* > * weights,
	24	const int maxIterations,
	25	const MDOUBLE epsilon){
	26	tree starT;
	27	vector<int> ids;
	28	get3seqTreeAndIdLeftVec(&(*sc)[0],starT,ids);
	29	recursiveFind(starT,sp,sc,ids,weights,maxIterations,epsilon);
	30	}
	31
	32	void allTreesSeparateModel::recursiveFind(tree et,
	33	const vector<stochasticProcess>& sp,
	34	const vector<sequenceContainer>& sc,
	35	vector<int> idLeft,
	36	const vector<Vdouble* > * weights,
	37	const int maxIterations,
	38	const MDOUBLE epsilon) {
	39
	40	if (idLeft.empty()) {
	41	//static int k=1; k++;
	42	MDOUBLE treeScore = evalTree(et,sp,sc,maxIterations,epsilon,weights);
	43	//LOG(5,<<"tree: "<<k<<" l= "<<treeScore<<endl);
	44	LOG(5,<<".");
	45	if (treeScore > _bestScore) {
	46	//LOG(5,<<"new Best score!"<<endl);
	47	_bestTree = et;
	48	_bestScore = treeScore;
	49	_treeVecBest = _treeVecTmp; // keep the seperate trees too.
	50	}
	51	} else {
	52	et.create_names_to_internal_nodes();
	53	treeIterTopDown tIt(et);
	54	tree::nodeP mynode = tIt.first();
	55	mynode = tIt.next(); // skipping the root
	56	for (; mynode != tIt.end(); mynode = tIt.next()) {
	57	int NameToAdd = idLeft[idLeft.size()-1];
	58	tree newT = getAnewTreeFrom(et,mynode,idLeft,sc[0][NameToAdd].name());
	59	recursiveFind(newT,sp,sc,idLeft,weights,maxIterations,epsilon);
	60	idLeft.push_back(NameToAdd);
	61	}
	62	}
	63	}
	64
	65	MDOUBLE allTreesSeparateModel::evalTree( tree& et,
	66	const vector<stochasticProcess>& sp,
	67	const vector<sequenceContainer>& sc,
	68	const int maxIterations,
	69	const MDOUBLE epsilon,
	70	const vector<Vdouble* > * weights) {
	71	MDOUBLE res = 0;
	72	vector<tree> tVec;
	73	for (int k=0; k < sc.size(); ++k ) tVec.push_back(et);
	74	bblEMSeperate bblemsep1(tVec,sc,sp,weights,maxIterations,epsilon);
	75	res = bblemsep1.getTreeLikelihood();
	76	_treeVecTmp = tVec;
	77	return res;
	78	}
	79
	80
	81
	82

+76

-0

libs/phylogeny/allTreesSeparateModel.h less more

	0	// $Id: allTreesSeparateModel.h 962 2006-11-07 15:13:34Z privmane $
	1
	2	#ifndef ___ALL_TREES_SEPARATE_MODEL
	3	#define ___ALL_TREES_SEPARATE_MODEL
	4
	5	#include "definitions.h"
	6	#include "tree.h"
	7	#include "sequenceContainer.h"
	8	#include "stochasticProcess.h"
	9	#include <vector>
	10	using namespace std;
	11
	12	void get3seqTreeAndIdLeftVec(const sequenceContainer* sc,
	13	tree& starT,
	14	vector<int>& idList);
	15
	16	tree getAnewTreeFrom( const tree& et,
	17	tree::nodeP & mynode,
	18	vector<int> & idLeft,
	19	const string& nameToAdd);
	20
	21
	22	class allTreesSeparateModel {
	23	public:
	24	explicit allTreesSeparateModel();
	25	MDOUBLE getBestScore() {return _bestScore;}
	26	tree getBestTree() {return _bestTree;}
	27
	28	void recursiveFind(tree et,
	29	const vector<stochasticProcess>& sp,
	30	const vector<sequenceContainer>& sc,
	31	vector<int> idLeft,
	32	const vector<Vdouble* > * weights=NULL,
	33	const int maxIterations=1000,
	34	const MDOUBLE epsilon=0.05);
	35
	36	void recursiveFind( const vector<sequenceContainer>* sc,
	37	const vector<stochasticProcess>* sp,
	38	const vector<Vdouble* > * weights= NULL,
	39	const int maxIterations=1000,
	40	const MDOUBLE epsilon=0.05); // one tree.
	41
	42	vector<tree> getTreeVecBest() {return _treeVecBest;}
	43
	44	private:
	45	tree _bestTree;
	46	MDOUBLE _bestScore;
	47	vector<tree> _treeVecTmp; // same tree topologies, diff branch lengths
	48	vector<tree> _treeVecBest;// same tree topologies, diff branch lengths
	49
	50
	51	MDOUBLE evalTree( tree& et,
	52	const vector<stochasticProcess>& sp,
	53	const vector<sequenceContainer>& sc,
	54	const int maxIterations,
	55	const MDOUBLE epsilon,
	56	const vector<Vdouble* > * weights = NULL);
	57
	58	};
	59	#endif
	60
	61	// const stochasticProcess* _sp;
	62	//const sequenceContainer* _sc;
	63	//const Vdouble * _weights;
	64
	65	//vector<tree> getBestTreesSep() {return _bestSepTrees;}
	66	//vector<tree> _bestSepTrees;
	67	//vector<tree> _tmpSepTrees;
	68	//vector<tree> recursiveFindSep(const vector<sequenceContainer>* sc,
	69	// const vector<stochasticProcess>* sp,
	70	// const vector<Vdouble > weights,
	71	// const int maxIterations=1000,
	72	// const MDOUBLE epsilon=0.05); // sep model
	73	//const vector<sequenceContainer>* _scVec;
	74	//vector<stochasticProcess>* _spVec; // not const, so in proportional for example it can be changed.
	75	//const vector<Vdouble > _weightsVec;

+56

-0

libs/phylogeny/alphaTrivialAccelerator.h less more

	0	// $Id: alphaTrivialAccelerator.h 962 2006-11-07 15:13:34Z privmane $
	1
	2	#ifndef ___ALPHA_TRIVIAL_ACCELERATOR
	3	#define ___ALPHA_TRIVIAL_ACCELERATOR
	4
	5	#include "pijAccelerator.h"
	6	#include "readDatMatrix.h"
	7	class alphaTrivialAccelerator : public pijAccelerator {
	8	public:
	9
	10	explicit alphaTrivialAccelerator(pupAll* pb, const MDOUBLE alpha) :
	11	_pb(static_cast<pupAll *> (pb->clone())),
	12	_alpha(alpha)
	13	{};
	14
	15	alphaTrivialAccelerator(const alphaTrivialAccelerator& other):
	16	_pb(NULL),
	17	_alpha(other._alpha) {
	18	if (other._pb != NULL)
	19	_pb = static_cast<pupAll *>(other._pb->clone());
	20	}
	21
	22	const MDOUBLE Pij_t(const int i,const int j, const MDOUBLE d) const {return _pb->Pij_tAlpha(i,j,d,_alpha);}
	23
	24	const MDOUBLE dPij_dt(const int i,const int j, const MDOUBLE d) const{return _pb->Pij_tAlpha_dt(i,j,d,_alpha);};
	25
	26	const MDOUBLE d2Pij_dt2(const int i,const int j, const MDOUBLE d) const{return _pb->Pij_tAlpha_dt2(i,j,d,_alpha);};
	27
	28	const MDOUBLE Pij_t(const int i,const int j, const MDOUBLE d, const MDOUBLE alpha) const {return _pb->Pij_tAlpha(i,j,d,alpha);}
	29
	30	const MDOUBLE dPij_dt(const int i,const int j, const MDOUBLE d, const MDOUBLE alpha) const{return _pb->Pij_tAlpha_dt(i,j,d,alpha);};
	31
	32	const MDOUBLE d2Pij_dt2(const int i,const int j, const MDOUBLE d, const MDOUBLE alpha) const{return _pb->Pij_tAlpha_dt2(i,j,d,alpha);};
	33
	34	const MDOUBLE freq(const int i) const{return _pb->freq(i);}
	35
	36	virtual pijAccelerator* clone() const { return new alphaTrivialAccelerator(*this);}
	37
	38	virtual ~alphaTrivialAccelerator() {delete _pb;}
	39
	40	virtual const int alphabetSize() const {return _pb->alphabetSize();}
	41
	42	virtual replacementModel* getReplacementModel() const {
	43	return (static_cast<replacementModel * const>(_pb));
	44	}
	45
	46	const MDOUBLE alpha(void) const {return _alpha;}
	47	void setAlpha(const MDOUBLE alpha) {_alpha=alpha;}
	48
	49	private:
	50	pupAll* _pb;
	51	MDOUBLE _alpha;
	52	};
	53
	54	#endif
	55

+7

-0

libs/phylogeny/alphabet.cpp less more

	0	// $Id: alphabet.cpp 962 2006-11-07 15:13:34Z privmane $
	1
	2	#include "alphabet.h"
	3
	4	alphabet::~alphabet(){}
	5	// this must be here. see Effective c++ page 63 (item 14, constructors, destructors,
	6	// assignment

+32

-0

libs/phylogeny/alphabet.h less more

	0	// $Id: alphabet.h 1901 2007-03-15 13:21:06Z nimrodru $
	1
	2	// version 1.01
	3	// last modified 1 Jan 2004
	4
	5	#ifndef ___ALPHABET_H
	6	#define ___ALPHABET_H
	7
	8	#include <string>
	9	#include <vector>
	10	using namespace std;
	11
	// Abstract interface shared by all sequence alphabets. Characters are handled
	// as integer codes; concrete alphabets (e.g. amino) define the mapping.
	class alphabet {
	public:
		// Relation between a code found in a sequence and a candidate code;
		// the exact meaning of the result is defined by the implementation.
		virtual int relations(const int charInSeq, const int charToCheck) const = 0;

		// Integer code of the character found at position pos of seq.
		virtual int fromChar(const string& seq, const int pos) const = 0;

		// Text form of the integer code in_id.
		virtual string fromInt(const int in_id) const = 0;

		// Size of the alphabet.
		virtual int size() const = 0;

		// Pure virtual; the required definition is in alphabet.cpp.
		virtual ~alphabet() = 0;

		// Integer codes used for "unknown" and for "gap".
		virtual int unknown() const = 0;
		virtual int gap() const = 0;

		// Polymorphic copy.
		virtual alphabet* clone() const = 0;

		// Length of the text form of one character - presumably the number of
		// letters per symbol (amino returns 1); confirm for other alphabets.
		virtual int stringSize() const = 0;

		// Converts a whole string into a vector of integer codes.
		virtual vector<int> fromString(const string& str) const = 0;

		// "specific" here is not unknown, nor ambiguity, nor gap (for example,
		// for nucleotides it will be true for A, C, G, or T).
		virtual bool isSpecific(const int in_id) const = 0;

	};
	29
	30	#endif
	31

+152

-0

libs/phylogeny/amino.cpp less more

	0	// $Id: amino.cpp 2414 2007-10-08 14:34:42Z adist $
	1
	2	#include "amino.h"
	3
	4	//VVint amino::_relation;
	5
	6	amino::amino() {
	7	_relation.resize(24); // relation should realy be an allocted, two dimentional array, not a vector.
	8	for (int i=0; i < _relation.size(); ++i) { // this implementation would be much faster. with some c-tricks, this checkup could be done with one access only.
	9	_relation[i].resize(20);
	10	}
	11
	12	for (int k=-2;k<=21;++k){
	13	for (int j=0;j<20;++j){
	14	_relation[k+2][j]=relations_internal(k,j);
	15	}
	16	}
	17	}
	18
	19	int amino::fromChar(const char s) const{
	20	switch (s) {
	21	case 'A' : case'a' : return 0 ; break;
	22	case 'R' : case'r' : return 1 ; break;
	23	case 'N' : case'n' : return 2 ; break;
	24	case 'D' : case'd' : return 3 ; break;
	25	case 'C' : case'c' : return 4 ; break;
	26	case 'Q' : case'q' : return 5 ; break;
	27	case 'E' : case'e' : return 6 ; break;
	28	case 'G' : case'g' : return 7 ; break;
	29	case 'H' : case'h' : return 8 ; break;
	30	case 'I' : case'i' : return 9 ; break;
	31	case 'L' : case'l' : return 10; break;
	32	case 'K' : case'k' : return 11; break;
	33	case 'M' : case'm' : return 12; break;
	34	case 'F' : case'f' : return 13; break;
	35	case 'P' : case'p' : return 14; break;
	36	case 'S' : case's' : return 15; break;
	37	case 'T' : case't' : return 16; break;
	38	case 'W' : case'w' : return 17; break;
	39	case 'Y' : case'y' : return 18; break;
	40	case 'V' : case'v' : return 19; break;
	41	case 'B' : case'b' : return 20 ; break; // aspartate(D) or asparagine(N)
	42	case 'Z' : case'z' : return 21 ; break; // glutamate (E) or glutamine(Q)
	43	case '-' : case'_' : return -1; break;
	44	case '?' : case'*' : return -2; break;
	45	case 'x' : case'X' : return -2; break;
	46	case '.' : return -3; break;
	47	default:
	48	vector<string> err;
	49	err.push_back(" The amino-acid sequences contained the character: ");
	50	err[0]+=s;
	51	err.push_back(" Amino acid was not one of the following: ");
	52	err.push_back(" A, B, R, N, D, C, Q, E, G, H, I, L, K, M, F, P, S, T, W, Y, V, X, Z, -, ?");
	53	err.push_back(" a, b, r, n, d, c, q, e, g, h, i, l, k, m, f, p, s, t, w, y, v, x, z, _, *");
	54	errorMsg::reportError(err);
	55	}// end of switch
	56	return -99; // never suppose to be here.
	57	}// end of function
	58
	59	vector<int> amino::fromString(const string &str) const {
	60	vector<int> vec;
	61	for (int i=0;i<str.size();i++)
	62	vec.push_back(fromChar(str[i]));
	63	return vec;
	64	}
	65
	66	string amino::fromInt(const int in_id) const{
	67	char res = 0;
	68	switch (in_id) {
	69	case 0 : res = 'A' ; break;
	70	case 1 : res = 'R' ; break;
	71	case 2 : res = 'N' ; break;
	72	case 3 : res = 'D' ; break;
	73	case 4 : res = 'C' ; break;
	74	case 5 : res = 'Q' ; break;
	75	case 6 : res = 'E' ; break;
	76	case 7 : res = 'G' ; break;
	77	case 8 : res = 'H' ; break;
	78	case 9 : res = 'I' ; break;
	79	case 10: res = 'L' ; break;
	80	case 11: res = 'K' ; break;
	81	case 12: res = 'M' ; break;
	82	case 13: res = 'F' ; break;
	83	case 14: res = 'P' ; break;
	84	case 15: res = 'S' ; break;
	85	case 16: res = 'T' ; break;
	86	case 17: res = 'W' ; break;
	87	case 18: res = 'Y' ; break;
	88	case 19: res = 'V' ; break;
	89	case 20: res = 'B' ; break;
	90	case 21: res = 'Z' ; break;
	91	case -1: res = '-' ; break;
	92	case -2: res = 'X' ; break;
	93	case -3: res = '.' ; break;
	94	default:
	95	vector<string> err;
	96	err.push_back(" unable to print amino ac_id. amino ac_id was not one of the following: ");
	97	err.push_back("A, B, R, N, D, C, Q, E, G, H, I, L, K, M, F, P, S, T, W, Y, V, Z, -, ?");
	98	err.push_back("a, b, r, n, d, c, q, e, g, h, i, l, k, m, f, p, s, t, w, y, v, z, _, *");
	99	errorMsg::reportError(err);
	100	}//end of switch
	101	string vRes;
	102	vRes.append(1,res);
	103	return vRes;
	104	}// end of function
	105
	106	int amino::relations(const int charInSeq, const int charToCheck) const{
	107	if (charInSeq == -1) {
	108	errorMsg::reportError("gaps in the sequences. Either change gaps to ? or remove gap positions");
	109	}
	110	return _relation[charInSeq+2][charToCheck];// <-MATAN, HERE YOU SWITHCED THE ORDER...
	111	}
	112
	113	int amino::fromChar(const string& str, const int pos) const{
	114	return fromChar(str[pos]);
	115	}
	116
	117	int amino::relations_internal(const int charInSeq, const int charToCheck) const{
	118	if (charInSeq == charToCheck) return 1;
	119	else if (charInSeq == fromChar('?')) return 1;
	120	else if ((charInSeq == fromChar('B')) &&
	121	((charToCheck == fromChar('N')) \|\|
	122	(charToCheck == fromChar('D')))) return 1; // B is either N or D
	123	else if ((charInSeq == fromChar('Z')) &&
	124	((charToCheck == fromChar('Q')) \|\|
	125	(charToCheck == fromChar('E')))) return 1; // Z is either E or Q
	126	return 0;
	127	}
	128
	129
	130	vector<int> aminoUtility::codonOf(const int a, codon &cod){
	131	vector<int> codons;
	132	amino amin;
	133	string strAmino=amin.fromInt(a);
	134	map <string, string> genCode=cod.geneticCode();
	135	map <string, string>::iterator it=genCode.begin();
	136	int tmp2=genCode.size();
	137	while (it!=genCode.end()){
	138	string tmp=(*it).second;
	139	if ((*it).second==strAmino){
	140	string strCodon=(*it).first;
	141	int c=cod.fromChar(strCodon,0);
	142	codons.push_back(c);
	143	}
	144	it++;
	145	}
	146	if (codons.empty()){
	147	cout<<tmp2<<" amino is = "<<a<<endl;
	148	errorMsg::reportError("error in function aminoUtility::codonOf: no codon found for amino acid");
	149	}
	150	return codons;
	151	}

+46

-0

libs/phylogeny/amino.h less more

	0	// $Id: amino.h 1901 2007-03-15 13:21:06Z nimrodru $
	1
	2	#ifndef ____AMINO
	3	#define ____AMINO
	4
	5	#include "definitions.h"
	6	#include "errorMsg.h"
	7	#include "alphabet.h"
	8	#include "geneticCodeHolder.h"
	9	#include "codon.h"
	10
	11
	12	//utility of amino acid
	13	class aminoUtility {
	14	public:
	15
	16	static vector<int> codonOf(const int a, codon &cod); //returns vector of codons that code to a under a specific genetic code.
	17
	18	};
	19
	20	//based on the amino-acid list found in http://www.dur.ac.uk/~dbl0www/Bioinformatics/aminoacids.htm
	21	class amino : public alphabet {
	22	public:
	23	explicit amino();
	24	virtual ~amino() {}
	25	virtual alphabet* clone() const { return new amino(*this); }
	26	int unknown() const {return -2;}
	27	int gap() const {return -1;}
	28	int size() const {return 20;}
	29	int stringSize() const {return 1;} // one letter code.
	30	int relations(const int charInSeq, const int charToCheck) const;
	31	int fromChar(const string& str, const int pos) const;
	32	int fromChar(const char s) const;
	33	string fromInt(const int in_id) const;
	34	vector<int> fromString(const string& str) const;
	35	// "specific" here is not unknown, nor ambiguity, nor gap (for example, for nucleotides it will true for A,C,G, or T).
	36	bool isSpecific(const int id) const {return (id>=0 && id < size());}
	37
	38	private:
	39	int relations_internal(const int charInSeq, const int charToCheck) const;
	40	VVint _relation;
	41	};//end of class
	42
	43	#endif
	44
	45

+201

-0

libs/phylogeny/bblEM.cpp less more

	0	// $Id: bblEM.cpp 9854 2011-09-15 11:36:23Z cohenofi $
	1	#include "bblEM.h"
	2	#include "likelihoodComputation.h"
	3	using namespace likelihoodComputation;
	4	#include "computeUpAlg.h"
	5	#include "computeDownAlg.h"
	6	#include "computeCounts.h"
	7	#include "treeIt.h"
	8	#include "fromCountTableComponentToDistance.h"
	9	#include <ctime>
	10
	11	bblEM::bblEM(tree& et,
	12	const sequenceContainer& sc,
	13	const stochasticProcess& sp,
	14	const Vdouble * weights,
	15	const int maxIterations,
	16	const MDOUBLE epsilon,
	17	const MDOUBLE tollForPairwiseDist,
	18	unObservableData* _unObservableData_p,
	19	const MDOUBLE* likelihoodLast) :
	20	_et(et),_sc(sc),_sp(sp),_weights(weights),_unObservableData_p(_unObservableData_p)
	21	{
	22	time_t ltime1;
	23	time( &ltime1 );
	24	_treeLikelihood = compute_bblEM(maxIterations,epsilon,tollForPairwiseDist,likelihoodLast);
	25	time_t ltime2;
	26	time( &ltime2 );
	27	int t = static_cast<long>(ltime2 - ltime1);
	28	LOG(4,<<"Overall running time for BBL = "<<t<<" sec"<<endl);
	29	}
	30
	31
	32	/********************************************************************************************
	33	*********************************************************************************************/
	34	MDOUBLE bblEM::compute_bblEM(
	35	const int maxIterations,
	36	const MDOUBLE epsilon,
	37	const MDOUBLE tollForPairwiseDist,
	38	const MDOUBLE* likelihoodLast){
	39	allocatePlace();
	40	MDOUBLE oldL=VERYSMALL;
	41	MDOUBLE currL = VERYSMALL;
	42	tree oldT = _et;
	43	for (int i=0; i < maxIterations; ++i) {
	44	time_t ltime1;
	45	time( &ltime1 );
	46	computeUp();
	47	currL = likelihoodComputation::getTreeLikelihoodFromUp2(_et,_sc,_sp,_cup,_posLike,_weights,_unObservableData_p);
	48	LOGnOUT(4,<<"--- Iter="<<i<<" logL="<<currL<<endl);
	49	if(oldL<=currL){ // make sure not to use tree with lower likelihood then last computed likelihood (before BBL-EM)
	50	if(likelihoodLast){ // likelihood from external model
	51	if(*likelihoodLast<=currL)
	52	oldT = _et; // L didn't go down, update old tree
	53	else{
	54	LOGnOUT(4,<<"Likelihood went down compared pre-BBL oldL="<<*likelihoodLast<<" newL="<<currL<<" Do not update old tree"<<endl);
	55	break;
	56	}
	57	}
	58	else{
	59	oldT = _et; // L didn't go down
	60	LOGnOUT(7,<<"LikelihoodLast was not sent to bblEM"<<endl);
	61	}
	62	}
	63	else
	64	LOGnOUT(4,<<"Likelihood went down oldL="<<oldL<<" newL="<<currL<<" Do not update tree"<<endl);
	65
	66	if (currL < oldL + epsilon) { // need to break
	67	if (currL<oldL) {
	68	_et = oldT;
	69	if(_unObservableData_p)
	70	_unObservableData_p->setLforMissingData(_et,&_sp);
	71	return oldL; // keep the old tree, and old likelihood
	72	} else {
	73	//update the tree and likelihood and return
	74	return currL;
	75	}
	76	}
	77	bblEM_it(tollForPairwiseDist);
	78	oldL = currL;
	79	time_t ltime2;
	80	time( &ltime2 );
	81	int t = static_cast<long>(ltime2 - ltime1);
	82	LOG(6,<<"Time BBL iteration = "<<t<<" sec"<<endl);
	83	}
	84
	85	// in the case were we reached max_iter, we have to recompute the likelihood of the new tree...
	86	computeUp();
	87	currL = likelihoodComputation::getTreeLikelihoodFromUp2(_et,_sc,_sp,_cup,_posLike,_weights,_unObservableData_p);
	88	if (currL<oldL) {
	89	_et = oldT;
	90	if(_unObservableData_p)
	91	_unObservableData_p->setLforMissingData(_et,&_sp);
	92	return oldL; // keep the old tree, and old likelihood
	93	}
	94	else
	95	return currL;
	96	}
	97
	98
	99	/********************************************************************************************
	100	*********************************************************************************************/
	101	void bblEM::allocatePlace() {
	102	_computeCountsV.resize(_et.getNodesNum()); //initiateTablesOfCounts
	103	for (int i=0; i < _computeCountsV.size(); ++i) {
	104	_computeCountsV[i].countTableComponentAllocatePlace(_sp.alphabetSize(),_sp.categories());
	105	}
	106	_cup.allocatePlace(_sc.seqLen(),_sp.categories(), _et.getNodesNum(), _sc.alphabetSize());
	107	_cdown.allocatePlace(_sp.categories(), _et.getNodesNum(), _sc.alphabetSize());
	108	}
	109
	110	/********************************************************************************************
	111	*********************************************************************************************/
	112	void bblEM::bblEM_it(const MDOUBLE tollForPairwiseDist){
	113	//string costTable = "countBBLEMTable.txt";
	114	//ofstream costTableStream(costTable.c_str());
	115	for (int i=0; i < _computeCountsV.size(); ++i) {
	116	_computeCountsV[i].zero();
	117	//_computeCountsV[i].printTable(costTableStream);
	118	}
	119
	120	for (int i=0; i < _sc.seqLen(); ++i) {
	121	computeDown(i);
	122	addCounts(i); // computes the counts and adds to the table.
	123	}
	124
	125	//for (int i=0; i < _computeCountsV.size(); ++i) { // used for Debug - check the need for 'zero()'
	126	// _computeCountsV[i].printTable(costTableStream);
	127	//}
	128
	129	optimizeBranches(tollForPairwiseDist);
	130	if(_unObservableData_p){
	131	_unObservableData_p->setLforMissingData(_et,&_sp);
	132	}
	133	}
	134	/********************************************************************************************
	135	*********************************************************************************************/
	136	void bblEM::optimizeBranches(const MDOUBLE tollForPairwiseDist){
	137	treeIterDownTopConst tIt(_et);
	138	for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
	139	if (!tIt->isRoot()) {
	140	fromCountTableComponentToDistance from1(_computeCountsV[mynode->id()],_sp,tollForPairwiseDist,mynode->dis2father(),_unObservableData_p);
	141	from1.computeDistance();
	142	mynode->setDisToFather(from1.getDistance());
	143	if(_unObservableData_p){ // needed only before likelihood computation
	144	_unObservableData_p->setLforMissingData(_et,&_sp);
	145	}
	146	}
	147	}
	148	}
	149	/********************************************************************************************
	150	*********************************************************************************************/
	151	void bblEM::computeUp(){
	152	_pij.fillPij(_et,_sp,0); // 0 is becaues we compute Pij(t) and not its derivations...
	153	computeUpAlg cupAlg;
	154	for (int pos=0; pos < _sc.seqLen(); ++pos) {
	155	for (int categor = 0; categor < _sp.categories(); ++categor) {
	156	cupAlg.fillComputeUp(_et,_sc,pos,_pij[categor],_cup[pos][categor]);
	157	}
	158	}
	159	}
	160
	161	void bblEM::computeDown(const int pos){
	162	computeDownAlg cdownAlg;
	163	for (int categor = 0; categor < _sp.categories(); ++categor) {
	164	cdownAlg.fillComputeDown(_et,_sc,pos,_pij[categor],_cdown[categor],_cup[pos][categor]);
	165	}
	166	}
	167	/********************************************************************************************
	168	*********************************************************************************************/
	169	void bblEM::addCounts(const int pos){
	170	//MDOUBLE posProb =
	171	// likelihoodComputation::getProbOfPosWhenUpIsFilledGam(pos,_et,_sc,_sp,_cup);
	172
	173	MDOUBLE weig = (_weights ? (*_weights)[pos] : 1.0);
	174	if (weig == 0) return;
	175	treeIterDownTopConst tIt(_et);
	176	for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
	177	if (!tIt->isRoot()) {
	178	addCounts(pos,mynode,_posLike[pos],weig);
	179	}
	180	}
	181	}
	182	/********************************************************************************************
	183	*********************************************************************************************/
	184	void bblEM::addCounts(const int pos, tree::nodeP mynode, const doubleRep posProb, const MDOUBLE weig){
	185
	186	computeCounts cc;
	187	for (int categor =0; categor< _sp.categories(); ++ categor) {
	188	cc.computeCountsNodeFatherNodeSonHomPos(_sc,
	189	_pij[categor],
	190	_sp,
	191	_cup[pos][categor],
	192	_cdown[categor],
	193	weig,
	194	posProb,
	195	mynode,
	196	_computeCountsV[mynode->id()][categor],
	197	_sp.ratesProb(categor));
	198	}
	199	}
	200

+58

-0

libs/phylogeny/bblEM.h less more

	0	// $Id: bblEM.h 8174 2010-06-20 08:38:12Z cohenofi $
	1	#ifndef ___BBL_EM_H
	2	#define ___BBL_EM_H
	3
	4	#include "definitions.h"
	5	#include "tree.h"
	6	#include "stochasticProcess.h"
	7	#include "sequenceContainer.h"
	8	#include "countTableComponent.h"
	9	#include "computePijComponent.h"
	10	#include "suffStatComponent.h"
	11	#include "unObservableData.h"
	12
	13	#include <vector>
	14
	15	using namespace std;
	16
	17	class bblEM {
	18	public:
	19	explicit bblEM(tree& et,
	20	const sequenceContainer& sc,
	21	const stochasticProcess& sp,
	22	const Vdouble * weights = NULL,
	23	const int maxIterations=50,
	24	const MDOUBLE epsilon=0.05,
	25	const MDOUBLE tollForPairwiseDist=0.001,
	26	unObservableData* unObservableData_p=NULL,
	27	const MDOUBLE* likelihoodLast=NULL);
	28	MDOUBLE getTreeLikelihood() const {return _treeLikelihood;}
	29
	30	private:
	31	MDOUBLE compute_bblEM(const int maxIterations,
	32	const MDOUBLE epsilon,
	33	const MDOUBLE tollForPairwiseDist,
	34	const MDOUBLE* likelihoodLast=NULL);
	35	void bblEM_it(const MDOUBLE tollForPairwiseDist);
	36	void computeDown(const int pos);
	37	void computeUp();
	38	void addCounts(const int pos);
	39	void addCounts(const int pos, tree::nodeP mynode, const doubleRep posProb, const MDOUBLE weig);
	40	void optimizeBranches(const MDOUBLE tollForPairwiseDist);
	41	void allocatePlace();
	42
	43
	44	MDOUBLE _treeLikelihood;
	45	tree& _et;
	46	const sequenceContainer& _sc;
	47	const stochasticProcess& _sp;
	48	vector<countTableComponentGam> _computeCountsV; // for each node - a table of ratealphalph
	49	computePijGam _pij;
	50	suffStatGlobalGam _cup;
	51	suffStatGlobalGamPos _cdown;
	52	const Vdouble * _weights;
	53	VdoubleRep _posLike;
	54	unObservableData* _unObservableData_p;
	55	};
	56
	57	#endif

+181

-0

libs/phylogeny/bblEM2USSRV.cpp less more

	0	// $Id: bblEM2USSRV.cpp 1944 2007-04-18 12:41:14Z osnatz $
	1	#include "bblEM2USSRV.h"
	2
	3	bblEM2USSRV::bblEM2USSRV(tree& et,
	4	const sequenceContainer& sc,
	5	const sequenceContainer& baseSc,
	6	const ussrvModel& model,
	7	const Vdouble * weights,
	8	int maxIterations,
	9	MDOUBLE epsilon,
	10	MDOUBLE tollForPairwiseDist) :
	11	_et(et),_sc(sc),_baseSc(baseSc),_model(model),_weights (weights)
	12	{
	13	LOG(5,<<"****BBL EM USSRV*******"<<endl<<endl);
	14	_treeLikelihood = compute_bblEM(maxIterations,epsilon,tollForPairwiseDist);
	15	}
	16
	17	// @@@@ Need to check if we can make it more efficient
	18	MDOUBLE bblEM2USSRV::compute_bblEM(
	19	int maxIterations,
	20	MDOUBLE epsilon,
	21	MDOUBLE tollForPairwiseDist){
	22
	23	allocatePlace();
	24	MDOUBLE oldL = VERYSMALL;
	25	MDOUBLE currL = VERYSMALL;
	26	tree oldT = _et;
	27	for (int i=0; i < maxIterations; ++i) {
	28	computeUp();
	29	// Calculate the likelihood and fill the _posLike
	30	currL = likelihoodComputation2USSRV::getTreeLikelihoodFromUp2(_et,
	31	_sc,_baseSc,_model,_cupBase,_cupSSRV,_posLike,_weights);
	32	//////////////
	33	LOGDO(5,printTime(myLog::LogFile()));
	34	LOG(5,<<"iteration no "<<i << " in BBL "<<endl);
	35	LOG(5,<<"old best L= "<<oldL<<endl);
	36	LOG(5,<<"current best L= "<<currL<<endl);
	37
	38
	39	if (currL < oldL + epsilon) { // need to break
	40	if (currL<oldL) {
	41	cout<<"****** PROBLEMS IN BBL USSRV*******"<<endl;
	42	LOG(5,<<"old best L= "<<oldL<<endl);
	43	LOG(5,<<"current best L= "<<currL<<endl);
	44	_et = oldT;
	45	return oldL; // keep the old tree, and old likelihood
	46	} else {
	47	//update the tree and likelihood and return
	48	LOG(5,<<"old best L= "<<oldL<<endl);
	49	LOG(5,<<"current best L= "<<currL<<endl);
	50	return currL;
	51	}
	52	}
	53	oldT = _et;
	54	bblEM_it(tollForPairwiseDist);
	55	oldL = currL;
	56	}
	57	// in the case were we reached max_iter, we have to recompute the likelihood of the new tree...
	58	computeUp();
	59	currL = likelihoodComputation2USSRV::getTreeLikelihoodFromUp2(_et,
	60	_sc,_baseSc,_model,_cupBase,_cupSSRV,_posLike,_weights);
	61	if (currL<oldL) {
	62	_et = oldT;
	63	return oldL; // keep the old tree, and old likelihood
	64	}
	65	else
	66	return currL;
	67	}
	68
	69
	70	void bblEM2USSRV::allocatePlace() {
	71	_computeCountsBaseV.resize(_et.getNodesNum()); //initiateTablesOfCounts
	72	_computeCountsSsrvV.resize(_et.getNodesNum()); //initiateTablesOfCounts
	73
	74	for (int i=0; i < _computeCountsBaseV.size(); ++i) {
	75	_computeCountsBaseV[i].countTableComponentAllocatePlace(_model.getBaseModel().alphabetSize(),_model.noOfCategor());
	76	_computeCountsSsrvV[i].countTableComponentAllocatePlace(_model.getSSRVmodel().alphabetSize());
	77	}
	78	_cupBase.allocatePlace(_baseSc.seqLen(),_model.noOfCategor(), _et.getNodesNum(), _baseSc.alphabetSize());
	79	_cupSSRV.allocatePlace(_sc.seqLen(), _et.getNodesNum(), _sc.alphabetSize());
	80
	81	_cdownBase.allocatePlace(_model.noOfCategor(), _et.getNodesNum(), _baseSc.alphabetSize());
	82	_cdownSSRV.allocatePlace( _et.getNodesNum(), _sc.alphabetSize());
	83
	84	}
	85
	86	void bblEM2USSRV::bblEM_it(MDOUBLE tollForPairwiseDist){
	87	for (int i=0; i < _computeCountsBaseV.size(); ++i) {
	88	_computeCountsBaseV[i].zero();
	89	_computeCountsSsrvV[i].zero();
	90	}
	91	for (int i=0; i < _sc.seqLen(); ++i) {
	92	computeDown(i);
	93	addCounts(i); // computes the counts and adds to the table.
	94	}
	95	optimizeBranches(tollForPairwiseDist);
	96	}
	97
	98	// @@@@ need to print the tree
	99	void bblEM2USSRV::optimizeBranches(MDOUBLE tollForPairwiseDist){
	100	treeIterDownTopConst tIt(_et);
	101	for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
	102	if (!tIt->isRoot()) {
	103	fromCountTableComponentToDistance2USSRV
	104	from1(_computeCountsBaseV[mynode->id()],_computeCountsSsrvV[mynode->id()],_model,tollForPairwiseDist,mynode->dis2father());
	105	from1.computeDistance();
	106	mynode->setDisToFather(from1.getDistance());
	107	}
	108	}
	109	}
	110
	111	void bblEM2USSRV::computeUp(){
	112	_pijBase.fillPij(_et,_model.getBaseModel(),0); // 0 is becaues we compute Pij(t) and not its derivations...
	113	_pijSSRV.fillPij(_et,_model.getSSRVmodel(),0);
	114
	115	computeUpAlg cupAlg;
	116	for (int pos=0; pos < _sc.seqLen(); ++pos) {
	117	// compute up for the base model
	118	for (int categor = 0; categor < _model.noOfCategor(); ++categor) {
	119	cupAlg.fillComputeUp(_et,_baseSc,pos,_pijBase[categor],_cupBase[pos][categor]);
	120	}
	121	// compute up for the ssrv model
	122	cupAlg.fillComputeUp(_et,_sc,pos,_pijSSRV,_cupSSRV[pos]);
	123	}
	124	}
	125
	126	void bblEM2USSRV::computeDown(int pos){
	127	computeDownAlg cdownAlg;
	128	// compute down for the base model
	129	for (int categor = 0; categor < _model.noOfCategor(); ++categor) {
	130	cdownAlg.fillComputeDown(_et,_baseSc,pos,_pijBase[categor],_cdownBase[categor],_cupBase[pos][categor]);
	131	}
	132	// compute down for the ssrv model
	133	cdownAlg.fillComputeDown(_et,_sc,pos,_pijSSRV,_cdownSSRV,_cupSSRV[pos]);
	134	}
	135
	136	void bblEM2USSRV::addCounts(int pos){
	137
	138	MDOUBLE weig = (_weights ? (*_weights)[pos] : 1.0);
	139	if (weig == 0) return;
	140	treeIterDownTopConst tIt(_et);
	141	for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
	142	if (!tIt->isRoot()) {
	143	addCounts(pos,mynode,_posLike[pos],weig);
	144	}
	145	}
	146	}
	147
	148	void bblEM2USSRV::addCounts(int pos, tree::nodeP mynode, doubleRep posProb, MDOUBLE weig){
	149
	150	computeCounts cc;
	151	int categor;
	152	// base Model
	153	for (categor =0; categor< _model.noOfCategor(); ++categor) {
	154	cc.computeCountsNodeFatherNodeSonHomPos(_baseSc,
	155	_pijBase[categor],
	156	_model.getBaseModel(),
	157	_cupBase[pos][categor],
	158	_cdownBase[categor],
	159	weig,
	160	posProb,
	161	mynode,
	162	_computeCountsBaseV[mynode->id()][categor],
	163	_model.getCategorProb(categor)*(1-_model.getF()));
	164
	165	}
	166	// SSRV model
	167	cc.computeCountsNodeFatherNodeSonHomPos(_sc,
	168	_pijSSRV,
	169	_model.getSSRVmodel(),
	170	_cupSSRV[pos],
	171	_cdownSSRV,
	172	weig,
	173	posProb,
	174	mynode,
	175	_computeCountsSsrvV[mynode->id()],
	176	_model.getF());
	177	}
	178
	179
	180

+73

-0

libs/phylogeny/bblEM2USSRV.h less more

	0	// $Id: bblEM2USSRV.h 1504 2007-01-15 14:04:44Z osnatz $
	1	//copy of bblEM of the codon model + changes
	2	#ifndef ___BBL_EM_2_USSRV
	3	#define ___BBL_EM_2_USSRV
	4
	5	#include "definitions.h"
	6	#include "tree.h"
	7	#include "stochasticProcess.h"
	8	#include "sequenceContainer.h"
	9	#include "countTableComponent.h"
	10	#include "computePijComponent.h"
	11	#include "suffStatComponent.h"
	12	#include "ussrvModel.h"
	13	#include "computeUpAlg.h"
	14	#include "computeDownAlg.h"
	15	#include "computeCounts.h"
	16	#include "treeIt.h"
	17	#include "fromCountTableComponentToDistance2USSRV.h"
	18	#include "likelihoodComputation2USSRV.h"
	19	#include "someUtil.h"
	20	#include <vector>
	21	using namespace std;
	22	// @@@@ maybe should inherit from bblEM
	23	class bblEM2USSRV {
	24	public:
	25	explicit bblEM2USSRV(tree& et,
	26	const sequenceContainer& sc,
	27	const sequenceContainer& baseSc,
	28	const ussrvModel &model,
	29	const Vdouble * weights = NULL,
	30	const int maxIterations=50,
	31	const MDOUBLE epsilon=0.05,
	32	const MDOUBLE tollForPairwiseDist=0.001);
	33	MDOUBLE getTreeLikelihood() const {return _treeLikelihood;}
	34
	35	private:
	36	MDOUBLE compute_bblEM(int maxIterations,
	37	MDOUBLE epsilon,
	38	MDOUBLE tollForPairwiseDist);
	39	void bblEM_it(MDOUBLE tollForPairwiseDist);
	40	void computeDown(int pos);
	41	void computeUp();
	42	void addCounts(int pos);
	43	void addCounts(int pos, tree::nodeP mynode, doubleRep posProb, MDOUBLE weig);
	44	void optimizeBranches(MDOUBLE tollForPairwiseDist);
	45	void allocatePlace();
	46
	47	MDOUBLE _treeLikelihood;
	48	tree& _et;
	49	const sequenceContainer& _sc;
	50	const sequenceContainer& _baseSc;
	51	const ussrvModel& _model;
	52	vector<countTableComponentGam> _computeCountsBaseV; // for each node - a table of ratealphalph (see below)
	53	vector<countTableComponentHom> _computeCountsSsrvV; // for each node - a table of ratealphalph (see below)
	54	computePijGam _pijBase;
	55	computePijHom _pijSSRV;
	56	suffStatGlobalGam _cupBase;
	57	suffStatGlobalHom _cupSSRV;
	58	suffStatGlobalGamPos _cdownBase;
	59	suffStatGlobalHomPos _cdownSSRV;
	60	const Vdouble * _weights;
	61	VdoubleRep _posLike;
	62	};
	63
// _computeCountsBaseV is a vector containing, for each node, a countTableComponentGam
// (_computeCountsSsrvV is its counterpart for the SSRV model and holds a countTableComponentHom per node).
// A countTableComponentGam is a vector containing, for each rate category, a table of size alphabet*alphabet
// (VVdouble) which should be pre-filled with Pij(x,y,rk) from equation (17) in the EM-BBL theory summary.
// Pij(x,y,rk) represents the probability of observing x and y along a branch ti at position j with a rate
// from category k.
// For this reason, we need to initialize this class and calculate it again for every position.
	70
	71
	72	#endif // bblEM2USSRV

+165

-0

libs/phylogeny/bblEM2codon.cpp less more

	0	// $Id: bblEM2codon.cpp 2350 2007-08-20 10:53:51Z adist $
	1	#include "bblEM2codon.h"
	2	#include "likelihoodComputation.h"
	3	#include "likelihoodComputation2Codon.h"
	4	#include "fromCountTableComponentToDistance2Codon.h"
	5	using namespace likelihoodComputation;
	6	using namespace likelihoodComputation2Codon;
	7	#include "computeUpAlg.h"
	8	#include "computeDownAlg.h"
	9	#include "computeCounts.h"
	10	#include "treeIt.h"
	11	#include "errorMsg.h"
	12	#include "logFile.h"
	13	#include <ctime>
	14
	15	bblEM2codon::bblEM2codon(tree& et,
	16	const sequenceContainer& sc,
	17	const vector<stochasticProcess>& spVec,
	18	const distribution *in_distr,
	19	const Vdouble * weights,
	20	const int maxIterations,
	21	const MDOUBLE epsilon,
	22	const MDOUBLE tollForPairwiseDist) :
	23	_et(et),_sc(sc),_spVec(spVec),_distr(in_distr->clone()),_weights (weights) {
	24
	25	LOG(5,<<"****BEGIN OF BBL EM*******"<<endl<<endl);
	26	_treeLikelihood = compute_bblEM(maxIterations,epsilon,tollForPairwiseDist);
	27	LOG(5,<<"****END OF BBL EM*******"<<endl<<endl);
	28	}
	29
	30	bblEM2codon::~bblEM2codon(){
	31	delete _distr;
	32	}
	33
	34	MDOUBLE bblEM2codon::compute_bblEM(
	35	const int maxIterations,
	36	const MDOUBLE epsilon,
	37	const MDOUBLE tollForPairwiseDist){
	38	allocatePlace();
	39	MDOUBLE oldL=VERYSMALL;
	40	MDOUBLE currL = VERYSMALL;
	41	tree oldT = _et;
	42	for (int i=0; i < maxIterations; ++i) {
	43
	44	computeUp();
	45	//currL = likelihoodComputation::getTreeLikelihoodFromUp2(_et,_sc,_sp,_cup,_posLike,_weights);
	46	currL = likelihoodComputation2Codon::getTreeLikelihoodFromUp2(_et,_sc,_spVec[0],_cup,_posLike,_distr,_weights);
	47	//////////////
	48	if (i!=0)
	49	LOG(5,<<"last best L= "<<oldL<<endl);
	50	LOG(5,<<"current best L= "<<currL<<endl<<endl);
	51
	52	//MDOUBLE checkUpLL = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(_et, _sc, _sp, _weights);
	53	//cerr << "checkUpLL = "<<checkUpLL <<" curll = "<<currL<<endl;
	54	///////////////
	55
	56	if (currL < oldL + epsilon) { // need to break
	57	if (currL<oldL) {
	58	_et = oldT;
	59	return oldL; // keep the old tree, and old likelihood
	60	} else {
	61	//update the tree and likelihood and return
	62	return currL;
	63	}
	64	}
	65	oldT = _et;
	66	bblEM_it(tollForPairwiseDist);
	67	oldL = currL;
	68	}
	69	// in the case were we reached max_iter, we have to recompute the likelihood of the new tree...
	70	computeUp();
	71	currL = likelihoodComputation2Codon::getTreeLikelihoodFromUp2(_et,_sc,_spVec[0],_cup,_posLike,_distr,_weights);
	72	//currL = likelihoodComputation::getTreeLikelihoodFromUp2(_et,_sc,_sp,_cup,_posLike,_weights);
	73	if (currL<oldL) {
	74	_et = oldT;
	75	return oldL; // keep the old tree, and old likelihood
	76	}
	77	else
	78	return currL;
	79	}
	80
	81	void bblEM2codon::allocatePlace() {
	82	_computeCountsV.resize(_et.getNodesNum()); //initiateTablesOfCounts
	83	for (int i=0; i < _computeCountsV.size(); ++i) {
	84	_computeCountsV[i].countTableComponentAllocatePlace(_spVec[0].alphabetSize(),_distr->categories());
	85	}
	86	_cup.allocatePlace(_sc.seqLen(),_distr->categories(), _et.getNodesNum(), _sc.alphabetSize());
	87	_cdown.allocatePlace(_distr->categories(), _et.getNodesNum(), _sc.alphabetSize());
	88	}
	89
	90	void bblEM2codon::bblEM_it(const MDOUBLE tollForPairwiseDist){
	91	int i;
	92	for (i=0; i < _computeCountsV.size(); ++i) {
	93	_computeCountsV[i].zero();
	94	}
	95	for (i=0; i < _sc.seqLen(); ++i) {
	96	computeDown(i);
	97	addCounts(i); // computes the counts and adds to the table.
	98	}
	99	optimizeBranches(tollForPairwiseDist);
	100	}
	101
	102	void bblEM2codon::optimizeBranches(const MDOUBLE tollForPairwiseDist){
	103	treeIterDownTopConst tIt(_et);
	104	for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
	105	if (!tIt->isRoot()) {
	106	fromCountTableComponentToDistance2Codon from1(_computeCountsV[mynode->id()],_spVec,tollForPairwiseDist,mynode->dis2father());
	107	from1.computeDistance();
	108	mynode->setDisToFather(from1.getDistance());
	109	}
	110	}
	111	}
	112
	113	void bblEM2codon::computeUp(){
	114	//_pij.fillPij(_et,_sp,0); // 0 is becaues we compute Pij(t) and not its derivations...
	115	_pij._V.resize(_spVec.size());
	116	for (int i=0; i < _spVec.size(); ++i) {
	117	_pij._V[i].fillPij(_et,_spVec[i]);
	118	}
	119	computeUpAlg cupAlg;
	120	for (int pos=0; pos < _sc.seqLen(); ++pos) {
	121	for (int categor = 0; categor < _spVec.size(); ++categor) {
	122	cupAlg.fillComputeUp(_et,_sc,pos,_pij[categor],_cup[pos][categor]);
	123	}
	124	}
	125	}
	126
	127	void bblEM2codon::computeDown(const int pos){
	128	computeDownAlg cdownAlg;
	129	for (int categor = 0; categor < _distr->categories(); ++categor) {
	130	cdownAlg.fillComputeDown(_et,_sc,pos,_pij[categor],_cdown[categor],_cup[pos][categor]);
	131	}
	132	}
	133
	134	void bblEM2codon::addCounts(const int pos){
	135	//MDOUBLE posProb =
	136	// likelihoodComputation::getProbOfPosWhenUpIsFilledGam(pos,_et,_sc,_sp,_cup);
	137
	138	MDOUBLE weig = (_weights ? (*_weights)[pos] : 1.0);
	139	if (weig == 0) return;
	140	treeIterDownTopConst tIt(_et);
	141	for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
	142	if (!tIt->isRoot()) {
	143	addCounts(pos,mynode,_posLike[pos],weig);
	144	}
	145	}
	146	}
	147
	148	void bblEM2codon::addCounts(const int pos, tree::nodeP mynode, const MDOUBLE posProb, const MDOUBLE weig){
	149
	150	computeCounts cc;
	151	for (int categor =0; categor< _distr->categories(); ++ categor) {
	152	cc.computeCountsNodeFatherNodeSonHomPos(_sc,
	153	_pij[categor],
	154	_spVec[categor],
	155	_cup[pos][categor],
	156	_cdown[categor],
	157	weig,
	158	posProb,
	159	mynode,
	160	_computeCountsV[mynode->id()][categor],
	161	_distr->ratesProb(categor));
	162	}
	163	}
	164

+54

-0

libs/phylogeny/bblEM2codon.h less more

	0	//copy of bblEM of the lib + changing to codon model
	1	#ifndef ___BBL_EM_2_CODON_H
	2	#define ___BBL_EM_2_CODON_H
	3
	4	#include "definitions.h"
	5	#include "tree.h"
	6	#include "stochasticProcess.h"
	7	#include "sequenceContainer.h"
	8	#include "countTableComponent.h"
	9	#include "computePijComponent.h"
	10	#include "suffStatComponent.h"
	11	#include <vector>
	12	using namespace std;
	13
	14	class bblEM2codon {
	15	public:
	16	explicit bblEM2codon(tree& et,
	17	const sequenceContainer& sc,
	18	const vector<stochasticProcess> &spVec,
	19	const distribution *in_distr,
	20	const Vdouble * weights = NULL,
	21	const int maxIterations=50,
	22	const MDOUBLE epsilon=0.05,
	23	const MDOUBLE tollForPairwiseDist=0.001);
	24	MDOUBLE getTreeLikelihood() const {return _treeLikelihood;}
	25	virtual ~bblEM2codon();
	26	private:
	27	MDOUBLE compute_bblEM(const int maxIterations,
	28	const MDOUBLE epsilon,
	29	const MDOUBLE tollForPairwiseDist);
	30	void bblEM_it(const MDOUBLE tollForPairwiseDist);
	31	void computeDown(const int pos);
	32	void computeUp();
	33	void addCounts(const int pos);
	34	void addCounts(const int pos, tree::nodeP mynode, const MDOUBLE posProb, const MDOUBLE weig);
	35	void optimizeBranches(const MDOUBLE tollForPairwiseDist);
	36	void allocatePlace();
	37
	38
	39	MDOUBLE _treeLikelihood;
	40	tree& _et;
	41	const sequenceContainer& _sc;
	42	const vector<stochasticProcess>& _spVec;
	43	const distribution *_distr;
	44	vector<countTableComponentGam> _computeCountsV; // for each node - a table of ratealphalph
	45	computePijGam _pij;
	46	suffStatGlobalGam _cup;
	47	suffStatGlobalGamPos _cdown;
	48	const Vdouble * _weights;
	49	Vdouble _posLike;
	50
	51	};
	52
	53	#endif

+51

-0

libs/phylogeny/bblEMProportional.h less more

	0	// $Id: bblEMProportional.h 9304 2011-02-20 16:53:19Z rubi $
	1	#ifndef ___BBL_EM_PROPORTIONAL_H
	2	#define ___BBL_EM_PROPORTIONAL_H
	3
	4	#include "definitions.h"
	5	#include "tree.h"
	6	#include "stochasticProcess.h"
	7	#include "sequenceContainer.h"
	8
	9	#include <vector>
	10	using namespace std;
	11
	12
	13	class bblEMProportional {
	14	public:
	15	explicit bblEMProportional(tree& et,
	16	const vector<sequenceContainer>& sc,
	17	const vector<stochasticProcess>& sp,
	18	const vector<Vdouble > weights = NULL,
	19	const int maxIterations=50,
	20	const MDOUBLE epsilon=0.05,
	21	const MDOUBLE tollForPairwiseDist=0.0001);
	22	MDOUBLE getTreeLikelihood() const {return _treeLikelihood;}
	23
	24
	25	private:
	26	MDOUBLE compute_bblEMProp(const int maxIterations,const MDOUBLE epsilon,const MDOUBLE tollForPairwiseDist);
	27	void allocatePlaceProp();
	28	void computeUpProp();
	29	void bblEM_itProp(const MDOUBLE tollForPairwiseDist);
	30	void computeDownProp(const int gene, const int pos);
	31	void addCountsProp(const int gene, const int pos);
	32	void addCountsProp(const int gene,const int pos, tree::nodeP mynode, const doubleRep posProb, const MDOUBLE weig);
	33	void optimizeBranchesProp(const MDOUBLE tollForPairwiseDist);
	34
	35	MDOUBLE _treeLikelihood;
	36	tree& _et;
	37	const vector<sequenceContainer>& _sc;
	38	const vector<stochasticProcess>& _sp;
	39	const vector<Vdouble > _weights;
	40	int _numberOfGenes;
	41	vector< vector<countTableComponentGam> > _computeCountsV; // for each gene, for each node - a table of ratealphalph
	42	vector<suffStatGlobalGam> _cup;
	43	vector<suffStatGlobalGamPos> _cdown;
	44	vector<computePijGam> _pij;
	45	VVdoubleRep _posLike;
	46
	47
	48	};
	49
	50	#endif

+225

-0

libs/phylogeny/bblEMProportionalEB.cpp less more

	0	// $Id: bblEMProprtional.cpp 962 2006-11-07 15:13:34Z privmane $
	1	#include "bblEM.h"
	2	#include "bblEMProportionalEB.h"
	3	#include "likelihoodComputation.h"
	4	using namespace likelihoodComputation;
	5	#include "computeUpAlg.h"
	6	#include "computeDownAlg.h"
	7	#include "computeCounts.h"
	8	#include "treeIt.h"
	9	#include "fromCountTableComponentToDistance.h"
	10	#include <ctime>//#define VERBOS
	11	#include "fromCountTableComponentToDistancePropEB.h"
	12
	13	bblEMProportionalEB::bblEMProportionalEB(tree& et,
	14	const vector<sequenceContainer>& sc,
	15	multipleStochasticProcess* msp,
	16	const gammaDistribution* pProportionDist,
	17	const bool optimizeSelectedBranches,
	18	const vector<Vdouble > weights,
	19	const int maxIterations,
	20	const MDOUBLE epsilon,
	21	const MDOUBLE tollForPairwiseDist,
	22	const MDOUBLE* likelihoodLast):
	23
	24	_et(et),_sc(sc),_msp(msp),_pProportionDist(pProportionDist),_weights (weights),_optimizeSelectedBranches(optimizeSelectedBranches) {
	25	_numberOfGenes = _sc.size();
	26	assert(_msp->getSPVecSize() == _sc.size());
	27	_treeLikelihoodVec = compute_bblEMPropEB(maxIterations,epsilon,tollForPairwiseDist,likelihoodLast);
	28	}
	29
	30	Vdouble bblEMProportionalEB::compute_bblEMPropEB(
	31	const int maxIterations,
	32	const MDOUBLE epsilon,
	33	const MDOUBLE tollForPairwiseDist,
	34	const MDOUBLE* likelihoodLast){
	35	LOGnOUT(5,<<"Allocating place"<<endl);
	36	allocatePlacePropEB();
	37	LOGnOUT(5,<<"Done Allocating place"<<endl);
	38	Vdouble oldLvec(_numberOfGenes,VERYSMALL);
	39	Vdouble currLvec;
	40	currLvec.resize(_numberOfGenes);
	41	tree oldT = _et;
	42	//doubleRep epsilonDR(epsilon);//DR
	43	for (int i=0; i < maxIterations; ++i) {
	44	LOGnOUT(5,<<"Calling computeUpPropEB on iteration "<<i<<endl);
	45	computeUpPropEB();
	46	for (int geneN=0; geneN < _numberOfGenes; ++geneN) {
	47	currLvec[geneN] = likelihoodComputation::getTreeLikelihoodFromUp2(_et,_sc[geneN],_msp->getSp(geneN),_cup[geneN],_pProportionDist,_posLike[geneN],(_weights?(_weights)[geneN]:NULL));
	48	}
	49	LOGnOUT(5,<<"--- Iter="<<i<<" logL="<<sumVdouble(currLvec)<<endl);
	50	if(sumVdouble(oldLvec)<=sumVdouble(currLvec)){ // make sure not to use tree with lower likelihood then last computed likelihood (before BBL-EM)
	51	LOGnOUT(4,<<"Likelihood improved. oldL = "<<sumVdouble(oldLvec)<<" newL = "<<sumVdouble(currLvec)<<endl);
	52	if(likelihoodLast){
	53	if(*likelihoodLast<=sumVdouble(currLvec))
	54	oldT = _et; // L didn't go down
	55	else
	56	LOGnOUT(4,<<"Likelihood went down compared pre-BBL oldL="<<*likelihoodLast<<" newL="<<sumVdouble(currLvec)<<" Do not update tree"<<endl);
	57	}
	58	else{
	59	oldT = _et; // L didn't go down
	60	LOGnOUT(7,<<"Tree Updated"<<endl);
	61	}
	62	}
	63	else
	64	LOGnOUT(4,<<"Likelihood did not improve. oldL="<<sumVdouble(oldLvec)<<" newL="<<sumVdouble(currLvec)<<" Do not update tree"<<endl);
	65
	66	if (sumVdouble(currLvec) < sumVdouble(oldLvec) + epsilon) { // need to break
	67	if (sumVdouble(currLvec)<sumVdouble(oldLvec)) {
	68	_et = oldT; //return to older tree
	69	LOGnOUT(4,<<"Finished bblEMPropEB. Likelihood ="<<sumVdouble(oldLvec)<<endl);
	70	return oldLvec; // keep the old tree, and old likelihood
	71	} else {
	72	//update the tree and likelihood and return
	73	LOGnOUT(4,<<"Finished bblEMPropEB. Likelihood ="<<sumVdouble(currLvec)<<endl);
	74	return currLvec;
	75	}
	76	}
	77	bblEM_itPropEB(tollForPairwiseDist);
	78	oldLvec = currLvec;
	79	}
	80	// in the case were we reached max_iter, we have to recompute the likelihood of the new tree...
	81	computeUpPropEB();
	82	for (int geneN=0; geneN < _numberOfGenes; ++geneN) {
	83	currLvec[geneN] = likelihoodComputation::getTreeLikelihoodFromUp2(_et,_sc[geneN],_msp->getSp(geneN),_cup[geneN],_pProportionDist,_posLike[geneN],(_weights?(_weights)[geneN]:NULL));
	84	}
	85	if (sumVdouble(currLvec)<sumVdouble(oldLvec)) {
	86	_et = oldT;
	87	LOGnOUT(4,<<"Finished bblEMPropEB max iter. Likelihood ="<<sumVdouble(oldLvec)<<endl);
	88	return oldLvec; // keep the old tree, and old likelihood
	89	}
	90	else{
	91	LOGnOUT(4,<<"Finished bblEMPropEB max iter. Likelihood ="<<sumVdouble(currLvec)<<endl);
	92	return currLvec;
	93	}
	94	}
	95
	96	void bblEMProportionalEB::allocatePlacePropEB() {
	97	_computeCountsV.resize(_numberOfGenes);
	98	_cup.resize(_numberOfGenes);
	99	_cdown.resize(_numberOfGenes);
	100	_pij.resize(_numberOfGenes);
	101	_posLike.resize(_numberOfGenes);
	102	for (int geneN=0; geneN < _numberOfGenes; ++geneN) {
	103	_posLike[geneN].resize(_sc[geneN].seqLen());
	104	for(int pos = 0;pos < _sc[geneN].seqLen();++pos){
	105	_posLike[geneN][pos].resize(_pProportionDist->categories(),0.0);
	106	}
	107	stochasticProcess * sp = _msp->getSp(geneN);
	108	_pij[geneN].resize(_pProportionDist->categories());
	109	_computeCountsV[geneN].resize(_et.getNodesNum()); //initiateTablesOfCounts
	110	for (int i=0; i < _computeCountsV[geneN].size(); ++i) {
	111	_computeCountsV[geneN][i].countTableComponentAllocatePlace(sp->alphabetSize(),_pProportionDist->categories(),sp->categories());
	112	}
	113	_cup[geneN].allocatePlace(_sc[geneN].seqLen(),_pProportionDist->categories(),sp->categories(),_et.getNodesNum(), _sc[geneN].alphabetSize());
	114	_cdown[geneN].allocatePlace(_pProportionDist->categories(),sp->categories(),_et.getNodesNum(), _sc[geneN].alphabetSize());
	115	}
	116	}
	117
	118	void bblEMProportionalEB::computeUpPropEB(){
	119	for (int geneN=0; geneN < _numberOfGenes; ++geneN) {
	120	for(int globalRateCategor = 0;globalRateCategor < _pProportionDist->categories();++globalRateCategor){
	121	_msp->getSp(geneN)->setGlobalRate(_pProportionDist->rates(globalRateCategor));
	122	_pij[geneN][globalRateCategor].fillPij(_et,*_msp->getSp(geneN),0); // 0 is becaues we compute Pij(t) and not its derivations...
	123	computeUpAlg cupAlg;
	124	for (int pos=0; pos < _sc[geneN].seqLen(); ++pos) {
	125	for (int localRateCategor = 0; localRateCategor < _msp->getSp(geneN)->categories(); ++localRateCategor) {
	126	cupAlg.fillComputeUp(_et,_sc[geneN],pos,_pij[geneN][globalRateCategor][localRateCategor],_cup[geneN][pos][globalRateCategor][localRateCategor]);
	127	}
	128	}
	129	}
	130	}
	131	}
	132
	133	void bblEMProportionalEB::bblEM_itPropEB(const MDOUBLE tollForPairwiseDist){
	134	for (int geneN=0; geneN < _numberOfGenes; ++geneN) {
	135	for (int treeNode=0; treeNode < _computeCountsV[geneN].size(); ++treeNode) {
	136	_computeCountsV[geneN][treeNode].zero();
	137	}
	138	for (int pos=0; pos < _sc[geneN].seqLen(); ++pos) {
	139	computeDownPropEB(geneN,pos);
	140	addCountsPropEB(geneN,pos); // computes the counts and adds to the table.
	141	}
	142	}
	143	optimizeBranchesPropEB(tollForPairwiseDist);
	144	}
	145
	146	void bblEMProportionalEB::computeDownPropEB(const int gene, const int pos){
	147	computeDownAlg cdownAlg;
	148	stochasticProcess * sp = _msp->getSp(gene);
	149	for (int globalRateCategor = 0; globalRateCategor < _pProportionDist->categories(); ++globalRateCategor) {
	150	for (int localRateCategor = 0; localRateCategor < sp->categories(); ++localRateCategor) {
	151	//no need to set the global rate for each sp cause it was already performed in computeUpPropEB
	152	cdownAlg.fillComputeDown(_et,_sc[gene],pos,_pij[gene][globalRateCategor][localRateCategor],_cdown[gene][globalRateCategor][localRateCategor],_cup[gene][pos][globalRateCategor][localRateCategor]);
	153	}
	154	}
	155	}
	156
	157	void bblEMProportionalEB::addCountsPropEB(const int gene, const int pos){
	158	vector<MDOUBLE> * weightsOfGene = (_weights?(*_weights)[gene]:NULL);
	159	MDOUBLE weig = (weightsOfGene ? (*weightsOfGene)[pos] : 1.0);
	160	if (weig == 0) return;
	161	treeIterDownTopConst tIt(_et);
	162	for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
	163	if (!tIt->isRoot()) {
	164	addCountsPropEB(gene,pos,mynode,_posLike[gene][pos],weig);
	165	}
	166	}
	167	}
	168
	169	void bblEMProportionalEB::addCountsPropEB(const int gene,const int pos, tree::nodeP mynode, const VdoubleRep posProb, const MDOUBLE weig){
	170	computeCounts cc;
	171	stochasticProcess * sp = _msp->getSp(gene);
	172	for (int globalRateCategor =0; globalRateCategor< _pProportionDist->categories(); ++globalRateCategor) {
	173	for (int localRateCategor =0; localRateCategor < sp->categories(); ++localRateCategor) {
	174	//cc.computeCountsNodeFatherNodeSonHomPosProportionalEB(_sc[gene],
	175	// _pij[gene][globalRateCategor][localRateCategor],
	176	// *sp,
	177	// _cup[gene][pos][globalRateCategor][localRateCategor],
	178	// _cdown[gene][globalRateCategor][localRateCategor],
	179	// weig,
	180	// posProb,
	181	// mynode,
	182	// _computeCountsV[gene][mynode->id()][globalRateCategor][localRateCategor],
	183	// _pProportionDist->ratesProb(globalRateCategor)*sp->ratesProb(localRateCategor));
	184	cc.computeCountsNodeFatherNodeSonHomPosProportionalEB(_sc[gene],
	185	_pij[gene][globalRateCategor][localRateCategor],
	186	*sp,
	187	_cup[gene][pos][globalRateCategor][localRateCategor],
	188	_cdown[gene][globalRateCategor][localRateCategor],
	189	weig,
	190	posProb,
	191	mynode,
	192	_computeCountsV[gene][mynode->id()][globalRateCategor][localRateCategor]);
	193	}
	194	}
	195	}
	196
	197	/*
	198	//tal's old implementation, where i think there's a bug cause he sends _computeCountsV[mynode->id()] to the
	199	//fromCountTableComponentToDistanceProp constructor, but the first dimension of _computeCountsV is the genes and
	200	//the tree nodes is only the second dimension
	201	void bblEMProportionalEB::optimizeBranchesPropEB(const MDOUBLE tollForPairwiseDist){
	202	treeIterDownTopConst tIt(_et);
	203	for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
	204	if (!tIt->isRoot()) {
	205	fromCountTableComponentToDistanceProp from1(_computeCountsV[mynode->id()],_sp,tollForPairwiseDist,mynode->dis2father());
	206	from1.computeDistance();
	207	mynode->setDisToFather(from1.getDistance());
	208	}
	209	}
	210	}
	211	*/
	212
	213	void bblEMProportionalEB::optimizeBranchesPropEB(const MDOUBLE tollForPairwiseDist){
	214	treeIterDownTopConst tIt(_et);
	215	for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
	216	if (!tIt->isRoot()) {
	217	if((_optimizeSelectedBranches) && (tIt->getComment() != "1")) continue; //only selected branhes will be optimized
	218	fromCountTableComponentToDistancePropEB from1(_computeCountsV,mynode->id(),_msp,_pProportionDist,tollForPairwiseDist,mynode->dis2father());
	219	from1.computeDistance();
	220	mynode->setDisToFather(from1.getDistance());
	221	}
	222	}
	223	}
	224

+55

-0

libs/phylogeny/bblEMProportionalEB.h less more

	0	// $Id: bblEMProportional.h 962 2006-11-07 15:13:34Z privmane $
	1	#ifndef ___BBL_EM_PROPORTIONALEB_H
	2	#define ___BBL_EM_PROPORTIONALEB_H
	3
	4	#include "definitions.h"
	5	#include "tree.h"
	6	#include "stochasticProcess.h"
	7	#include "sequenceContainer.h"
	8	#include "multipleStochasticProcess.h"
	9
	10	#include <vector>
	11	using namespace std;
	12
	13
	14	class bblEMProportionalEB {
	15	public:
	16	explicit bblEMProportionalEB(tree& et,
	17	const vector<sequenceContainer>& sc,
	18	multipleStochasticProcess* msp,
	19	const gammaDistribution* pProportionDist,
	20	const bool optimizeSelectedBranches=false,
	21	const vector<Vdouble > weights = NULL,
	22	const int maxIterations=50,
	23	const MDOUBLE epsilon=0.05,
	24	const MDOUBLE tollForPairwiseDist=0.0001,
	25	const MDOUBLE* likelihoodLast=NULL);
	26	Vdouble getTreeLikelihood() const {return _treeLikelihoodVec;}
	27
	28	private:
	29	Vdouble compute_bblEMPropEB(const int maxIterations,const MDOUBLE epsilon,const MDOUBLE tollForPairwiseDist,const MDOUBLE* likelihoodLast=NULL);
	30	void allocatePlacePropEB();
	31	void computeUpPropEB();
	32	void bblEM_itPropEB(const MDOUBLE tollForPairwiseDist);
	33	void computeDownPropEB(const int gene, const int pos);
	34	void addCountsPropEB(const int gene, const int pos);
	35	void addCountsPropEB(const int gene,const int pos, tree::nodeP mynode, const VdoubleRep posProb, const MDOUBLE weig);
	36	void optimizeBranchesPropEB(const MDOUBLE tollForPairwiseDist);
	37
	38	Vdouble _treeLikelihoodVec;
	39	tree& _et;
	40	const vector<sequenceContainer>& _sc;
	41	multipleStochasticProcess* _msp;
	42	const gammaDistribution* _pProportionDist;
	43	const vector<Vdouble > _weights;
	44	int _numberOfGenes;
	45	vector< vector<countTableComponentGamProportional> > _computeCountsV; // for each gene, for each node - a table of globalRatelocalRatealph*alph - [globalRateCategory][localRateCategory][character]
	46	vector<suffStatGlobalGamProportional> _cup; //[gene][pos][globalRateCategory][localRateCategory][nodeID][character]
	47	vector<suffStatGlobalGamProportionalPos> _cdown; //[gene][globalRateCategory][localRateCategory][nodeID][character]
	48	vector< vector<computePijGam> > _pij;//[gene][globalRateCategory]
	49	VVVdoubleRep _posLike;//[gene][pos][globalRateCategory]
	50	const bool _optimizeSelectedBranches;
	51
	52	};
	53
	54	#endif

+142

-0

libs/phylogeny/bblEMProprtional.cpp less more

	0	// $Id: bblEMProprtional.cpp 962 2006-11-07 15:13:34Z privmane $
	1	#include "bblEM.h"
	2	#include "bblEMProportional.h"
	3	#include "likelihoodComputation.h"
	4	using namespace likelihoodComputation;
	5	#include "computeUpAlg.h"
	6	#include "computeDownAlg.h"
	7	#include "computeCounts.h"
	8	#include "treeIt.h"
	9	#include "fromCountTableComponentToDistance.h"
	10	#include <ctime>//#define VERBOS
	11	#include "fromCountTableComponentToDistanceProp.h"
	12
	13	bblEMProportional::bblEMProportional(tree& et,
	14	const vector<sequenceContainer>& sc,
	15	const vector<stochasticProcess>& sp,
	16	const vector<Vdouble > weights,
	17	const int maxIterations,
	18	const MDOUBLE epsilon,
	19	const MDOUBLE tollForPairwiseDist):
	20
	21	_et(et),_sc(sc),_sp(sp),_weights (weights) {
	22	_numberOfGenes = _sc.size();
	23	assert(_sp.size() == _sc.size());
	24	_treeLikelihood = compute_bblEMProp(maxIterations,epsilon,tollForPairwiseDist);
	25	}
	26
	27	MDOUBLE bblEMProportional::compute_bblEMProp(
	28	const int maxIterations,
	29	const MDOUBLE epsilon,
	30	const MDOUBLE tollForPairwiseDist){
	31	allocatePlaceProp();
	32	MDOUBLE oldL=VERYSMALL;
	33	MDOUBLE currL = VERYSMALL;
	34	for (int i=0; i < maxIterations; ++i) {
	35	computeUpProp();
	36	currL = 0;
	37	for (int geneN=0; geneN < _numberOfGenes; ++geneN) {
	38	currL += likelihoodComputation::getTreeLikelihoodFromUp2(_et,_sc[geneN],_sp[geneN],_cup[geneN],_posLike[geneN],(_weights?(*_weights)[geneN]:NULL));
	39	}
	40	tree oldT = _et;
	41	if (currL < oldL + epsilon) { // need to break
	42	if (currL<oldL) {
	43	_et = oldT;
	44	return oldL; // keep the old tree, and old likelihood
	45	} else {
	46	//update the tree and likelihood and return
	47	return currL;
	48	}
	49	}
	50	bblEM_itProp(tollForPairwiseDist);
	51	oldL = currL;
	52	}
	53	return currL;
	54	}
	55
	56	void bblEMProportional::allocatePlaceProp() {
	57	_computeCountsV.resize(_numberOfGenes);
	58	_cup.resize(_numberOfGenes);
	59	_cdown.resize(_numberOfGenes);
	60	_pij.resize(_numberOfGenes);
	61	_posLike.resize(_numberOfGenes);
	62	for (int geneN=0; geneN < _numberOfGenes; ++geneN) {
	63	_computeCountsV[geneN].resize(_et.getNodesNum()); //initiateTablesOfCounts
	64	for (int i=0; i < _computeCountsV[geneN].size(); ++i) {
	65	_computeCountsV[geneN][i].countTableComponentAllocatePlace(_sp[geneN].alphabetSize(),_sp[geneN].categories());
	66	}
	67	_cup[geneN].allocatePlace(_sc[geneN].seqLen(),_sp[geneN].categories(), _et.getNodesNum(), _sc[geneN].alphabetSize());
	68	_cdown[geneN].allocatePlace(_sp[geneN].categories(), _et.getNodesNum(), _sc[geneN].alphabetSize());
	69	}
	70	}
	71
	72	void bblEMProportional::computeUpProp(){
	73	for (int geneN=0; geneN < _numberOfGenes; ++geneN) {
	74	_pij[geneN].fillPij(_et,_sp[geneN],0); // 0 is becaues we compute Pij(t) and not its derivations...
	75	computeUpAlg cupAlg;
	76	for (int pos=0; pos < _sc[geneN].seqLen(); ++pos) {
	77	for (int categor = 0; categor < _sp[geneN].categories(); ++categor) {
	78	cupAlg.fillComputeUp(_et,_sc[geneN],pos,_pij[geneN][categor],_cup[geneN][pos][categor]);
	79	}
	80	}
	81	}
	82	}
	83
	84	void bblEMProportional::bblEM_itProp(const MDOUBLE tollForPairwiseDist){
	85	for (int geneN=0; geneN < _numberOfGenes; ++geneN) {
	86	for (int i=0; i < _computeCountsV.size(); ++i) {
	87	_computeCountsV[geneN][i].zero();
	88	}
	89	for (int i=0; i < _sc[geneN].seqLen(); ++i) {
	90	computeDownProp(geneN,i);
	91	addCountsProp(geneN,i); // computes the counts and adds to the table.
	92	}
	93	}
	94	optimizeBranchesProp(tollForPairwiseDist);
	95	}
	96
	97	void bblEMProportional::computeDownProp(const int gene, const int pos){
	98	computeDownAlg cdownAlg;
	99	for (int categor = 0; categor < _sp[gene].categories(); ++categor) {
	100	cdownAlg.fillComputeDown(_et,_sc[gene],pos,_pij[gene][categor],_cdown[gene][categor],_cup[gene][pos][categor]);
	101	}
	102	}
	103
	104	void bblEMProportional::addCountsProp(const int gene, const int pos){
	105	vector<MDOUBLE> * weightsOfGene = (_weights?(*_weights)[gene]:NULL);
	106	MDOUBLE weig = (weightsOfGene ? (*weightsOfGene)[pos] : 1.0);
	107	if (weig == 0) return;
	108	treeIterDownTopConst tIt(_et);
	109	for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
	110	if (!tIt->isRoot()) {
	111	addCountsProp(gene,pos,mynode,_posLike[gene][pos],weig);
	112	}
	113	}
	114	}
	115
	116	void bblEMProportional::addCountsProp(const int gene,const int pos, tree::nodeP mynode, const doubleRep posProb, const MDOUBLE weig){
	117	computeCounts cc;
	118	for (int categor =0; categor< _sp[gene].categories(); ++ categor) {
	119	cc.computeCountsNodeFatherNodeSonHomPos(_sc[gene],
	120	_pij[gene][categor],
	121	_sp[gene],
	122	_cup[gene][pos][categor],
	123	_cdown[gene][categor],
	124	weig,
	125	posProb,
	126	mynode,
	127	_computeCountsV[gene][mynode->id()][categor],
	128	_sp[gene].ratesProb(categor));
	129	}
	130	}
	131
	132	void bblEMProportional::optimizeBranchesProp(const MDOUBLE tollForPairwiseDist){
	133	treeIterDownTopConst tIt(_et);
	134	for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
	135	if (!tIt->isRoot()) {
	136	fromCountTableComponentToDistanceProp from1(_computeCountsV[mynode->id()],_sp,tollForPairwiseDist,mynode->dis2father());
	137	from1.computeDistance();
	138	mynode->setDisToFather(from1.getDistance());
	139	}
	140	}
	141	}

+28

-0

libs/phylogeny/bblEMSeperate.cpp less more

	0	// $Id: bblEMSeperate.cpp 962 2006-11-07 15:13:34Z privmane $
	1	#include "bblEM.h"
	2	#include "bblEMSeperate.h"
	3	#include "logFile.h"
	4	//#define VERBOS
	5
	6
	7	bblEMSeperate::bblEMSeperate(vector<tree>& et,
	8	const vector<sequenceContainer>& sc,
	9	const vector<stochasticProcess> &sp,
	10	const vector<Vdouble > weights,
	11	const int maxIterations,
	12	const MDOUBLE epsilon,
	13	const MDOUBLE tollForPairwiseDist) {
	14	MDOUBLE newL =0;
	15	for (int i=0; i < et.size(); ++i) {
	16	#ifdef VERBOS
	17	LOG(5,<<" OPTIMIZING GENE "<<i<<" ... "<<endl);
	18	#endif
	19	bblEM bblEM1(et[i],sc[i],sp[i],(weights?(*weights)[i]:NULL),maxIterations,epsilon);
	20	MDOUBLE resTmp = bblEM1.getTreeLikelihood();
	21	#ifdef VERBOS
	22	LOG(5,<<" GENE "<<i<<" LOG-L = "<< resTmp<<endl);
	23	#endif
	24	newL += resTmp;
	25	}
	26	_treeLikelihood = newL;
	27	}

+30

-0

libs/phylogeny/bblEMSeperate.h less more

	0	// $Id: bblEMSeperate.h 962 2006-11-07 15:13:34Z privmane $
	1	#ifndef ___BBL_EM_SEPERATE_H
	2	#define ___BBL_EM_SEPERATE_H
	3
	4	#include "definitions.h"
	5	#include "tree.h"
	6	#include "stochasticProcess.h"
	7	#include "sequenceContainer.h"
	8
	9	#include <vector>
	10	using namespace std;
	11
	12
	13	class bblEMSeperate {
	14	public:
	15	explicit bblEMSeperate(vector<tree>& et,
	16	const vector<sequenceContainer>& sc,
	17	const vector<stochasticProcess> &sp,
	18	const vector<Vdouble > weights,
	19	const int maxIterations=50,
	20	const MDOUBLE epsilon=0.05,
	21	const MDOUBLE tollForPairwiseDist=0.0001);
	22	MDOUBLE getTreeLikelihood() const {return _treeLikelihood;}
	23
	24	private:
	25	MDOUBLE _treeLikelihood;
	26
	27	};
	28
	29	#endif

+236

-0

libs/phylogeny/bblEMfixRoot.cpp less more

	0	// $Id: bblEM.cpp 4478 2008-07-17 17:09:55Z cohenofi $
	1	#include "bblEMfixRoot.h"
	2	#include "likelihoodComputation.h"
	3	using namespace likelihoodComputation;
	4	#include "computeUpAlg.h"
	5	#include "computeDownAlg.h"
	6	#include "computeCounts.h"
	7	#include "treeIt.h"
	8	#include "fromCountTableComponentToDistancefixRoot.h"
	9	#include <ctime>
	10
	11	bblEMfixRoot::bblEMfixRoot(tree& et,
	12	const sequenceContainer& sc,
	13	const stochasticProcess& sp,
	14	const Vdouble * weights,
	15	const int maxIterations,
	16	const MDOUBLE epsilon,
	17	const MDOUBLE tollForPairwiseDist,
	18	unObservableData* unObservableData_p,
	19	const MDOUBLE* likelihoodLast) :
	20	_et(et),_sc(sc),_sp(sp),_weights (weights),_unObservableData_p(unObservableData_p)
	21	{
	22	//if(!plogLforMissingData){
	23	// _plogLforMissingData = NULL;
	24	//}
	25	_treeLikelihood = compute_bblEM(maxIterations,epsilon,tollForPairwiseDist,likelihoodLast);
	26	}
	27
	28	/********************************************************************************************
	29	*********************************************************************************************/
	30	MDOUBLE bblEMfixRoot::compute_bblEM(
	31	const int maxIterations,
	32	const MDOUBLE epsilon,
	33	const MDOUBLE tollForPairwiseDist,
	34	const MDOUBLE* likelihoodLast){
	35	allocatePlace();
	36	MDOUBLE oldL=VERYSMALL;
	37	MDOUBLE currL = VERYSMALL;
	38	tree oldT = _et;
	39	for (int i=0; i < maxIterations; ++i) {
	40	//if(_unObservableData_p)
	41	// _unObservableData_p->setLforMissingData(_et,&_sp);
	42
	43	computeUp();
	44	currL = likelihoodComputation::getTreeLikelihoodFromUp2(_et,_sc,_sp,_cup,_posLike,_weights,_unObservableData_p);
	45	LOGnOUT(4,<<"--- Iter="<<i<<" logL="<<currL<<endl);
	46	//if(_unObservableData_p){
	47	//if(!likelihoodLast)
	48	// LOGnOUT(4,<<" WARNING!!! likelihoodLast was not sent to bblEM with unObservableData prog"<<endl);
	49	if(oldL<=currL){ // make sure not to use tree with lower likelihood then last computed likelihood (before BBL-EM)
	50	if(likelihoodLast){
	51	if(*likelihoodLast<=currL)
	52	oldT = _et; // L didn't go down
	53	else
	54	LOGnOUT(4,<<"Likelihood went down compared pre-BBL oldL="<<*likelihoodLast<<" newL="<<currL<<" Do not update tree"<<endl);
	55	}
	56	else{
	57	oldT = _et; // L didn't go down
	58	LOGnOUT(7,<<"LikelihoodLast was not sent to bblEM"<<endl);
	59	}
	60	}
	61	else
	62	LOGnOUT(4,<<"Likelihood went down oldL="<<oldL<<" newL="<<currL<<" Do not update tree"<<endl);
	63	//}
	64	//else
	65	//oldT = _et; // all application that don't need to correct for unObservableData, update tree
	66
	67
	68	if (currL < oldL + epsilon) { // need to break
	69	if (currL<=oldL) {
	70	_et = oldT;
	71	if(_unObservableData_p)
	72	_unObservableData_p->setLforMissingData(_et,&_sp);
	73	return oldL; // keep the old tree, and old likelihood
	74	} else {
	75	//update the tree and likelihood and return
	76	return currL;
	77	}
	78	}
	79	bblEM_it(tollForPairwiseDist);
	80	oldL = currL;
	81	}
	82	// in the case were we reached max_iter, we have to recompute the likelihood of the new tree...
	83	computeUp();
	84	if(_unObservableData_p)
	85	_unObservableData_p->setLforMissingData(_et,&_sp);
	86	currL = likelihoodComputation::getTreeLikelihoodFromUp2(_et,_sc,_sp,_cup,_posLike,_weights, _unObservableData_p);
	87
	88	if (currL<=oldL)
	89	{
	90	_et = oldT;
	91	if(_unObservableData_p)
	92	_unObservableData_p->setLforMissingData(_et,&_sp);
	93	return oldL; // keep the old tree, and old likelihood
	94	}
	95	else
	96	return currL;
	97	}
	98
	99	/********************************************************************************************
	100	*********************************************************************************************/
	101	void bblEMfixRoot::allocatePlace() {
	102
	103	_computeCountsV.resize(_et.getNodesNum());//initiateTablesOfCounts
	104	for (int node=0; node < _computeCountsV.size(); ++node) {
	105	{
	106	_computeCountsV[node].resize(_sp.alphabetSize());
	107	for (int letterAtRoot = 0; letterAtRoot < _computeCountsV[node].size(); ++letterAtRoot)
	108	_computeCountsV[node][letterAtRoot].countTableComponentAllocatePlace(_sp.alphabetSize(),_sp.categories()); //_computeCountsV[node][letterAtRoot][rate][alph][alph]
	109	//_computeCountsV[i][letterAtRoot].zero(); // removed, a BUG, done later
	110	}
	111	}
	112
	113	_cup.allocatePlace(_sc.seqLen(),_sp.categories(), _et.getNodesNum(), _sc.alphabetSize());
	114	_cdown.resize(_sp.categories());
	115	for (int categor = 0; categor < _sp.categories(); ++categor)
	116	{
	117	// stay with the convention of fillComputeDownNonReversible where the first index is for rate cat and the second is for letterAtRoot
	118	_cdown[categor].allocatePlace(_sp.alphabetSize(), _et.getNodesNum(), _sc.alphabetSize()); //_cdown[categ][letter@root][nodeid][letter][prob]
	119	}
	120	}
	121
	122	/********************************************************************************************
	123	*********************************************************************************************/
	124	void bblEMfixRoot::bblEM_it(const MDOUBLE tollForPairwiseDist){
	125	string costTable = "costTableBBLEMit.txt"; //DEBUG
	126	ofstream costTableStream(costTable.c_str()); //DEBUG
	127	//cout<<"before zero\n";
	128	for (int node=0; node < _computeCountsV.size(); ++node) {
	129	for (int letAtRoot=0; letAtRoot < _computeCountsV[node].size(); ++letAtRoot) {
	130	_computeCountsV[node][letAtRoot].zero();
	131	_computeCountsV[node][letAtRoot].printTable(costTableStream); //DEBUG
	132	}
	133	}
	134	//cout<<"after zero\n";
	135
	136	for (int i=0; i < _sc.seqLen(); ++i) {
	137	computeDown(i);
	138	addCounts(i); // computes the counts and adds to the table.
	139	}
	140	//cout<<"after add counts\n";
	141	for (int node=0; node < _computeCountsV.size(); ++node) {
	142	for (int letAtRoot=0; letAtRoot < _computeCountsV[node].size(); ++letAtRoot) {
	143	_computeCountsV[node][letAtRoot].printTable(costTableStream); //DEBUG
	144	}
	145	}
	146
	147
	148	optimizeBranches(tollForPairwiseDist);
	149	if(_unObservableData_p)
	150	_unObservableData_p->setLforMissingData(_et,&_sp);
	151	}
	152
	153	/********************************************************************************************
	154	*********************************************************************************************/
	155	void bblEMfixRoot::optimizeBranches(const MDOUBLE tollForPairwiseDist){
	156	treeIterDownTopConst tIt(_et);
	157	for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
	158	if (!tIt->isRoot()) {
	159	fromCountTableComponentToDistancefixRoot from1(_computeCountsV[mynode->id()],_sp,tollForPairwiseDist,mynode->dis2father(),_unObservableData_p);
	160	from1.computeDistance();
	161	mynode->setDisToFather(from1.getDistance());
	162
	163	if(false){ //DEBUG
	164	if(_unObservableData_p)
	165	_unObservableData_p->setLforMissingData(_et,&_sp);
	166	computeUp();
	167	MDOUBLE bL = likelihoodComputation::getTreeLikelihoodFromUp2(_et,_sc,_sp,_cup,_posLike,_weights, _unObservableData_p);
	168	LOG(6,<<" node "<<mynode->name()<<" L= "<<bL<<endl);
	169	}
	170	}
	171	}
	172	}
	173
	174	/********************************************************************************************
	175	*********************************************************************************************/
	176	void bblEMfixRoot::computeUp(){
	177	_pij.fillPij(_et,_sp,0); // 0 is becaues we compute Pij(t) and not its derivations...
	178	computeUpAlg cupAlg;
	179	for (int pos=0; pos < _sc.seqLen(); ++pos) {
	180	for (int categor = 0; categor < _sp.categories(); ++categor) {
	181	cupAlg.fillComputeUp(_et,_sc,pos,_pij[categor],_cup[pos][categor]);
	182	}
	183	}
	184	}
	185
	186	/********************************************************************************************
	187	*********************************************************************************************/
	188	void bblEMfixRoot::computeDown(const int pos){
	189	computeDownAlg cdownAlg;
	190	for (int categor = 0; categor < _sp.categories(); ++categor) {
	191	cdownAlg.fillComputeDownNonReversible(_et,_sc,pos,_pij[categor],_cdown[categor],_cup[pos][categor]);
	192	}
	193	}
	194
	195	/********************************************************************************************
	196	*********************************************************************************************/
	197	void bblEMfixRoot::addCounts(const int pos){
	198	//MDOUBLE posProb =
	199	// likelihoodComputation::getProbOfPosWhenUpIsFilledGam(pos,_et,_sc,_sp,_cup);
	200
	201	MDOUBLE weig = (_weights ? (*_weights)[pos] : 1.0);
	202	if (weig == 0) return;
	203	treeIterDownTopConst tIt(_et);
	204	for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
	205	if (!tIt->isRoot()) {
	206	addCountsFixedRoot(pos,mynode,_posLike[pos],weig);
	207	}
	208	}
	209	}
	210
	211	/********************************************************************************************
	212	*********************************************************************************************/
	213	// fill _computeCountsV: specific node, letterAtRoot and categor at a time
	214	void bblEMfixRoot::addCountsFixedRoot(const int pos, tree::nodeP mynode, const doubleRep posProb, const MDOUBLE weig){
	215
	216	computeCounts cc;
	217	for(int letterAtRoot = 0; letterAtRoot < _sp.alphabetSize(); letterAtRoot++)
	218	{
	219	for (int categor =0; categor< _sp.categories(); ++ categor)
	220	{
	221	cc.computeCountsNodeFatherNodeSonHomPos(_sc,
	222	_pij[categor],
	223	_sp,
	224	_cup[pos][categor],
	225	_cdown[categor][letterAtRoot],
	226	weig,
	227	posProb,
	228	mynode,
	229	_computeCountsV[mynode->id()][letterAtRoot][categor],
	230	_sp.ratesProb(categor),
	231	letterAtRoot); // letterInFather is used in freq? or already by _cdown?
	232	}
	233	}
	234	}
	235

+89

-0

libs/phylogeny/bblEMfixRoot.h less more

	0	// $Id: bblEM.h 4478 2008-07-17 17:09:55Z cohenofi $
	1	#ifndef ___BBL_EM_GL__FIXED_ROOT
	2	#define ___BBL_EM_GL__FIXED_ROOT
	3
	4	/********************************************************************************************
	5	Class::bblEM (with variation: bblEMfixRoot, bblEM2codon)
	6	compute_bblEM
	7	allocatePlace (one more level for fixRoot - in computeDownAlg and countsTableVec)
	8	bblEM_it (called at each iteration of BBL)
	9	foreach pos{
	10	computeDown (use variants for fix root - fillComputeDownNonReversible
	11	vector<suffStatGlobalGamPos> _cdown; //_cdown[categ][letter@root][nodeid][letter][prob])
	12	addCounts
	13	addCountsFixedRoot (based on computeUp and computeDown... fill _computeCountsV)
	14	use class::computeCounts (but no duplicated class!!!)
	15	}
	16	optimizeBranches
	17	foreach node{
	18	class::fromCountTableComponentToDistance (with variation: ...fixRoot, ...2Codon)
	19	computeDistance() + set - based on
	20	class::likeDist (with variation: ...fixRoot, ...2Codon)
	21	giveDistance()
	22	giveDistanceBrent()
	23	C_evallikeDist and C_evallikeDist_d
	24	.... computation based on counts{alph1,alph2, root, rate(sp)}: sumL+= _ctc.getCounts(alph1,alph2,rateCategor)*(log( _sp.Pij_t(alph1,alph2,dist*rate) )-log(_sp.freq(alph2)))
	25
	26
	27	}
	28	*********************************************************************************************/
	29
	30
	31	#include "definitions.h"
	32	#include "tree.h"
	33	#include "stochasticProcess.h"
	34	#include "sequenceContainer.h"
	35	#include "countTableComponent.h"
	36	#include "computePijComponent.h"
	37	#include "suffStatComponent.h"
	38	#include "gainLossAlphabet.h"
	39	#include "unObservableData.h"
	40	#include <vector>
	41
	42	using namespace std;
	43
	44	class bblEMfixRoot {
	45	public:
	46	explicit bblEMfixRoot(tree& et,
	47	const sequenceContainer& sc,
	48	const stochasticProcess& sp,
	49	const Vdouble * weights = NULL,
	50	const int maxIterations=50,
	51	const MDOUBLE epsilon=0.05,
	52	const MDOUBLE tollForPairwiseDist=0.001,
	53	unObservableData* _unObservableData_p=NULL,
	54	const MDOUBLE* likelihoodLast=NULL);
	55	MDOUBLE getTreeLikelihood() const {return _treeLikelihood;}
	56
	57	private:
	58	MDOUBLE compute_bblEM(const int maxIterations,
	59	const MDOUBLE epsilon,
	60	const MDOUBLE tollForPairwiseDist,
	61	const MDOUBLE* likelihoodLast=NULL);
	62	void bblEM_it(const MDOUBLE tollForPairwiseDist);
	63	void computeDown(const int pos);
	64	void computeUp();
	65	void addCounts(const int pos);
	66	void addCountsFixedRoot(const int pos, tree::nodeP mynode, const doubleRep posProb, const MDOUBLE weig);
	67
	68
	69	void optimizeBranches(const MDOUBLE tollForPairwiseDist);
	70	void allocatePlace();
	71
	72
	73
	74	MDOUBLE _treeLikelihood;
	75	tree& _et;
	76	const sequenceContainer& _sc;
	77	const stochasticProcess& _sp;
	78	vector< vector< countTableComponentGam > > _computeCountsV; // _computeCountsV [node] [letter@root] [rate][alph][alph]
	79	computePijGam _pij;
	80	suffStatGlobalGam _cup; //_cup[pos][categ][nodeid][letter][prob]
	81	//suffStatGlobalGamPos _cdown; // for each pos: computeDown(pos); addCounts(pos);
	82	vector<suffStatGlobalGamPos> _cdown; //_cdown[categ][letter@root][nodeid][letter][prob] - since fillComputeDownNonReversible uses this assumption
	83	const Vdouble * _weights;
	84	VdoubleRep _posLike;
	85	unObservableData* _unObservableData_p;
	86	};
	87
	88	#endif

+59

-0

libs/phylogeny/bblLSProportionalEB.cpp less more

	0	#include "bblLSProportionalEB.h"
	1	#include "numRec.h"
	2	#include "logFile.h"
	3	#include "errorMsg.h"
	4
	5
	6	bblLSProportionalEB::bblLSProportionalEB(tree& et, const vector<sequenceContainer>& sc, multipleStochasticProcess* msp, const gammaDistribution* pProportionDist, Vdouble& treeLikelihoodVec, const bool optimizeSelectedBranches, int maxIter, MDOUBLE epsilon)
	7	{
	8	_treeLikelihoodVec = optimizeBranches(et,sc,msp,pProportionDist,treeLikelihoodVec,optimizeSelectedBranches,maxIter,epsilon);
	9	}
	10
	11
	12	Vdouble bblLSProportionalEB::optimizeBranches(tree& et, const vector<sequenceContainer>& sc, multipleStochasticProcess* msp, const gammaDistribution* pProportionDist, Vdouble& inTreeLikelihoodVec, const bool optimizeSelectedBranches, int maxIter, MDOUBLE epsilon)
	13	{
	14	Vdouble treeLikelihoodVec;
	15	if (inTreeLikelihoodVec.empty()){
	16	treeLikelihoodVec = likelihoodComputation::getTreeLikelihoodProportionalAllPosAlphTheSame(et,sc,msp,pProportionDist);
	17	}
	18	else{
	19	treeLikelihoodVec = inTreeLikelihoodVec;
	20	}
	21	MDOUBLE treeLikelihood = sumVdouble(treeLikelihoodVec);
	22	LOGnOUT(5,<<"ll before bblLSr4sp"<<" logL="<<treeLikelihood<<endl);
	23	vector<tree::nodeP> nodesV;
	24	et.getAllNodes(nodesV,et.getRoot());
	25	MDOUBLE prevIterL = VERYSMALL;
	26	for (int iter = 0; iter < maxIter; ++iter)
	27	{
	28	if (treeLikelihood < prevIterL + epsilon){
	29	treeLikelihoodVec = likelihoodComputation::getTreeLikelihoodProportionalAllPosAlphTheSame(et,sc,msp,pProportionDist);
	30	return treeLikelihoodVec; //likelihood converged
	31	}
	32	prevIterL = treeLikelihood;
	33	MDOUBLE paramFound;
	34	MDOUBLE oldBl;
	35	MDOUBLE newL;
	36	for (int i=0; i<nodesV.size(); i++)
	37	{
	38	if (nodesV[i]->isRoot()) continue;
	39	if((optimizeSelectedBranches) && (nodesV[i]->getComment() != "1")) continue; //only selected branhes will be optimized
	40	oldBl = nodesV[i]->dis2father();
	41	newL = -brent(0.0,oldBl,MAX_BRANCH_LENGTH,evalR4SPBranch(nodesV[i],et,sc,msp,pProportionDist),epsilon,&paramFound);
	42	LOGnOUT(4,<<"oldL="<<treeLikelihood<<" newL="<<newL<<" BL="<<nodesV[i]->dis2father()<<endl);
	43	if (newL >= treeLikelihood)
	44	{
	45	treeLikelihood = newL;
	46	nodesV[i]->setDisToFather(paramFound);
	47	}
	48	else //likelihood went down!
	49	{
	50	nodesV[i]->setDisToFather(oldBl); //return to previous BL
	51	LOGnOUT(4,<<"Likelihood went down. oldL="<<treeLikelihood<<" newL="<<newL<<" Do not update tree"<<endl);
	52	}
	53	}
	54	}
	55	treeLikelihoodVec = likelihoodComputation::getTreeLikelihoodProportionalAllPosAlphTheSame(et,sc,msp,pProportionDist);
	56	return treeLikelihoodVec;
	57	}
	58

+54

-0

libs/phylogeny/bblLSProportionalEB.h less more

	0	#ifndef ___R4SP_BBL_LS
	1	#define ___R4SP_BBL_LS
	2
	3	#include "definitions.h"
	4	#include "tree.h"
	5	#include "stochasticProcess.h"
	6	#include "sequenceContainer.h"
	7	#include "multipleStochasticProcess.h"
	8	#include "gammaDistribution.h"
	9	#include "likelihoodComputation.h"
	10	#include <vector>
	11	using namespace std;
	12
	13	#define MAX_BRANCH_LENGTH 10.0
	14
	15	/*
	16	This class optimize the branches using "naive" line search methodology.
	17	go over each branch and optimize it using brent.
	18	In one iteration it optimze seperatly all branches.
	19	This procedure continues until convergence is reached or until the maximum number of iteration is reached.
	20	*/
	21	class bblLSProportionalEB {
	22	public:
	23
	24	explicit bblLSProportionalEB(tree& et, const vector<sequenceContainer>& sc, multipleStochasticProcess* msp, const gammaDistribution* pProportionDist, Vdouble& treeLikelihoodVec, const bool optimizeSelectedBranches=false, int maxIter=50, MDOUBLE epsilon=0.05);
	25	~bblLSProportionalEB() {};
	26	Vdouble getTreeLikelihoodVec() const {return _treeLikelihoodVec;}
	27	private:
	28	Vdouble optimizeBranches(tree& et, const vector<sequenceContainer>& sc, multipleStochasticProcess* msp, const gammaDistribution* pProportionDist, Vdouble& treeLikelihoodVec, const bool optimizeSelectedBranches=false, int maxIter=50, MDOUBLE epsilon=0.05);
	29
	30	private:
	31	Vdouble _treeLikelihoodVec;
	32	};
	33
	34	class evalR4SPBranch{
	35	public:
	36	explicit evalR4SPBranch(tree::nodeP pNode, tree& et, const vector<sequenceContainer>& sc, multipleStochasticProcess* msp, const gammaDistribution* pProportionDist)
	37	:_pNode(pNode),_et(et), _sc(sc), _msp(msp), _pProportionDist(pProportionDist){};
	38	private:
	39	tree::nodeP _pNode;
	40	tree& _et;
	41	const vector<sequenceContainer>& _sc;
	42	multipleStochasticProcess* _msp;
	43	const gammaDistribution* _pProportionDist;
	44	public:
	45	MDOUBLE operator() (MDOUBLE bl) {
	46	_pNode->setDisToFather(bl);
	47	Vdouble likeVec = likelihoodComputation::getTreeLikelihoodProportionalAllPosAlphTheSame(_et,_sc,_msp,_pProportionDist);
	48	MDOUBLE res = sumVdouble(likeVec);
	49	return -res;
	50	}
	51	};
	52
	53	#endif

+444

-0

libs/phylogeny/bestAlpha.cpp less more

	0	// $Id: bestAlpha.cpp 10046 2011-12-09 15:35:00Z rubi $
	1
	2	#include <iostream>
	3	using namespace std;
	4
	5	#include "bestAlpha.h"
	6	#include "bblEM.h"
	7	#include "bblEMProportionalEB.h"
	8	#include "bblLSProportionalEB.h"
	9	#include "numRec.h"
	10	#include "logFile.h"
	11	#include "errorMsg.h"
	12
	13	#ifndef VERBOS
	14	#define VERBOS
	15	#endif
	16	//void bestAlpha::checkAllocation() {
	17	// if (_pi->stocProcessFromLabel(0)->getPijAccelerator() == NULL) {
	18	// errorMsg::reportError(" error in function findBestAlpha");
	19	// }
	20	//}
	21	//
	22	// @@@@ The method works with oldL,oldA,bestA and newL,newA.
	23	// Only when it's about to end, the members _bestAlpha and _bestL are filled.
	24
	25	bestAlphaAndBBL::bestAlphaAndBBL(tree& et, //find Best Alpha and best BBL
	26	const sequenceContainer& sc,
	27	stochasticProcess& sp,
	28	const Vdouble * weights,
	29	const MDOUBLE initAlpha,
	30	const MDOUBLE upperBoundOnAlpha,
	31	const MDOUBLE epsilonLoglikelihoodForAlphaOptimization,
	32	const MDOUBLE epsilonLoglikelihoodForBBL,
	33	const int maxBBLIterations,
	34	const int maxTotalIterations){
	35	// LOG(5,<<"find Best Alpha and best BBL"<<endl);
	36	// LOG(5,<<" 1. bestAlpha::findBestAlpha"<<endl);
	37	// brLenOpt br1(et,pi,weights);
	38
	39	MDOUBLE oldL = VERYSMALL;
	40	MDOUBLE newL = VERYSMALL;
	41	const MDOUBLE bx=initAlpha;
	42	const MDOUBLE ax=0;
	43	const MDOUBLE cx=upperBoundOnAlpha;
	44	//
	45	MDOUBLE bestA=0;
	46	MDOUBLE oldA=0;
	47	int i=0;
	48	for (i=0; i < maxTotalIterations; ++i) {
	49	newL = -brent(ax,bx,cx,
	50	C_evalAlpha(et,sc,sp,weights),
	51	epsilonLoglikelihoodForAlphaOptimization,
	52	&bestA);
	53
	54	#ifdef VERBOS
	55	LOG(5,<<"# bestAlphaAndBBL::bestAlphaAndBBL iteration " << i <<endl
	56	<<"# old L = " << oldL << "\t"
	57	<<"# new L = " << newL << endl
	58	<<"# new Alpha = " << bestA << endl);
	59	#endif
	60	if (newL > oldL+epsilonLoglikelihoodForBBL) {
	61	oldL = newL;
	62	oldA = bestA;
	63	} else {
	64	oldL = newL;
	65	oldA = bestA;
	66
	67
	68	_bestL = oldL;
	69	_bestAlpha= oldA;
	70	(static_cast<gammaDistribution*>(sp.distr()))->setAlpha(bestA);
	71	break;
	72	}
	73
	74	(static_cast<gammaDistribution*>(sp.distr()))->setAlpha(bestA);
	75	bblEM bblEM1(et,sc,sp,NULL,maxBBLIterations,epsilonLoglikelihoodForBBL);//maxIterations=1000
	76	newL =bblEM1.getTreeLikelihood();
	77	#ifdef VERBOS
	78	LOG(5,<<"# bestAlphaAndBBL::bestAlphaAndBBL iteration " << i <<endl
	79	<<"# After BBL new L = "<<newL<<" old L = "<<oldL<<endl
	80	<<"# The tree:" );
	81	LOGDO(5,et.output(myLog::LogFile()));
	82	#endif
	83
	84	if (newL > oldL+epsilonLoglikelihoodForBBL) {
	85	oldL = newL;
	86	}
	87	else {
	88	oldL=newL;
	89	_bestL = oldL;
	90	_bestAlpha= oldA;
	91	(static_cast<gammaDistribution*>(sp.distr()))->setAlpha(bestA);
	92	break;
	93	}
	94	}
	95	if (i==maxTotalIterations) {
	96	_bestL = newL;
	97	_bestAlpha= bestA;
	98	(static_cast<gammaDistribution*>(sp.distr()))->setAlpha(bestA);
	99	}
	100	}
	101
	102	bestAlphasAndBBLProportional::bestAlphasAndBBLProportional(tree& et, //find Best Alphas (per gene - local and proportional factors - global) and best BBL
	103	vector<sequenceContainer>& sc,
	104	multipleStochasticProcess* msp,
	105	gammaDistribution* pProportionDist,
	106	Vdouble initLocalRateAlphas,
	107	const MDOUBLE upperBoundOnLocalRateAlpha,
	108	const MDOUBLE initGlobalRateAlpha,
	109	const MDOUBLE upperBoundOnGlobalRateAlpha,
	110	const int maxBBLIterations,
	111	const int maxTotalIterations,
	112	const bool optimizeSelectedBranches,
	113	const bool optimizeTree,
	114	const string branchLengthOptimizationMethod,
	115	const bool optimizeLocalAlpha,
	116	const bool optimizeGlobalAlpha,
	117	const Vdouble * weights,
	118	const MDOUBLE epsilonLoglikelihoodForLocalRateAlphaOptimization,
	119	const MDOUBLE epsilonLoglikelihoodForGlobalRateAlphaOptimization,
	120	const MDOUBLE epsilonLoglikelihoodForBBL){
	121	// LOG(5,<<"find Best Alpha and best BBL"<<endl);
	122	// LOG(5,<<" 1. bestAlpha::findBestAlpha"<<endl);
	123	// brLenOpt br1(et,pi,weights);
	124
	125
	126	if(initLocalRateAlphas.size() != sc.size()){
	127	MDOUBLE val = initLocalRateAlphas[0];
	128	initLocalRateAlphas.resize(sc.size(),val);
	129	}
	130	int spIndex;
	131	_bestGlobalAlpha = initGlobalRateAlpha;
	132	pProportionDist->setAlpha(_bestGlobalAlpha);
	133	_bestLocalAlphaVec = initLocalRateAlphas;
	134	for(spIndex = 0;spIndex < msp->getSPVecSize();++spIndex){
	135	(static_cast<gammaDistribution*>(msp->getSp(spIndex)->distr()))->setAlpha(_bestLocalAlphaVec[spIndex]);
	136	}
	137	//First compute the likelihood
	138	_bestLvec = likelihoodComputation::getTreeLikelihoodProportionalAllPosAlphTheSame(et,sc,msp,pProportionDist,weights);
	139	if((!optimizeTree) && (!optimizeLocalAlpha) && (!optimizeGlobalAlpha)) return;
	140	MDOUBLE currentGlobalAlpha;
	141	currentGlobalAlpha = initGlobalRateAlpha;
	142	Vdouble currentLocalAlphaVec;
	143	Vdouble newLvec;
	144	//doubleRep newL;//DR
	145	MDOUBLE newL;
	146	//doubleRep oldL(VERYSMALL);//DR
	147	MDOUBLE oldL = VERYSMALL;
	148	currentLocalAlphaVec = initLocalRateAlphas;
	149	newLvec.resize(msp->getSPVecSize());
	150	//doubleRep epsilonLoglikelihoodForGlobalRateAlphaOptimizationDR(epsilonLoglikelihoodForGlobalRateAlphaOptimization);//DR
	151	string alphas;
	152	//doubleRep minusOne(-1.0);//DR
	153	int i;
	154
	155	MDOUBLE a_localAlpha_x = 0.0;
	156	MDOUBLE c_localAlpha_x = upperBoundOnLocalRateAlpha;
	157	for(i=0; i < maxTotalIterations; ++i) {
	158	//Find best local alphas
	159	if(optimizeLocalAlpha){
	160	for(spIndex = 0;spIndex < msp->getSPVecSize();++spIndex){
	161	MDOUBLE b_localAlpha_x = _bestLocalAlphaVec[spIndex];
	162	newLvec[spIndex] = -brent(a_localAlpha_x,b_localAlpha_x,c_localAlpha_x,
	163	C_evalLocalAlpha(et,sc[spIndex],*msp->getSp(spIndex),pProportionDist,weights),
	164	epsilonLoglikelihoodForLocalRateAlphaOptimization,
	165	&currentLocalAlphaVec[spIndex]);
	166	if (newLvec[spIndex] >= _bestLvec[spIndex]) {
	167	_bestLvec[spIndex] = newLvec[spIndex];
	168	_bestLocalAlphaVec[spIndex] = currentLocalAlphaVec[spIndex];
	169	}
	170	else
	171	{//likelihood went down!
	172	LOG(2,<<"likelihood went down in optimizing local alpha"<<endl<<"oldL = "<<sumVdouble(_bestLvec));
	173	}
	174	(static_cast<gammaDistribution*>(msp->getSp(spIndex)->distr()))->setAlpha(_bestLocalAlphaVec[spIndex]);
	175	}
	176	LOGnOUT(2,<<"Done with local alpha optimization"<<endl<<"LL:"<<sumVdouble(_bestLvec)<<endl);
	177	LOGnOUT(2,<<"Local Alphas:");
	178	for(spIndex = 0;spIndex < _bestLocalAlphaVec.size();++spIndex){
	179	LOGnOUT(2,<<_bestLocalAlphaVec[spIndex]<<",";);
	180	}
	181	LOGnOUT(2,<<endl);
	182	_bestLvec = likelihoodComputation::getTreeLikelihoodProportionalAllPosAlphTheSame(et,sc,msp,pProportionDist,weights);
	183	}
	184	//Find best global alpha
	185	if(optimizeGlobalAlpha){
	186	//doubleRep b_globalAlpha_x(_bestGlobalAlpha);//DR
	187	//doubleRep a_globalAlpha_x(0.0);//DR
	188	//doubleRep c_globalAlpha_x(upperBoundOnGlobalRateAlpha);//DR
	189	MDOUBLE b_globalAlpha_x = _bestGlobalAlpha;
	190	MDOUBLE a_globalAlpha_x = 0.0;
	191	MDOUBLE c_globalAlpha_x = upperBoundOnGlobalRateAlpha;
	192
	193	//newL = minusOne*brentDoubleRep(a_globalAlpha_x,b_globalAlpha_x,c_globalAlpha_x,
	194	//C_evalGlobalAlpha(et,sc,msp,pProportionDist,weights),
	195	//epsilonLoglikelihoodForGlobalRateAlphaOptimizationDR,
	196	//&_bestGlobalAlpha);//DR
	197
	198	newL = -brent(a_globalAlpha_x,b_globalAlpha_x,c_globalAlpha_x,
	199	C_evalGlobalAlpha(et,sc,msp,pProportionDist,weights),
	200	epsilonLoglikelihoodForGlobalRateAlphaOptimization,
	201	&currentGlobalAlpha);
	202
	203	if (newL >= sumVdouble(_bestLvec)) { //converged
	204	_bestGlobalAlpha = currentGlobalAlpha;
	205	}
	206	else
	207	{//likelihood went down!
	208	LOG(2,<<"likelihood went down in optimizing global alpha"<<endl<<"oldL = "<<sumVdouble(_bestLvec));
	209	}
	210	pProportionDist->setAlpha(_bestGlobalAlpha);
	211	//whether or not likelihood has improved we need to update _bestLvec
	212	_bestLvec = likelihoodComputation::getTreeLikelihoodProportionalAllPosAlphTheSame(et,sc,msp,pProportionDist,weights);
	213	LOGnOUT(2,<<"Done with global alpha optimization"<<endl<<"LL:"<<sumVdouble(_bestLvec)<<endl);
	214	LOGnOUT(2,<<"Global Alpha:"<<_bestGlobalAlpha<<endl);
	215	}
	216
	217	if(optimizeTree){
	218	if(branchLengthOptimizationMethod == "bblLS"){
	219	bblLSProportionalEB bblLSPEB1(et,sc,msp,pProportionDist,_bestLvec,optimizeSelectedBranches,maxBBLIterations,epsilonLoglikelihoodForBBL);
	220	_bestLvec = bblLSPEB1.getTreeLikelihoodVec();
	221	LOGnOUT(2,<<"Done with bblLS"<<endl<<"LL:"<<sumVdouble(_bestLvec)<<endl);
	222	}
	223	else if(branchLengthOptimizationMethod == "bblEM"){
	224	bblEMProportionalEB bblEMPEB1(et,sc,msp,pProportionDist,optimizeSelectedBranches,NULL,maxBBLIterations,epsilonLoglikelihoodForBBL);//maxIterations=1000
	225	_bestLvec = bblEMPEB1.getTreeLikelihood();
	226	LOGnOUT(2,<<"Done with bblEM"<<endl<<"LL:"<<sumVdouble(_bestLvec)<<endl);
	227	}
	228	LOGnOUT(2,<<et.stringTreeInPhylipTreeFormat()<<endl);
	229	}
	230	if (sumVdouble(_bestLvec) > oldL+epsilonLoglikelihoodForBBL) {
	231	//global and local alpha have already been updated individually
	232	oldL = sumVdouble(_bestLvec);
	233	}
	234	else {
	235	break;
	236	}
	237	LOGnOUT(2,<<"Done with optimization iteration "<<i<<". LL: "<<sumVdouble(_bestLvec)<<endl);
	238	}
	239	}
	240
	241	bestBetaAndBBL::bestBetaAndBBL(tree& et, //find Best Alpha and best BBL
	242	const sequenceContainer& sc,
	243	stochasticProcess& sp,
	244	const Vdouble * weights,
	245	const MDOUBLE initBeta,
	246	const MDOUBLE upperBoundOnBeta,
	247	const MDOUBLE epsilonLoglikelihoodForBetaOptimization,
	248	const MDOUBLE epsilonLoglikelihoodForBBL,
	249	const int maxBBLIterations,
	250	const int maxTotalIterations){
	251	// LOG(5,<<"find Best Beta and best BBL"<<endl);
	252	// LOG(5,<<" 1. bestBetaa::findBestBeta"<<endl);
	253	// brLenOpt br1(et,pi,weights);
	254
	255	MDOUBLE oldL = VERYSMALL;
	256	MDOUBLE newL = VERYSMALL;
	257	const MDOUBLE bx=initBeta;
	258	const MDOUBLE ax=0;
	259	const MDOUBLE cx=upperBoundOnBeta;
	260	//
	261	MDOUBLE bestB=0;
	262	MDOUBLE oldB=0;
	263	int i=0;
	264	for (i=0; i < maxTotalIterations; ++i) {
	265	newL = -brent(ax,bx,cx,
	266	C_evalBeta(et,sc,sp,weights),
	267	epsilonLoglikelihoodForBetaOptimization,
	268	&bestB);
	269
	270	#ifdef VERBOS
	271	LOG(5,<<"# bestBetaAndBBL::bestBetaAndBBL iteration " << i <<endl
	272	<<"# old L = " << oldL << "\t"
	273	<<"# new L = " << newL << endl
	274	<<"# new Beta = " << bestB << endl);
	275	#endif
	276	if (newL > oldL+epsilonLoglikelihoodForBBL) {
	277	oldL = newL;
	278	oldB = bestB;
	279	} else {
	280	oldL = newL;
	281	oldB = bestB;
	282
	283
	284	_bestL = oldL;
	285	_bestBeta= oldB;
	286	(static_cast<gammaDistribution*>(sp.distr()))->setBeta(bestB);
	287	break;
	288	}
	289
	290	(static_cast<gammaDistribution*>(sp.distr()))->setBeta(bestB);
	291	bblEM bblEM1(et,sc,sp,NULL,maxBBLIterations,epsilonLoglikelihoodForBBL);//maxIterations=1000
	292	newL =bblEM1.getTreeLikelihood();
	293	#ifdef VERBOS
	294	LOG(5,<<"# bestBetaAndBBL::bestBetaAndBBL iteration " << i <<endl
	295	<<"# After BBL new L = "<<newL<<" old L = "<<oldL<<endl
	296	<<"# The tree:" );
	297	LOGDO(5,et.output(myLog::LogFile()));
	298	#endif
	299
	300	if (newL > oldL+epsilonLoglikelihoodForBBL) {
	301	oldL = newL;
	302	}
	303	else {
	304	oldL=newL;
	305	_bestL = oldL;
	306	_bestBeta= oldB;
	307	(static_cast<gammaDistribution*>(sp.distr()))->setBeta(bestB);
	308	break;
	309	}
	310	}
	311	if (i==maxTotalIterations) {
	312	_bestL = newL;
	313	_bestBeta= bestB;
	314	(static_cast<gammaDistribution*>(sp.distr()))->setBeta(bestB);
	315	}
	316	}
	317
	318	bestAlphaFixedTree::bestAlphaFixedTree(const tree& et, //findBestAlphaFixedTree
	319	const sequenceContainer& sc,
	320	stochasticProcess& sp,
	321	const Vdouble * weights,
	322	const MDOUBLE upperBoundOnAlpha,
	323	const MDOUBLE epsilonLoglikelihoodForAlphaOptimization){
	324	//LOG(5,<<"findBestAlphaFixedTree"<<endl);
	325	MDOUBLE bestA=0;
	326	const MDOUBLE cx=upperBoundOnAlpha;// left, midle, right limit on alpha
	327	const MDOUBLE bx=static_cast<gammaDistribution*>(sp.distr())->getAlpha();
	328	const MDOUBLE ax=0.0;
	329
	330
	331	_bestL = -brent(ax,bx,cx,
	332	C_evalAlpha(et,sc,sp,weights),
	333	epsilonLoglikelihoodForAlphaOptimization,
	334	&bestA);
	335	(static_cast<gammaDistribution*>(sp.distr()))->setAlpha(bestA);
	336	_bestAlpha= bestA;
	337	}
	338
	339
	340
	341	bestAlphaAndBetaAndBBL::bestAlphaAndBetaAndBBL(tree& et, //find Best Alpha and best BBL
	342	const sequenceContainer& sc,
	343	stochasticProcess& sp,
	344	const Vdouble * weights,
	345	const MDOUBLE initAlpha,
	346	const MDOUBLE initBeta,
	347	const MDOUBLE upperBoundOnAlpha,
	348	const MDOUBLE upperBoundOnBeta,
	349	const MDOUBLE epsilonLoglikelihoodForAlphaOptimization,
	350	const MDOUBLE epsilonLoglikelihoodForBetaOptimization,
	351	const MDOUBLE epsilonLoglikelihoodForBBL,
	352	const int maxBBLIterations,
	353	const int maxTotalIterations){
	354	// LOG(5,<<"find Best Alpha and Beta and best BBL"<<endl);
	355	// LOG(5,<<" 1. bestAlphaAndBetaAndBBL::findBestAlphaAndBeta"<<endl);
	356	// brLenOpt br1(et,pi,weights);
	357
	358	MDOUBLE oldL = VERYSMALL;
	359	MDOUBLE newL = VERYSMALL;
	360	MDOUBLE bx=initAlpha;
	361	const MDOUBLE ax=0;
	362	const MDOUBLE cx=upperBoundOnAlpha;
	363	MDOUBLE ex=initBeta;
	364	const MDOUBLE dx=0;
	365	const MDOUBLE fx=upperBoundOnBeta;
	366	bool optimize = false;
	367
	368	//
	369	MDOUBLE bestA=0;
	370	MDOUBLE oldA=0;
	371	MDOUBLE bestB=0;
	372	MDOUBLE oldB=0;
	373	int i=0;
	374	for (i=0; i < maxTotalIterations; ++i) {
	375	//optimize alpha
	376	newL = -brent(ax,bx,cx,
	377	C_evalAlpha(et,sc,sp,weights),
	378	epsilonLoglikelihoodForAlphaOptimization,
	379	&bestA);
	380	bx = bestA;
	381
	382	#ifdef VERBOS
	383	LOG(5,<<"# bestAlphaAndBetaAndBBL::bestAlphaAndBetaAndBBL iteration " << i <<endl
	384	<<"# old L = " << oldL << "\t"
	385	<<"# new L = " << newL << endl
	386	<<"# new Alpha = " << bestA << endl);
	387	#endif
	388	if(newL < oldL)
	389	errorMsg::reportError("likelihood decreased in alhpa optimization step in bestAlphaAndBetaAndBBL::bestAlphaAndBetaAndBBL");
	390	oldL = newL;
	391	oldA = bestA;
	392	_bestL = newL;
	393	_bestAlpha= bestA;
	394	if (newL > oldL+epsilonLoglikelihoodForBBL) {
	395	optimize = true;
	396	}
	397	(static_cast<generalGammaDistribution*>(sp.distr()))->setAlpha(bestA);
	398
	399	//optimize beta
	400	newL = -brent(dx,ex,fx,
	401	C_evalBeta(et,sc,sp,weights),
	402	epsilonLoglikelihoodForBetaOptimization,
	403	&bestB);
	404	ex = bestB;
	405
	406	#ifdef VERBOS
	407	LOG(5,<<"# bestAlphaAndBetaAndBBL::bestAlphaAndBetaAndBBL iteration " << i <<endl
	408	<<"# old L = " << oldL << "\t"
	409	<<"# new L = " << newL << endl
	410	<<"# new Beta = " << bestB << endl);
	411	#endif
	412	if(newL < oldL)
	413	errorMsg::reportError("likelihood decreased in beta optimization step in bestAlphaAndBetaAndBBL::bestAlphaAndBetaAndBBL");
	414	oldL = newL;
	415	oldB = bestB;
	416	_bestL = oldL;
	417	_bestBeta= oldB;
	418	if (newL > oldL+epsilonLoglikelihoodForBBL) {
	419	optimize = true;
	420	}
	421	(static_cast<generalGammaDistribution*>(sp.distr()))->setBeta(bestB);
	422
	423	//bblEM
	424	bblEM bblEM1(et,sc,sp,NULL,maxBBLIterations,epsilonLoglikelihoodForBBL);//maxIterations=1000
	425	newL =bblEM1.getTreeLikelihood();
	426	#ifdef VERBOS
	427	LOG(5,<<"# bestAlphaAndBetaAndBBL::bestAlphaAndBetaAndBBL iteration " << i <<endl
	428	<<"# After BBL new L = "<<newL<<" old L = "<<oldL<<endl
	429	<<"# The tree:" );
	430	LOGDO(5,et.output(myLog::LogFile()));
	431	#endif
	432	if(newL < oldL)
	433	errorMsg::reportError("likelihood decreased in bbl optimization step in bestAlphaAndBetaAndBBL::bestAlphaAndBetaAndBBL");
	434	oldL = newL;
	435	_bestL = newL;
	436	if (newL > oldL+epsilonLoglikelihoodForBBL) {
	437	optimize = true;
	438	}
	439	if (!optimize)
	440	break;
	441	}
	442	}
	443

+246

-0

libs/phylogeny/bestAlpha.h less more

	0	// $Id: bestAlpha.h 10000 2011-11-12 18:20:12Z rubi $
	1
	2	#ifndef ___BEST_ALPHA
	3	#define ___BEST_ALPHA
	4
	5	#include "definitions.h"
	6
	7	#include "likelihoodComputation.h"
	8	#include "sequenceContainer.h"
	9	#include "stochasticProcess.h"
	10	#include "multipleStochasticProcess.h"
	11	#include "gammaDistribution.h"
	12	#include "tree.h"
	13	#include "logFile.h"
	14
	15	#ifndef VERBOS
	16	#define VERBOS
	17	#endif
	18
	19	class bestAlphaFixedTree {
	20	public:
	21	explicit bestAlphaFixedTree(const tree& et,
	22	const sequenceContainer& sc,
	23	stochasticProcess& sp,
	24	const Vdouble * weights=NULL,
	25	const MDOUBLE upperBoundOnAlpha = 15,
	26	const MDOUBLE epsilonAlphaOptimization = 0.01);
	27	MDOUBLE getBestAlpha() {return _bestAlpha;}
	28	MDOUBLE getBestL() {return _bestL;}
	29	private:
	30	MDOUBLE _bestAlpha;
	31	MDOUBLE _bestL;
	32	};
	33
	34	class bestAlphaAndBBL {
	35	public:
	36	explicit bestAlphaAndBBL(tree& et, //find Best Alpha and best BBL
	37	const sequenceContainer& sc,
	38	stochasticProcess& sp,
	39	const Vdouble * weights=NULL,
	40	const MDOUBLE initAlpha = 1.5,
	41	const MDOUBLE upperBoundOnAlpha = 5.0,
	42	const MDOUBLE epsilonLoglikelihoodForAlphaOptimization= 0.01,
	43	const MDOUBLE epsilonLoglikelihoodForBBL= 0.05,
	44	const int maxBBLIterations=10,
	45	const int maxTotalIterations=5);
	46	MDOUBLE getBestAlpha() {return _bestAlpha;}
	47	MDOUBLE getBestL() {return _bestL;}
	48	private:
	49	MDOUBLE _bestAlpha;
	50	MDOUBLE _bestL;
	51	};
	52
	53	class bestAlphasAndBBLProportional {
	54	public:
	55	explicit bestAlphasAndBBLProportional(tree& et, //find Best Alphas (per gene - local and proportional factors - global) and best BBL
	56	vector<sequenceContainer>& sc,
	57	multipleStochasticProcess* msp,
	58	gammaDistribution* pProportionDist,
	59	Vdouble initLocalRateAlphas,
	60	const MDOUBLE upperBoundOnLocalRateAlpha,
	61	const MDOUBLE initGlobalRateAlpha,
	62	const MDOUBLE upperBoundOnGlobalRateAlpha,
	63	const int maxBBLIterations,
	64	const int maxTotalIterations,
	65	const bool optimizeSelectedBranches=false,
	66	const bool optimizeTree = true,
	67	const string branchLengthOptimizationMethod="bblLS",
	68	const bool optimizeLocalAlpha = true,
	69	const bool optimizeGlobalAlpha = true,
	70	const Vdouble * weights=NULL,
	71	const MDOUBLE epsilonLoglikelihoodForLocalRateAlphaOptimization= 0.01,
	72	const MDOUBLE epsilonLoglikelihoodForGlobalRateAlphaOptimization= 0.01,
	73	const MDOUBLE epsilonLoglikelihoodForBBL= 0.05);
	74	MDOUBLE getBestLocalAlpha(int spIndex){return _bestLocalAlphaVec[spIndex];}
	75	MDOUBLE getBestGlobalAlpha(){return _bestGlobalAlpha;}
	76	Vdouble getBestL() {return _bestLvec;}
	77	private:
	78	Vdouble _bestLocalAlphaVec;
	79	MDOUBLE _bestGlobalAlpha;
	80	Vdouble _bestLvec;
	81	};
	82
	83	class bestBetaAndBBL {
	84	public:
	85	explicit bestBetaAndBBL(tree& et, //find Best Beta and best BBL
	86	const sequenceContainer& sc,
	87	stochasticProcess& sp,
	88	const Vdouble * weights=NULL,
	89	const MDOUBLE initBeta = 1.5,
	90	const MDOUBLE upperBoundOnBeta = 5.0,
	91	const MDOUBLE epsilonLoglikelihoodForBetaOptimization= 0.01,
	92	const MDOUBLE epsilonLoglikelihoodForBBL= 0.05,
	93	const int maxBBLIterations=10,
	94	const int maxTotalIterations=5);
	95	MDOUBLE getBestBeta() {return _bestBeta;}
	96	MDOUBLE getBestL() {return _bestL;}
	97	private:
	98	MDOUBLE _bestBeta;
	99	MDOUBLE _bestL;
	100	};
	101
	102	class bestAlphaAndBetaAndBBL {
	103	public:
	104	explicit bestAlphaAndBetaAndBBL(tree& et, //find Best Alpha and best BBL
	105	const sequenceContainer& sc,
	106	stochasticProcess& sp,
	107	const Vdouble * weights=NULL,
	108	const MDOUBLE initAlpha = 1.5,
	109	const MDOUBLE initBeta = 1.5,
	110	const MDOUBLE upperBoundOnAlpha = 5.0,
	111	const MDOUBLE upperBoundOnBeta = 5.0,
	112	const MDOUBLE epsilonLoglikelihoodForAlphaOptimization= 0.01,
	113	const MDOUBLE epsilonLoglikelihoodForBetaOptimization = 0.01,
	114	const MDOUBLE epsilonLoglikelihoodForBBL= 0.05,
	115	const int maxBBLIterations=10,
	116	const int maxTotalIterations=5);
	117	MDOUBLE getBestAlpha() {return _bestAlpha;}
	118	MDOUBLE getBestBeta() {return _bestBeta;}
	119	MDOUBLE getBestL() {return _bestL;}
	120	private:
	121	MDOUBLE _bestAlpha;
	122	MDOUBLE _bestBeta;
	123	MDOUBLE _bestL;
	124	};
	125
	126
	127	class C_evalAlpha{
	128	public:
	129	C_evalAlpha( const tree& et,
	130	const sequenceContainer& sc,
	131	stochasticProcess& sp,
	132	const Vdouble * weights = NULL)
	133	: _et(et),_sc(sc),_weights(weights),_sp(sp){};
	134	private:
	135	const tree& _et;
	136	const sequenceContainer& _sc;
	137	const Vdouble * _weights;
	138	stochasticProcess& _sp;
	139	public:
	140	MDOUBLE operator() (MDOUBLE alpha) {
	141	if (_sp.categories() == 1) {
	142	errorMsg::reportError(" one category when trying to optimize alpha");
	143	}
	144	(static_cast<generalGammaDistribution*>(_sp.distr()))->setAlpha(alpha);
	145
	146	MDOUBLE res = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(_et,_sc,_sp,_weights);
	147	//LOG(5,<<" with alpha = "<<alpha<<" logL = "<<res<<endl);
	148	#ifdef VERBOS
	149	LOG(7,<<" while in brent: with alpha = "<<alpha<<" logL = "<<res<<endl);
	150	#endif
	151	return -res;
	152	}
	153	};
	154
	155	class C_evalLocalAlpha{
	156	public:
	157	C_evalLocalAlpha( const tree& et,
	158	const sequenceContainer& sc,
	159	stochasticProcess& sp,
	160	const gammaDistribution* pProportionDist,
	161	const Vdouble * weights = NULL)
	162	: _et(et),_sc(sc),_weights(weights),_sp(sp),_pProportionDist(pProportionDist){};
	163	private:
	164	const tree& _et;
	165	const sequenceContainer& _sc;
	166	const Vdouble * _weights;
	167	stochasticProcess& _sp;
	168	const gammaDistribution* _pProportionDist;
	169	public:
	170	MDOUBLE operator() (MDOUBLE alpha) {
	171	if (_sp.categories() == 1) {
	172	errorMsg::reportError("one category when trying to optimize local alpha");
	173	}
	174	(static_cast<gammaDistribution*>(_sp.distr()))->setAlpha(alpha);
	175	vector<sequenceContainer> tmpScVec;
	176	tmpScVec.push_back(_sc);
	177	vector<stochasticProcess> tmpSpVec;
	178	tmpSpVec.push_back(_sp);
	179	multipleStochasticProcess * tmpMsp = new multipleStochasticProcess();
	180	tmpMsp->setSpVec(tmpSpVec);
	181	Vdouble likeVec = likelihoodComputation::getTreeLikelihoodProportionalAllPosAlphTheSame(_et,tmpScVec,tmpMsp,_pProportionDist);
	182	MDOUBLE res = likeVec[0];
	183	delete(tmpMsp);
	184	LOG(5,<<" with local alpha = "<<alpha<<" logL = "<<res<<endl);
	185	return -res;
	186	}
	187	};
	188
	189	class C_evalGlobalAlpha{
	190	public:
	191	C_evalGlobalAlpha( const tree& et,
	192	vector<sequenceContainer>& sc,
	193	multipleStochasticProcess* msp,
	194	gammaDistribution* pProportionDist,
	195	const Vdouble * weights = NULL)
	196	: _et(et),_sc(sc),_weights(weights),_msp(msp),_pProportionDist(pProportionDist){};
	197	private:
	198	const tree& _et;
	199	vector<sequenceContainer>& _sc;
	200	const Vdouble * _weights;
	201	multipleStochasticProcess* _msp;
	202	gammaDistribution* _pProportionDist;
	203	public:
	204	MDOUBLE operator() (MDOUBLE alpha) {
	205	if (_pProportionDist->categories() < 1) {
	206	errorMsg::reportError(" less than one category when trying to optimize global alpha");
	207	}
	208	_pProportionDist->setAlpha(alpha);
	209	Vdouble likeVec = likelihoodComputation::getTreeLikelihoodProportionalAllPosAlphTheSame(_et,_sc,_msp,_pProportionDist);
	210	MDOUBLE res = sumVdouble(likeVec);
	211	LOG(5,<<" with global alpha = "<<alpha<<" logL = "<<res<<endl);
	212	return -res;
	213	}
	214	};
	215
	216	class C_evalBeta{
	217	public:
	218	C_evalBeta( const tree& et,
	219	const sequenceContainer& sc,
	220	stochasticProcess& sp,
	221	const Vdouble * weights = NULL)
	222	: _et(et),_sc(sc),_weights(weights),_sp(sp){};
	223	private:
	224	const tree& _et;
	225	const sequenceContainer& _sc;
	226	const Vdouble * _weights;
	227	stochasticProcess& _sp;
	228	public:
	229	MDOUBLE operator() (MDOUBLE beta) {
	230	if (_sp.categories() == 1) {
	231	errorMsg::reportError(" one category when trying to optimize beta");
	232	}
	233	(static_cast<generalGammaDistribution*>(_sp.distr()))->setBeta(beta);
	234
	235	MDOUBLE res = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(_et,_sc,_sp,_weights);
	236	//LOG(5,<<" with alpha = "<<alpha<<" logL = "<<res<<endl);
	237	#ifdef VERBOS
	238	LOG(7,<<" while in brent: with beta = "<<beta<<" logL = "<<res<<endl);
	239	#endif
	240	return -res;
	241	}
	242	};
	243
	244	#endif
	245

+262

-0

libs/phylogeny/bestAlphaAndK.cpp less more

	0	#include "bestAlphaAndK.h"
	1	#include "computePijComponent.h"
	2	#include "betaOmegaDistribution.h"
	3	#include "codonUtils.h"
	4
	5
	6	optimizeSelectonParameters::optimizeSelectonParameters(tree& et, //find Best params and best BBL
	7	const sequenceContainer& sc,
	8	vector<stochasticProcess>& spVec,
	9	distribution * distr,
	10	bool bblFlag,
	11	bool isGamma, bool isBetaProbSet,bool isOmegaSet,
	12	bool isKappaSet, bool isAlphaSet, bool isBetaSet,
	13	const MDOUBLE upperBoundOnAlpha,
	14	const MDOUBLE upperBoundOnBeta,
	15	const MDOUBLE epsilonAlphaOptimization,
	16	const MDOUBLE epsilonKOptimization,
	17	const MDOUBLE epsilonLikelihoodImprovment,
	18	const int maxBBLIterations,
	19	const int maxTotalIterations){
	20	//initialization
	21	MDOUBLE lowerValueOfParamK = 0;
	22	MDOUBLE lowerValueOfParamAlpha = 0.1;
	23	MDOUBLE lowerValueOfParamBeta = 0.1;
	24	MDOUBLE omegaLowerBoundary = 0.99; // this is to allow brent to reach the exact lower bound value
	25	MDOUBLE omegaUpperBoundary = 5.0;
	26	MDOUBLE upperValueOfParamK = 5; // changed from 50, Adi S. 2/1/07
	27
	28	MDOUBLE initialGuessValueOfParamTr;
	29	initialGuessValueOfParamTr = _bestK = static_cast<wYangModel*>(spVec[0].getPijAccelerator()->getReplacementModel())->getK();
	30
	31	MDOUBLE initialGuessValueOfParamAlpha;
	32	if (isGamma) initialGuessValueOfParamAlpha = _bestAlpha = static_cast<generalGammaDistribution*>(distr)->getAlpha();
	33	else initialGuessValueOfParamAlpha = _bestAlpha = static_cast<betaOmegaDistribution*>(distr)->getAlpha();
	34
	35	MDOUBLE initialGuessValueOfParamBeta;
	36	if (isGamma) initialGuessValueOfParamBeta = _bestBeta = static_cast<generalGammaDistribution*>(distr)->getBeta();
	37	else initialGuessValueOfParamBeta = _bestBeta = static_cast<betaOmegaDistribution*>(distr)->getBeta();
	38
	39	MDOUBLE initialGuessValueOfParamOmega = -1;
	40	MDOUBLE initialGuessValueOfParamBetaProb = -1;
	41	if (!isGamma) {
	42	initialGuessValueOfParamOmega = _bestOmega = static_cast<betaOmegaDistribution*>(distr)->getOmega();
	43	initialGuessValueOfParamBetaProb = _bestBetaProb = static_cast<betaOmegaDistribution*>(distr)->getBetaProb();
	44	}
	45	_bestL = likelihoodComputation2Codon::getTreeLikelihoodAllPosAlphTheSame(et,sc,spVec,distr);;
	46	MDOUBLE newL = _bestL;
	47
	48	MDOUBLE alphaFound = 0;
	49	MDOUBLE kFound = 0;
	50	MDOUBLE betaFound = 0;
	51	MDOUBLE omegaFound = 0;
	52	MDOUBLE betaProbFound = 0;
	53	bool changed = false;
	54	int i=0;
	55	LOG(5,<<endl<<"Beginning optimization of parameters"<<endl<<endl);
	56
	57	for (i=0; i < maxTotalIterations; ++i) {
	58	LOG(5,<<"Iteration Number= " << i <<endl);
	59	LOG(5,<<"---------------------"<<endl);
	60	cout<<"Iteration number = "<< i <<endl;
	61	alphaFound = omegaFound = betaProbFound = kFound = betaFound=0;
	62	changed = false;
	63	//ALPHA (beta or gamma distribution parameter)
	64	if (!isAlphaSet){
	65	if (isGamma) initialGuessValueOfParamAlpha = static_cast<generalGammaDistribution*>(distr)->getAlpha();
	66	else initialGuessValueOfParamAlpha = static_cast<betaOmegaDistribution*>(distr)->getAlpha();
	67	newL = -brent(lowerValueOfParamAlpha,
	68	initialGuessValueOfParamAlpha,
	69	upperBoundOnAlpha,
	70	evalParam(et,sc,spVec,-1,distr,isGamma),epsilonAlphaOptimization,&alphaFound);
	71
	72	LOG(5,<<"current best L= "<<_bestL<<endl<<endl);
	73	LOG(5,<<"new L After alpha= " << newL<<endl);
	74	LOG(5,<<"new alpha = " <<alphaFound<<endl<<endl);
	75
	76
	77	if (newL > _bestL+epsilonLikelihoodImprovment ) {// update of likelihood ,v and model.
	78	_bestL = newL;
	79	_bestAlpha = alphaFound;
	80	if (isGamma) static_cast<generalGammaDistribution*>(distr)->setAlpha(alphaFound);
	81	else static_cast<betaOmegaDistribution*>(distr)->setAlpha(alphaFound);
	82	for (int categor = 0; categor < spVec.size();categor++)
	83	static_cast<wYangModel*>(spVec[categor].getPijAccelerator()->getReplacementModel())->setW(distr->rates(categor));
	84	normalizeMatrices(spVec,distr);
	85	changed = true;
	86	}
	87	}
	88	//BETA (beta distribution parameter)
	89	if (!isBetaSet) {
	90	if (isGamma) initialGuessValueOfParamBeta = static_cast<generalGammaDistribution*>(distr)->getBeta();
	91	else initialGuessValueOfParamBeta = static_cast<betaOmegaDistribution*>(distr)->getBeta();
	92	newL = -brent(lowerValueOfParamBeta,
	93	initialGuessValueOfParamBeta,
	94	upperBoundOnBeta,
	95	evalParam(et,sc,spVec,-2,distr,isGamma),epsilonAlphaOptimization,&betaFound);
	96
	97	LOG(5,<<"current best L= "<<_bestL<<endl<<endl);
	98	LOG(5,<<"new L After beta= " << newL<<endl);
	99	LOG(5,<<"new beta = " <<betaFound<<endl<<endl);
	100
	101	if (newL > _bestL+epsilonLikelihoodImprovment ) {// update of likelihood ,v and model.
	102	_bestL = newL;
	103	_bestBeta = betaFound;
	104	if (isGamma) static_cast<generalGammaDistribution*>(distr)->setBeta(betaFound);
	105	else static_cast<betaOmegaDistribution*>(distr)->setBeta(betaFound);
	106	for (int categor = 0; categor < spVec.size();categor++)
	107	static_cast<wYangModel*>(spVec[categor].getPijAccelerator()->getReplacementModel())->setW(distr->rates(categor));
	108	normalizeMatrices(spVec,distr);
	109	changed = true;
	110	}
	111	}
	112	//K parameter
	113	if (!isKappaSet){
	114	initialGuessValueOfParamTr = static_cast<wYangModel*>(spVec[0].getPijAccelerator()->getReplacementModel())->getK();
	115	newL = -brent(lowerValueOfParamK, //optimaize Tr
	116	initialGuessValueOfParamTr,
	117	upperValueOfParamK,
	118	evalParam(et,sc,spVec,0,distr,isGamma),epsilonKOptimization,&kFound);
	119
	120	LOG(5,<<"current best L= "<<_bestL<<endl<<endl);
	121	LOG(5,<<"new L After kappa= " << newL<<endl);
	122	LOG(5,<<"new kappa = " <<kFound<<endl);
	123
	124	if (newL > _bestL+epsilonLikelihoodImprovment ) {// update of likelihood and model.
	125	_bestL = newL;
	126	_bestK = kFound;
	127	for (int categor = 0; categor < spVec.size();categor++)
	128	static_cast<wYangModel*>(spVec[categor].getPijAccelerator()->getReplacementModel())->setK(kFound);
	129	normalizeMatrices(spVec,distr);
	130	changed = true;
	131	}
	132	}
	133	//beta distribution part (betaProb and additional omega)
	134	if (isGamma==false && !isBetaProbSet){ //optimize beta probs
	135	if (!isOmegaSet){ // optimize omega (M8 or M8b)
	136	MDOUBLE omegaFound;
	137	newL = -brent(omegaLowerBoundary,
	138	initialGuessValueOfParamOmega,
	139	omegaUpperBoundary,
	140	evalParam(et,sc,spVec,1,distr,isGamma),0.01,&omegaFound);
	141
	142	LOG(5,<<"current best L= "<<_bestL<<endl<<endl);
	143	LOG(5,<<"new L After additional omega caetgory = " << newL<<endl);
	144	LOG(5,<<"new additional omega caetgory = " <<omegaFound<<endl<<endl);
	145
	146	if (newL > _bestL+epsilonLikelihoodImprovment ) {
	147	_bestL = newL;
	148	_bestOmega = omegaFound;
	149	static_cast<betaOmegaDistribution*>(distr)->setOmega(omegaFound);
	150	static_cast<wYangModel*>(spVec[spVec.size()-1].getPijAccelerator()->getReplacementModel())->setW(omegaFound);
	151	normalizeMatrices(spVec,distr);
	152	changed = true;
	153	}
	154	}
	155	MDOUBLE betaProbFound;
	156	newL = -brent(0.0,initialGuessValueOfParamBetaProb,1.0,
	157	evalParam(et,sc,spVec,2,distr,isGamma),0.01,&betaProbFound);
	158
	159	LOG(5,<<"current best L= "<<_bestL<<endl<<endl);
	160	LOG(5,<<"new L After prob(additional omega caetgory)= " << newL<<endl);
	161	LOG(5,<<"new prob(additional omega caetgory)= " <<1 - betaProbFound<<endl<<endl);
	162	if (newL > _bestL+epsilonLikelihoodImprovment ) {// update of likelihood ,v and model.
	163	_bestL = newL;
	164	_bestBetaProb = betaProbFound;
	165	static_cast<betaOmegaDistribution*>(distr)->setBetaProb(betaProbFound);
	166	normalizeMatrices(spVec,distr);
	167	changed = true;
	168	}
	169	}
	170
	171	//BBL
	172	if (bblFlag==true) {
	173	//using epsilonAlphaOptimization as the epsilon for pairwise disatnce here
	174	bblEM2codon bbl(et,sc,spVec,distr,NULL,maxBBLIterations,epsilonLikelihoodImprovment,epsilonAlphaOptimization);
	175	newL = bbl.getTreeLikelihood();
	176
	177	LOG(5,<<"current best L= "<<_bestL<<endl<<endl);
	178	LOG(5,<<"new L After BL = " << newL<<endl);
	179	LOG(5,<<"Tree after this BBL iteration: "<<endl);
	180	LOGDO(5,et.output(myLog::LogFile()));
	181
	182	if (newL > _bestL+epsilonLikelihoodImprovment) {
	183	_bestL = newL;
	184	changed = true;
	185	}
	186	}
	187
	188	if (changed==false)
	189	break;
	190
	191	}
	192
	193	LOG(5,<<endl<<"Finished optimization of parameters"<<endl<<endl);
	194
	195	if (i==maxTotalIterations) {
	196	LOG(5,<<"Too many iterations in function optimizeCodonModelAndBBL. The last optimized parameters are used for the calculations."<<endl<<endl);
	197
	198	}
	199
	200	}
	201
	202	evalParam::~evalParam(){
	203	if (_distr != NULL) delete _distr;
	204	}
	205
	206
	207	evalParam::evalParam(const evalParam &other): _et(other._et),_sc(other._sc),
	208	_spVec(other._spVec), _alphaOrKs(other._alphaOrKs),_isGamma(other._isGamma)
	209	{
	210	_distr=other._distr->clone();
	211	}
	212
	213
	214	MDOUBLE evalParam::operator()(MDOUBLE param){
	215
	216	if (_alphaOrKs==-1) updateAlpha(param);
	217	else if (_alphaOrKs==-2) updateBeta(param);
	218	else if (_alphaOrKs==0) updateK(param);
	219	else if (_alphaOrKs==1) updateOmega(param);
	220	else if (_alphaOrKs==2) updateBetaProb(param);
	221	MDOUBLE res = likelihoodComputation2Codon::getTreeLikelihoodAllPosAlphTheSame(_et,_sc,_spVec,_distr);
	222	return -res; //return -log(likelihood).
	223	}
	224
	225	void evalParam::updateBeta(MDOUBLE param){
	226	if (_isGamma) static_cast<generalGammaDistribution*>(_distr)->setBeta(param);
	227	else static_cast<betaOmegaDistribution*>(_distr)->setBeta(param);
	228	for (int categor = 0; categor < _spVec.size();categor++){
	229	static_cast<wYangModel*>(_spVec[categor].getPijAccelerator()->getReplacementModel())->setW(_distr->rates(categor));
	230
	231	}
	232	normalizeMatrices(_spVec,_distr);
	233	}
	234	void evalParam::updateAlpha(MDOUBLE param){
	235	if (_isGamma)static_cast<generalGammaDistribution*>(_distr)->setAlpha(param);
	236	else static_cast<betaOmegaDistribution*>(_distr)->setAlpha(param);
	237	for (int categor = 0; categor < _spVec.size();categor++){
	238	static_cast<wYangModel*>(_spVec[categor].getPijAccelerator()->getReplacementModel())->setW(_distr->rates(categor));
	239
	240	}
	241	normalizeMatrices(_spVec,_distr);
	242	}
	243
	244	void evalParam::updateK(MDOUBLE param){
	245	for (int categor = 0; categor < _spVec.size();categor++){
	246	static_cast<wYangModel*>(_spVec[categor].getPijAccelerator()->getReplacementModel())->setK(param);
	247	}
	248	normalizeMatrices(_spVec,_distr);
	249	}
	250
	251
	252	void evalParam::updateOmega(MDOUBLE param){
	253	int size = _spVec.size();
	254	static_cast<wYangModel*>(_spVec[size-1].getPijAccelerator()->getReplacementModel())->setW(param);
	255	normalizeMatrices(_spVec,_distr);
	256	}
	257
	258	void evalParam::updateBetaProb(MDOUBLE param){
	259	static_cast<betaOmegaDistribution*>(_distr)->setBetaProb(param);
	260	normalizeMatrices(_spVec,_distr);
	261	}

+84

-0

libs/phylogeny/bestAlphaAndK.h less more

	0	#ifndef ___BEST_ALPHA_AND_K
	1	#define ___BEST_ALPHA_AND_K
	2
	3	#include "definitions.h"
	4	#include "tree.h"
	5	#include "likelihoodComputation.h"
	6	#include "likelihoodComputation2Codon.h"
	7	#include "sequenceContainer.h"
	8	#include "stochasticProcess.h"
	9	#include "generalGammaDistribution.h"
	10	#include "logFile.h"
	11	#include "wYangModel.h"
	12	#include "bblEM2codon.h"
	13	#include "computeUpAlg.h"
	14	#include "numRec.h"
	15
	16
	17
	18	//evaluate best parameters
	19	class optimizeSelectonParameters {
	20	public:
	21	explicit optimizeSelectonParameters(tree& et,
	22	const sequenceContainer& sc,
	23	vector<stochasticProcess>& spVec,
	24	distribution * distr,
	25	bool bblFlag = true,
	26	bool isGamma = true, bool isBetaProbSet=false,bool isOmegaSet = false,
	27	bool isKappaSet=false, bool isAlphaSet=false, bool isBetaSet=false,
	28	const MDOUBLE upperBoundOnAlpha = 3.0, // changed from 20, Adi S. 2/7/07
	29	const MDOUBLE upperBoundOnBeta = 3.0, // changed from 20, Adi S. 2/7/07
	30	const MDOUBLE epsilonAlphaOptimization= 0.01,
	31	const MDOUBLE epsilonKOptimization=0.01,
	32	const MDOUBLE epsilonLikelihoodImprovment= 0.1,
	33	const int maxBBLIterations=20,
	34	const int maxTotalIterations=20);
	35	const MDOUBLE getBestAlpha() const{return _bestAlpha;}
	36	const MDOUBLE getBestBeta() const{return _bestBeta;}
	37	const MDOUBLE getBestL() const {return _bestL;}
	38	const MDOUBLE getBestK() const {return _bestK;}
	39	const MDOUBLE getBestOmega() const {return _bestOmega;}
	40	const MDOUBLE getBestBetaProb() const {return _bestBetaProb;}
	41	private:
	42	MDOUBLE _bestAlpha;
	43	MDOUBLE _bestL;
	44	MDOUBLE _bestK;
	45	MDOUBLE _bestBeta;
	46	MDOUBLE _bestOmega;
	47	MDOUBLE _bestBetaProb;
	48	};
	49
	50
	51	//The functor to eval likelihood given a change in a parameters
	52	class evalParam{
	53	public:
	54	explicit evalParam(const tree& et,
	55	const sequenceContainer& sc,
	56	vector<stochasticProcess> spVec,
	57	int alphaOrKs,
	58	const distribution * in_distr,
	59	bool isGamma)
	60	: _et(et),_sc(sc),_spVec(spVec),_alphaOrKs(alphaOrKs),_isGamma(isGamma){_distr=in_distr->clone();};
	61	MDOUBLE operator()(MDOUBLE param);
	62
	63	virtual ~evalParam();
	64	evalParam(const evalParam &other);
	65	void updateAlpha(MDOUBLE param);
	66	void updateK(MDOUBLE param);
	67	void updateBeta(MDOUBLE param);
	68	void updateOmega(MDOUBLE param);
	69	void updateBetaProb(MDOUBLE param);
	70	private:
	71	const tree& _et;
	72	const sequenceContainer& _sc;
	73
	74	vector<stochasticProcess> _spVec;
	75	int _alphaOrKs; //flag to eval different parameters (alpha,beta or ks)
	76	distribution *_distr;
	77	bool _isGamma; //gamma = true/ beta=false
	78
	79	};
	80
	81	#endif
	82
	83

+177

-0

libs/phylogeny/bestAlphaAndNu.cpp less more

	0	// $Id: bestAlphaAndNu.cpp 1975 2007-04-22 13:47:28Z privmane $
	1	#include <iostream>
	2	using namespace std;
	3
	4	#include "bestAlphaAndNu.h"
	5
	6	// ******************
	7	// * USSRV *
	8	// ******************
	9
	10	MDOUBLE bestFFixedTreeUSSRV::operator()(const tree& et,
	11	const sequenceContainer& sc,
	12	const sequenceContainer& baseSc,
	13	ussrvModel& model,
	14	const Vdouble * weights,
	15	const MDOUBLE upperBoundOnF,
	16	const MDOUBLE epsilonFOptimization){
	17
	18	MDOUBLE bestF=0;
	19	const MDOUBLE cx=upperBoundOnF;// left, middle, right limit on alpha
	20	const MDOUBLE bx=model.getF();
	21	const MDOUBLE ax=0.0;
	22	LOG(5,<<"** Optimizing F ** " << endl<< "bestFFixedTreeSSRV::operator() bx is :" << bx << endl);
	23	LOG(9,<<"ax is :" << ax << " cx is :" << cx << endl);
	24	_bestL = -brent(ax,bx,cx,
	25	C_evalFUSSRV(et,sc,baseSc,&model,weights),
	26	epsilonFOptimization,
	27	&bestF);
	28	setF(bestF,model);
	29	_bestF= bestF;
	30	return _bestL;
	31	}
	32
	33	MDOUBLE bestAlphaFixedTreeUSSRV::operator()(const tree& et, //findBestAlphaFixedTree
	34	const sequenceContainer& sc,
	35	const sequenceContainer& baseSc,
	36	ussrvModel& model,
	37	const Vdouble * weights,
	38	const MDOUBLE upperBoundOnAlpha,
	39	const MDOUBLE epsilonAlphaOptimization){
	40
	41	MDOUBLE bestA=0;
	42	const MDOUBLE cx=upperBoundOnAlpha;// left, middle, right limit on alpha
	43	const MDOUBLE bx=model.getAlpha();
	44	const MDOUBLE ax=0.0;
	45	LOG(5,<<"** Optimizing Alpha ** " << endl<< "bestAlphaFixedTreeSSRV::operator() bx is :" << bx << endl);
	46	_bestL = -brent(ax,bx,cx,
	47	C_evalAlphaUSSRV(et,sc,baseSc,&model,weights),
	48	epsilonAlphaOptimization,
	49	&bestA);
	50	setAlpha(bestA,model);
	51	_bestAlpha= bestA;
	52	return _bestL;
	53	}
	54
	55	// Alpha is fixed
	56	MDOUBLE bestNuFixedTreeUSSRV::operator()(const tree& et,
	57	const sequenceContainer& sc,
	58	const sequenceContainer& baseSc,
	59	ussrvModel& model,
	60	const Vdouble * weights,
	61	const MDOUBLE upperBoundOnNu,
	62	const MDOUBLE epsilonNuOptimization){
	63
	64
	65	MDOUBLE bestN=0;
	66	// define the Nu bounds
	67	const MDOUBLE cx=upperBoundOnNu;// left, midle, right limit on alpha
	68	const MDOUBLE bx= model.getNu();
	69	const MDOUBLE ax=0.0;
	70	LOG(5,<<"** Optimizing Nu ** " << endl << "bestNuFixedTreeSSRV::operator() bx is : " << bx << endl);
	71	_bestL = -brent(ax,bx,cx, C_evalNuUSSRV(et,sc,baseSc,&model,weights), epsilonNuOptimization, &bestN);
	72	setNu(bestN,model);
	73	_bestNu= bestN;
	74	return _bestL;
	75	}
	76
	77
	78	// ******************
	79	// * SSRV *
	80	// ******************
	81
	82	MDOUBLE bestAlphaFixedTreeSSRV::operator()(const tree& et, //findBestAlphaFixedTree
	83	const sequenceContainer& sc, stochasticProcessSSRV& ssrvSp, const Vdouble * weights,
	84	const MDOUBLE lowerBoundOnAlpha, const MDOUBLE upperBoundOnAlpha, const MDOUBLE epsilonAlphaOptimization){
	85
	86	MDOUBLE bestA=0;
	87	const MDOUBLE cx=upperBoundOnAlpha;// left, midle, right limit on alpha
	88	replacementModelSSRV* pMulRM = static_cast<replacementModelSSRV*>(ssrvSp.getPijAccelerator()->getReplacementModel());
	89	gammaDistribution* gammaDist = static_cast<gammaDistribution*>(pMulRM->getDistribution());
	90	const MDOUBLE bx=gammaDist->getAlpha();
	91	const MDOUBLE ax=lowerBoundOnAlpha;
	92	LOG(5,<<"** Optimizing Alpha ** " << endl<< "bestAlphaFixedTreeSSRV::operator() bx is :" << bx << endl);
	93	_bestL = -brent(ax,bx,cx,
	94	C_evalAlphaSSRV(et,sc,ssrvSp,weights), epsilonAlphaOptimization, &bestA);
	95
	96	setAlpha(bestA,ssrvSp);
	97	_bestAlpha= bestA;
	98	return _bestL;
	99	}
	100
	101	// Alpha is fixed
	102	MDOUBLE bestNuFixedTreeSSRV::operator()(const tree& et, const sequenceContainer& sc,
	103	stochasticProcessSSRV& ssrvSp, const Vdouble * weights, const MDOUBLE lowerBoundOnNu, const MDOUBLE upperBoundOnNu,
	104	const MDOUBLE epsilonNuOptimization) {
	105
	106	MDOUBLE bestN=0;
	107	// define the Nu bounds
	108	const MDOUBLE cx=upperBoundOnNu;// left, middle, right limit on alpha
	109	const MDOUBLE bx= static_cast<replacementModelSSRV*>(ssrvSp.getPijAccelerator()->getReplacementModel())->getRateOfRate();
	110	const MDOUBLE ax=lowerBoundOnNu;
	111	LOG(5,<<"** Optimizing Nu ** " << endl << "bestNuFixedTreeSSRV::operator() bx is : " << bx << endl);
	112	_bestL = -brent(ax,bx,cx, C_evalNuSSRV(et,sc,ssrvSp,weights), epsilonNuOptimization, &bestN);
	113
	114	setNu(bestN,ssrvSp);
	115	_bestNu= bestN;
	116	return _bestL;
	117	}
	118
	119
	120	MDOUBLE bestTamura92ParamFixedTreeSSRV::operator()(const tree& et,
	121	const sequenceContainer& sc,
	122	stochasticProcessSSRV& ssrvSp,
	123	const Vdouble * weights/= NULL /,
	124	const int maxTotalIterations /* = 5 */,
	125	const MDOUBLE epsilonLikelihoodImprovment /* = 0.05 */,
	126	const MDOUBLE lowerBoundOnTrTv /* = 0.0 */,
	127	const MDOUBLE upperBoundOnTrTv /* = 10.0 */,
	128	const MDOUBLE lowerBoundOnTheta /* = 0.0 */,
	129	const MDOUBLE upperBoundOnTheta /* = 1.0 */,
	130	const MDOUBLE epsilonTrTvOptimization /* = 0.01 */,
	131	const MDOUBLE epsilonThetaOptimization /* = 0.01 */){
	132
	133	LOG(5,<<"Starting bestTamura92ParamFixedTreeSSRV::operator() : find Best TrTv and theta"<<endl);
	134	MDOUBLE oldL = VERYSMALL;
	135	MDOUBLE newL = VERYSMALL;
	136
	137	// first guess for the parameters
	138	MDOUBLE prevTrTv = static_cast<tamura92>(static_cast<replacementModelSSRV>(ssrvSp.getPijAccelerator()->getReplacementModel())->getBaseRM())->getTrTv();
	139	MDOUBLE prevTheta = static_cast<tamura92>(static_cast<replacementModelSSRV>(ssrvSp.getPijAccelerator()->getReplacementModel())->getBaseRM())->getTheta();
	140
	141	for (int i=0; i < maxTotalIterations; ++i) {
	142	// optimize TrTv
	143	newL = -brent(lowerBoundOnTrTv, prevTrTv, upperBoundOnTrTv,
	144	C_evalTrTvSSRV(et,sc,ssrvSp,weights),
	145	epsilonTrTvOptimization,
	146	&_bestTrTv);
	147	setTrTv(_bestTrTv,ssrvSp);
	148
	149	// optimize Theta
	150	newL = -brent(lowerBoundOnTheta, prevTheta, upperBoundOnTheta,
	151	C_evalThetaSSRV(et,sc,ssrvSp,weights),
	152	epsilonThetaOptimization,
	153	&_bestTheta);
	154	setTheta(_bestTheta,ssrvSp);
	155
	156	// check for improvement in the likelihood
	157	if (newL > oldL+epsilonLikelihoodImprovment) {
	158	prevTrTv = _bestTrTv;
	159	prevTheta = _bestTheta;
	160	oldL = newL;
	161	_bestL = newL;
	162	} else {
	163	if (newL>oldL) {
	164	_bestL = newL;
	165	} else {
	166	LOG(5,<<"bestTamura92ParamFixedTreeSSRV::operator() likelihood went down!"<<endl<<"oldL = "<< oldL <<" newL= "<<newL<<endl);
	167	_bestL = oldL;
	168	_bestTrTv = prevTrTv;
	169	_bestTheta = prevTheta;
	170	setTrTvAndTheta(prevTrTv,prevTheta,ssrvSp);
	171	}
	172	break;
	173	}
	174	}
	175	return _bestL;
	176	}

+215

-0

libs/phylogeny/bestAlphaAndNu.h less more

	0	// $Id: bestAlphaAndNu.h 1975 2007-04-22 13:47:28Z privmane $
	1	#ifndef ___BEST_ALPHA_AND_NU
	2	#define ___BEST_ALPHA_AND_NU
	3
	4	#include "definitions.h"
	5
	6	#include "sequenceContainer.h"
	7	#include "stochasticProcess.h"
	8	#include "gammaDistribution.h"
	9	#include "tree.h"
	10	#include "replacementModelSSRV.h"
	11	#include "tamura92.h"
	12	#include "stochasticProcessSSRV.h"
	13	#include "C_evalParamUSSRV.h"
	14	#include "bestAlpha.h"
	15	#include "numRec.h"
	16	#include "bblEM.h"
	17	#include "logFile.h"
	18
	19	// ******************
	20	// * USSRV *
	21	// ******************
	22
	23	// Nu is fixed. The tree is fixed
	24	class bestAlphaFixedTreeUSSRV {
	25	public:
	26	explicit bestAlphaFixedTreeUSSRV() {}
	27	MDOUBLE operator()(const tree& et,
	28	const sequenceContainer& sc,
	29	const sequenceContainer& baseSc,
	30	ussrvModel& model,
	31	const Vdouble * weights=NULL,
	32	const MDOUBLE upperBoundOnAlpha = 15,
	33	const MDOUBLE epsilonAlphaOptimization = 0.01);
	34	MDOUBLE getBestAlpha() {return _bestAlpha;}
	35	MDOUBLE getBestL() {return _bestL;}
	36
	37	void setAlpha(MDOUBLE alpha, ussrvModel& model) const
	38	{
	39	model.updateAlpha(alpha);
	40	}
	41
	42	void setBestL(MDOUBLE bestL) { _bestL = bestL;}
	43
	44	private:
	45	MDOUBLE _bestAlpha;
	46	MDOUBLE _bestL;
	47	};
	48
	49	// Alpha is fixed
	50	class bestNuFixedTreeUSSRV {
	51	public:
	52	explicit bestNuFixedTreeUSSRV(){}
	53	MDOUBLE operator()(const tree& et,
	54	const sequenceContainer& sc,
	55	const sequenceContainer& baseSc,
	56	ussrvModel& model,
	57	const Vdouble * weights=NULL,
	58	const MDOUBLE upperBoundOnNu = 15,
	59	const MDOUBLE epsilonNuOptimization = 0.01);
	60	MDOUBLE getBestNu() {return _bestNu;}
	61	MDOUBLE getBestL() {return _bestL;}
	62	void setNu(MDOUBLE nu, ussrvModel& model) const
	63	{
	64	model.updateNu(nu);
	65	}
	66	void setBestL(MDOUBLE bestL) { _bestL = bestL;}
	67
	68	private:
	69	MDOUBLE _bestNu;
	70	MDOUBLE _bestL;
	71	};
	72
	73	class bestFFixedTreeUSSRV {
	74	public:
	75	explicit bestFFixedTreeUSSRV() {}
	76	MDOUBLE operator()(const tree& et,
	77	const sequenceContainer& sc,
	78	const sequenceContainer& baseSc,
	79	ussrvModel& model,
	80	const Vdouble * weights=NULL,
	81	const MDOUBLE upperBoundOnF = 1,
	82	const MDOUBLE epsilonFOptimization = 0.01);
	83	MDOUBLE getBestF() {return _bestF;}
	84	MDOUBLE getBestL() {return _bestL;}
	85	void setF(MDOUBLE f, ussrvModel& model) const
	86	{
	87	if ( (f>1) \|\| (f < 0))
	88	{
	89	LOG(5,<<"bestFFixedTreeSSRV:setF, f must be between 0 to 1. f = " << f << endl);
	90	return;
	91	}
	92	model.updateF(f);
	93	}
	94	void setBestL(MDOUBLE bestL) { _bestL = bestL;}
	95
	96	private:
	97	MDOUBLE _bestF;
	98	MDOUBLE _bestL;
	99	};
	100
	101
	102	// ******************
	103	// * SSRV *
	104	// ******************
	105
	106	// Nu is fixed. The tree is fixed
	107	class bestAlphaFixedTreeSSRV {
	108	public:
	109	explicit bestAlphaFixedTreeSSRV() {}
	110	MDOUBLE operator()(const tree& et,
	111	const sequenceContainer& sc,
	112	stochasticProcessSSRV& ssrvSp,
	113	const Vdouble * weights=NULL,
	114	const MDOUBLE lowerBoundOnAlpha = 0,
	115	const MDOUBLE upperBoundOnAlpha = 10,
	116	const MDOUBLE epsilonAlphaOptimization = 0.01);
	117	MDOUBLE getBestAlpha() {return _bestAlpha;}
	118	MDOUBLE getBestL() {return _bestL;}
	119
	120	void setAlpha(MDOUBLE alpha, stochasticProcessSSRV& ssrvSp) const
	121	{
	122	if (alpha<0)
	123	errorMsg::reportError("bestAlphaFixedTreeSSRV::setAlpha, alpha is < 0 ");
	124
	125	replacementModelSSRV* pMulRM = static_cast<replacementModelSSRV*>(ssrvSp.getPijAccelerator()->getReplacementModel());
	126	gammaDistribution* gammaDist = static_cast<gammaDistribution*>(pMulRM->getDistribution());
	127	gammaDist->setAlpha(alpha);
	128	pMulRM->updateQ();
	129	}
	130
	131	void setBestL(MDOUBLE bestL) { _bestL = bestL;}
	132
	133	private:
	134	MDOUBLE _bestAlpha;
	135	MDOUBLE _bestL;
	136	};
	137
	138	// Alpha is fixed
	139	class bestNuFixedTreeSSRV {
	140	public:
	141	explicit bestNuFixedTreeSSRV(){}
	142	MDOUBLE operator()(const tree& et,
	143	const sequenceContainer& sc,
	144	stochasticProcessSSRV& ssrvSp,
	145	const Vdouble * weights=NULL,
	146	const MDOUBLE lowerBoundOnNu = 0,
	147	const MDOUBLE upperBoundOnNu = 15,
	148	const MDOUBLE epsilonNuOptimization = 0.01);
	149	MDOUBLE getBestNu() {return _bestNu;}
	150	MDOUBLE getBestL() {return _bestL;}
	151	void setNu(MDOUBLE nu, stochasticProcessSSRV& ssrvSp) const
	152	{
	153	if (nu<0)
	154	errorMsg::reportError("ussrvModel::updateNu , nu is < 0");
	155
	156	static_cast<replacementModelSSRV*>(ssrvSp.getPijAccelerator()->getReplacementModel())->setRateOfRate(nu);
	157	}
	158
	159	void setBestL(MDOUBLE bestL) { _bestL = bestL;}
	160
	161	private:
	162	MDOUBLE _bestNu;
	163	MDOUBLE _bestL;
	164	};
	165
	166
	167	class bestTamura92ParamFixedTreeSSRV {
	168	public:
	169	explicit bestTamura92ParamFixedTreeSSRV(){}
	170	MDOUBLE operator()(const tree& et,
	171	const sequenceContainer& sc,
	172	stochasticProcessSSRV& ssrvSp,
	173	const Vdouble * weights=NULL,
	174	const int maxTotalIterations = 5,
	175	const MDOUBLE epsilonLikelihoodImprovment = 0.05,
	176	const MDOUBLE lowerBoundOnTrTv = 0.0,
	177	const MDOUBLE upperBoundOnTrTv = 10.0,
	178	const MDOUBLE lowerBoundOnTheta = 0.0,
	179	const MDOUBLE upperBoundOnTheta = 1.0,
	180	const MDOUBLE epsilonTrTvOptimization = 0.01,
	181	const MDOUBLE epsilonThetaOptimization = 0.01);
	182	MDOUBLE getBestTrTv() {return _bestTrTv;}
	183	MDOUBLE getBestTheta() {return _bestTheta;}
	184	MDOUBLE getBestL() {return _bestL;}
	185	void setTrTv(MDOUBLE TrTv, stochasticProcessSSRV& ssrvSp) const {
	186	replacementModelSSRV* pMulRM = static_cast<replacementModelSSRV*>(ssrvSp.getPijAccelerator()->getReplacementModel());
	187	static_cast<tamura92*>(pMulRM->getBaseRM())->changeTrTv(TrTv);
	188	pMulRM->updateQ();
	189	}
	190
	191	void setTheta(MDOUBLE theta, stochasticProcessSSRV& ssrvSp) const {
	192	replacementModelSSRV* pMulRM = static_cast<replacementModelSSRV*>(ssrvSp.getPijAccelerator()->getReplacementModel());
	193	static_cast<tamura92*>(pMulRM->getBaseRM())->changeTheta(theta);
	194	pMulRM->updateFreq();
	195	pMulRM->updateQ();
	196	}
	197
	198	void setTrTvAndTheta(MDOUBLE TrTv, MDOUBLE theta, stochasticProcessSSRV& ssrvSp) {
	199	replacementModelSSRV* pMulRM = static_cast<replacementModelSSRV*>(ssrvSp.getPijAccelerator()->getReplacementModel());
	200	tamura92* tamuraRM = static_cast<tamura92*>(pMulRM->getBaseRM());
	201	tamuraRM->changeTrTv(TrTv);
	202	tamuraRM->changeTheta(theta);
	203	pMulRM->updateFreq();
	204	pMulRM->updateQ();
	205	}
	206
	207	private:
	208	MDOUBLE _bestTrTv;
	209	MDOUBLE _bestTheta;
	210	MDOUBLE _bestL;
	211	};
	212
	213
	214	#endif // ___BEST_ALPHA_AND_NU

+270

-0

libs/phylogeny/bestAlphaManyTrees.cpp less more

	0	// $Id: bestAlphaManyTrees.cpp 962 2006-11-07 15:13:34Z privmane $
	1
	2	// version 1.00
	3	// last modified 3 Nov 2002
	4
	5	#include "bestAlphaManyTrees.h"
	6	#include "bestAlpha.h"
	7	#include "numRec.h"
	8	#include "bblEMProportional.h"
	9	#include "bblEMSeperate.h"
	10	#include "logFile.h"
	11	#include <iostream>
	12	using namespace std;
	13
	14	#ifndef VERBOS
	15	#define VERBOS
	16	#endif
	17
	18
	19	void bestAlpha::optimizeAlphaNG_EM_PROP(tree& et,
	20	vector<sequenceContainer>& sc,
	21	vector<stochasticProcess>& sp,
	22	const vector<Vdouble > weights,
	23	MDOUBLE & bestAlpha,
	24	MDOUBLE & likelihoodScore,
	25	const int maxIterations,
	26	const MDOUBLE epsilon){
	27
	28	//LOG(5,<<" 1. bestAlpha::findBestAlpha"<<endl);
	29	MDOUBLE oldL = VERYSMALL;
	30	MDOUBLE ax,bx,cx; // left, midle, right limit on alpha
	31	bx=1.5; // the limits are becoming more narrow with time.
	32	ax=0;
	33	cx=5.0;
	34	MDOUBLE tol=0.01f;
	35	MDOUBLE bestA=0;
	36	int i;
	37	const int maxIterationsThisF = 50;
	38	for (i=0; i < maxIterationsThisF; ++i) {
	39
	40	bblEMProportional bblEMprop1(et,sc,sp,weights,maxIterations,epsilon);
	41	MDOUBLE newL = bblEMprop1.getTreeLikelihood();
	42
	43	#ifdef VERBOS
	44	LOG(5,<<"Before optimizing alpha, L = "<<newL<<endl);
	45	#endif
	46
	47	MDOUBLE likeAfterAlphaOpt = -brent(ax,bx,cx, // NEW MINUS. CHECK
	48	C_evalAlphaManyTrees(et,sc,sp,weights),
	49	tol,
	50	&bestA); // THIS FUNCTION CHANGE SP, BUT YET ONE HAVE TO INSERT THE BEST ALPHAS.
	51	for (int z=0; z < sp.size();++z) {
	52	(static_cast<gammaDistribution*>(sp[z].distr()))->setAlpha(bestA);
	53	}
	54
	55	#ifdef VERBOS
	56	LOG(5,<<"After optimizing alpha, L = "<<likeAfterAlphaOpt<<endl);
	57	LOG(5,<<" best A = " << bestA<<endl);
	58	#endif
	59	newL = likeAfterAlphaOpt;
	60
	61
	62
	63	if (newL > oldL+0.01) {
	64	oldL = newL;
	65	}
	66	else {
	67	if (newL > oldL) {
	68	likelihoodScore = newL;
	69	bestAlpha= bestA;
	70	return;
	71	}
	72	else {
	73	likelihoodScore = oldL;
	74	bestAlpha= bestA;
	75	return;
	76	}
	77	}
	78	}
	79	if (i == maxIterationsThisF) errorMsg::reportError(" to many iteration in function optimizeBranchLength");
	80	}
	81
	82	/*
	83	void findBestAlphaManyTrees::findBestAlphaFixedManyTrees(const vector<tree>& et,
	84	vector<positionInfo>& pi,
	85	const VVdouble * weights) {
	86	//LOG(5,<<" 1. bestAlpha::findBestAlpha"<<endl);
	87	MDOUBLE bestA=0;
	88	checkAllocation();
	89	MDOUBLE ax,bx,cx; // left, midle, right limit on alpha
	90	MDOUBLE tol;
	91	ax=0;bx=1.5;cx=2;
	92	tol=0.01f;
	93	_bestL = brent(ax,bx,cx,
	94	C_evalAlphaManyTrees(et,_pi,weights),
	95	tol,
	96	&bestA);
	97	_bestAlpha= bestA;
	98	}
	99
	100	*/
	101
	102	void bestAlpha::optimizeAlphaNG_EM_SEP(
	103	vector<tree>& et,
	104	vector<sequenceContainer>& sc,
	105	vector<stochasticProcess>& sp,
	106	const vector<Vdouble > weights,
	107	MDOUBLE & bestAlpha,
	108	MDOUBLE & likelihoodScore,
	109	const int maxIterations,
	110	const MDOUBLE epsilon) {
	111	// SEPERATE ANALYSIS, 1 GAMMA
	112	//LOG(5,<<" 1. bestAlpha::findBestAlpha"<<endl);
	113	MDOUBLE oldL = VERYSMALL;
	114	MDOUBLE newL = VERYSMALL;
	115	MDOUBLE ax,bx,cx; // left, midle, right limit on alpha
	116	bx=1.5; // the limits are becoming more narrow with time.
	117	ax=0;
	118	cx=5.0;
	119	MDOUBLE tol=0.01f;
	120	MDOUBLE bestA=0;
	121	const int maxIterationsThisF = 50;
	122	for (int i=0; i < maxIterationsThisF; ++i) {
	123	newL=0;
	124	LOG(3,<<"starting iteration "<<i<<endl);
	125	bblEMSeperate bblEMsep1(et,
	126	sc,
	127	sp,
	128	weights,
	129	maxIterations,
	130	epsilon);
	131	newL =bblEMsep1.getTreeLikelihood();
	132	#ifdef VERBOS
	133	LOG(5,<<"Before optimizing alpha, L = "<<newL<<endl);
	134	#endif
	135	//MDOUBLE alphaB4optimizing = (static_cast<gammaDistribution*>(sp[0].distr()))->getAlpha();
	136	MDOUBLE likeAfterAlphaOpt = -brent(ax,bx,cx, // NEW MINUS - CHECK!
	137	C_evalAlphaManyTreesSep(et,sc,sp,weights),
	138	tol,
	139	&bestA);
	140
	141	if (likeAfterAlphaOpt>newL) {
	142	for (int i=0; i < sc.size();++i) {
	143	(static_cast<gammaDistribution*>(sp[0].distr()))->setAlpha(bestA);
	144	}
	145	newL = likeAfterAlphaOpt;
	146	}
	147	#ifdef VERBOS
	148	LOG(5,<<"After optimizing alpha, L = "<<newL<<endl);
	149	#endif
	150	if (newL > oldL+0.01) {
	151	oldL = newL;
	152	}
	153	else {
	154	if (newL > oldL) {
	155	likelihoodScore = newL;
	156	bestAlpha= bestA;
	157	return;
	158	}
	159	else {
	160	likelihoodScore = oldL;
	161	bestAlpha= bestA;
	162	return;
	163	}
	164	}
	165	}
	166	errorMsg::reportError(" to many iteration in function optimizeBranchLength");
	167	}
	168
	169	//==================== optimizing n alphas ==============================
	170
	171	void bestAlpha::optimizeAlphaNG_EM_PROP_n_alpha(tree& et,
	172	vector<sequenceContainer>& sc,
	173	vector<stochasticProcess>& sp,
	174	const vector<Vdouble > weights,
	175	vector<MDOUBLE> & bestAlphas,
	176	MDOUBLE & likelihoodScore,
	177	const int maxIterations,
	178	const MDOUBLE epsilon){
	179
	180	//LOG(5,<<" 1. bestAlpha::findBestAlpha"<<endl);
	181	MDOUBLE oldL = VERYSMALL;
	182	MDOUBLE newL = VERYSMALL;
	183	MDOUBLE ax,bx,cx; // left, midle, right limit on alpha
	184	bx=1.5; // the limits are becoming more narrow with time.
	185	ax=0;
	186	cx=5.0;
	187	vector<MDOUBLE> bestAs= bestAlphas;
	188	vector<MDOUBLE> newAlphas(sc.size(),0);
	189	int i;
	190	const int maxIterationsThisF = 50;
	191	for (i=0; i < maxIterationsThisF; ++i) {
	192	#ifdef VERBOS
	193	LOG(5,<<" ============================ optimizing bbl (fixed alphas) ================= \n");
	194	#endif
	195	newL=0;
	196	bblEMProportional bblem1(et,sc,sp,weights,maxIterations,epsilon);
	197	MDOUBLE tmpX =bblem1.getTreeLikelihood();
	198
	199	#ifdef VERBOS
	200	LOG(5,<<"likelihood of trees (sum)= "<<tmpX<<endl);
	201	#endif
	202	newL =tmpX;
	203	#ifdef VERBOS
	204	LOG(5,<<"Before optimizing alpha, L = "<<newL<<endl);
	205	LOG(5,<<" ============================ optimizing alphas ================= \n");
	206	#endif
	207	const MDOUBLE upperBoundOnAlpha = 5;
	208	MDOUBLE likeAfterAlphaOpt = 0;
	209	for (int treeNumber =0; treeNumber<sc.size();++treeNumber) {
	210	bestAlphaFixedTree bestAlphaFixedTree1(et,
	211	sc[treeNumber],
	212	sp[treeNumber],
	213	weights?(*weights)[treeNumber]:NULL,
	214	upperBoundOnAlpha,
	215	epsilon);
	216	MDOUBLE tmpX = bestAlphaFixedTree1.getBestL();
	217	#ifdef VERBOS
	218	LOG(5,<<"likelihood of tree "<<treeNumber<<" = "<<tmpX<<endl);
	219	#endif
	220	newAlphas[treeNumber] = bestAlphaFixedTree1.getBestAlpha();
	221	#ifdef VERBOS
	222	LOG(5,<<" best alpha tree number: "<<treeNumber<<" = "<<newAlphas[treeNumber]<<endl);
	223	#endif
	224	likeAfterAlphaOpt +=tmpX;
	225	}
	226
	227
	228	if (likeAfterAlphaOpt>newL) {
	229	for (int z=0; z < sp.size();++z) {
	230	(static_cast<gammaDistribution*>(sp[z].distr()))->setAlpha(newAlphas[z]);
	231	}
	232	newL = likeAfterAlphaOpt;
	233	bestAs = newAlphas;
	234	}
	235
	236	#ifdef VERBOS
	237	LOG(5,<<"After optimizing alpha, L = "<<newL<<endl);
	238	#endif
	239
	240	if (newL > oldL+0.01) {
	241	oldL = newL;
	242	}
	243	else {
	244	if (newL > oldL) {
	245	likelihoodScore = newL;
	246	bestAlphas= bestAs;
	247	return;
	248	}
	249	else {
	250	likelihoodScore = oldL;
	251	bestAlphas= bestAs;
	252	return;
	253	}
	254	}
	255	}
	256	if (i == maxIterationsThisF) {
	257	errorMsg::reportError(" to many iteration in function optimizeBranchLength");
	258	}
	259	}
	260
	261	//// CHECK:
	262	//MDOUBLE check_sum=0;
	263	//for (int k=0; k < sp.size(); ++k) {
	264	// MDOUBLE check = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(et,sc[k],sp[k]);
	265	// LOG(5,<<" CHECK = "<< check<<endl);
	266	// check_sum+=check;
	267	//}
	268	//LOG(5,<<" check-sum = "<<check_sum<<endl);
	269	//// END CHECK

+127

-0

libs/phylogeny/bestAlphaManyTrees.h less more

	0	// $Id: bestAlphaManyTrees.h 962 2006-11-07 15:13:34Z privmane $
	1
	2	#ifndef ___BEST_ALPHA_MANY_TREES
	3	#define ___BEST_ALPHA_MANY_TREES
	4
	5	#include "definitions.h"
	6	#include "computePijComponent.h"
	7	#include "sequenceContainer.h"
	8	#include "bblEM.h"
	9	#include "gammaDistribution.h"
	10	#include "likelihoodComputation.h"
	11	#include "logFile.h"
	12
	13	using namespace likelihoodComputation;
	14
	15	//#define VERBOS
	16	namespace bestAlpha {
	17	/* void optimizeAlpha1G_EM( tree& et,
	18	const sequenceContainer& sc,
	19	const stochasticProcess& sp,
	20	const Vdouble * weights,
	21	MDOUBLE & bestAlpha,
	22	MDOUBLE & likelihoodScore,
	23	const int maxIterations=1000,
	24	const MDOUBLE epsilon=0.05);
	25	*/
	26	void optimizeAlphaNG_EM_SEP(vector<tree>& et,
	27	vector<sequenceContainer>& sc,
	28	vector<stochasticProcess> &sp,
	29	const vector<Vdouble > weights,
	30	MDOUBLE & bestAlpha,
	31	MDOUBLE & likelihoodScore,
	32	const int maxIterations=1000,
	33	const MDOUBLE epsilon=0.05);
	34	void optimizeAlphaNG_EM_PROP(tree& et,// 1 alpha for all trees!
	35	vector<sequenceContainer>& sc,
	36	vector<stochasticProcess>& sp,
	37	const vector<Vdouble > weights,
	38	MDOUBLE & bestAlpha,
	39	MDOUBLE & likelihoodScore,
	40	const int maxIterations=1000,
	41	const MDOUBLE epsilon=0.05);
	42	void optimizeAlphaNG_EM_PROP_n_alpha(tree& et,// alpha for each trees!
	43	vector<sequenceContainer>& sc,
	44	vector<stochasticProcess>& sp,
	45	const vector<Vdouble > weights,
	46	vector<MDOUBLE> & bestAlpha,
	47	MDOUBLE & likelihoodScore,
	48	const int maxIterations=1000,
	49	const MDOUBLE epsilon=0.05);
	50	};
	51
	52	#include <iostream>// for debugging
	53	using namespace std; // for debugging
	54
	55	class C_evalAlphaManyTrees{
	56	public:
	57	C_evalAlphaManyTrees(tree& et,
	58	vector<sequenceContainer>& sc,
	59	vector<stochasticProcess>& sp,
	60	const vector<Vdouble > weights)
	61	: _et(et),_sc(sc),_sp(sp),_weights(weights) {};
	62	private:
	63	const tree& _et;
	64	const vector<sequenceContainer>& _sc;
	65	vector<stochasticProcess>& _sp;
	66	const vector<Vdouble > _weights;
	67	public:
	68	MDOUBLE operator() (MDOUBLE alpha) {
	69	#ifdef VERBOS
	70	LOG(5,<<"trying alpha: "<<alpha<<endl);
	71	#endif
	72	MDOUBLE res=0;
	73	for (int i=0; i < _sc.size();++i) {
	74
	75	if (_sp[i].categories() == 1) {
	76	errorMsg::reportError(" one category when trying to optimize alpha");
	77	}
	78	(static_cast<gammaDistribution*>(_sp[i].distr()))->setAlpha(alpha);
	79	res += likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(_et,_sc[i],_sp[i],_weights?(*_weights)[i]:NULL);
	80	}
	81	#ifdef VERBOS
	82	LOG(5,<<"likelihood = "<<-res<<endl);
	83	#endif
	84	return -res;
	85	}
	86	};
	87
	88	class C_evalAlphaManyTreesSep{ // separate model, 1 gamma
	89	public:
	90	C_evalAlphaManyTreesSep(vector<tree>& et,
	91	vector<sequenceContainer>& sc,
	92	vector<stochasticProcess>& sp,
	93	const vector<Vdouble > weights)
	94	: _et(et),_sc(sc),_sp(sp),_weights(weights) {};
	95	private:
	96	const vector<tree>& _et;
	97	const vector<sequenceContainer>& _sc;
	98	vector<stochasticProcess>& _sp;
	99	const vector<Vdouble > _weights;
	100	public:
	101	MDOUBLE operator() (MDOUBLE alpha) {
	102	//LOG(5,<<"trying alpha: "<<alpha<<endl);
	103	MDOUBLE res=0;
	104	for (int i=0; i < _sc.size();++i) {
	105
	106	if (_sp[i].categories() == 1) {
	107	errorMsg::reportError(" one category when trying to optimize alpha");
	108	}
	109	(static_cast<gammaDistribution*>(_sp[i].distr()))->setAlpha(alpha);
	110	res += likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(_et[i],_sc[i],_sp[i],_weights?(*_weights)[i]:NULL);
	111	}
	112	// LOG(5,<<" with alpha = "<<alpha<<" logL = "<<res<<endl);
	113	return -res;
	114	}
	115	};
	116
	117
	118
	119
	120
	121
	122
	123
	124	#endif
	125
	126

+467

-0

libs/phylogeny/bestGtrModelParams.cpp less more

	0	// $Id: bestGtrModelparams.cpp 2008-29-04 10:57:00Z nimrod $
	1
	2	#include "bestGtrModelParams.h"
	3	#include <iostream>
	4	using namespace std;
	5
	6	#include "bblEM.h"
	7	#include "bblEMProportionalEB.h"
	8	#include "bblLSProportionalEB.h"
	9	#include "numRec.h"
	10	#include "logFile.h"
	11	#include "bestAlpha.h"
	12
	13	bestGtrModel::bestGtrModel(tree& et, // find best Gtr Model Params
	14	const sequenceContainer& sc,
	15	stochasticProcess& sp,
	16	const Vdouble * weights,
	17	const int maxTotalIterations,
	18	const MDOUBLE epsilonLikelihoodImprovment,
	19	const MDOUBLE epsilonLoglikelihoodForGTRParam,
	20	const MDOUBLE upperBoundGTRParam,
	21	const bool optimizeTree,
	22	const bool optimizeAlpha){
	23	LOG(5,<<"Starting bestGtrModel: find Best replacement matrix parameters"<<endl);
	24	MDOUBLE oldL = VERYSMALL;
	25	MDOUBLE newL = VERYSMALL;
	26	_bestL = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(et,sc,sp,weights);
	27
	28	MDOUBLE prev_a2c = (static_cast<gtrModel*>(sp.getPijAccelerator()->getReplacementModel()))->get_a2c();
	29	MDOUBLE prev_a2g = (static_cast<gtrModel*>(sp.getPijAccelerator()->getReplacementModel()))->get_a2g();
	30	MDOUBLE prev_a2t = (static_cast<gtrModel*>(sp.getPijAccelerator()->getReplacementModel()))->get_a2t();
	31	MDOUBLE prev_c2g = (static_cast<gtrModel*>(sp.getPijAccelerator()->getReplacementModel()))->get_c2g();
	32	MDOUBLE prev_c2t = (static_cast<gtrModel*>(sp.getPijAccelerator()->getReplacementModel()))->get_c2t();
	33	MDOUBLE prev_g2t = (static_cast<gtrModel*>(sp.getPijAccelerator()->getReplacementModel()))->get_g2t();
	34
	35	MDOUBLE prevAlpha = epsilonLoglikeForBBL;
	36
	37	for (int i=0; i < maxTotalIterations; ++i) {
	38	//optimize a2c
	39	newL = -brent(0.0, prev_a2c, upperBoundGTRParam,
	40	C_evalGTRParam(a2c,et,sc,sp,weights),
	41	epsilonLoglikelihoodForGTRParam,
	42	&_best_a2c);
	43	if (newL >= _bestL)
	44	{
	45	_bestL = newL;
	46	(static_cast<gtrModel*>(sp.getPijAccelerator()->getReplacementModel()))->set_a2c(_best_a2c);//safety
	47	}
	48	else
	49	{//likelihood went down!
	50	(static_cast<gtrModel*>(sp.getPijAccelerator()->getReplacementModel()))->set_a2c(prev_a2c);
	51	LOG(5,<<"likelihood went down in optimizing a2c"<<endl<<"oldL = "<<_bestL);
	52	}
	53
	54	//optimize a2t
	55	newL = -brent(0.0, prev_a2t, upperBoundGTRParam,
	56	C_evalGTRParam(a2t,et,sc,sp,weights),
	57	epsilonLoglikelihoodForGTRParam,
	58	&_best_a2t);
	59	if (newL >= _bestL)
	60	{
	61	_bestL = newL;
	62	(static_cast<gtrModel*>(sp.getPijAccelerator()->getReplacementModel()))->set_a2t(_best_a2t);//safety
	63	}
	64	else
	65	{//likelihood went down!
	66	(static_cast<gtrModel*>(sp.getPijAccelerator()->getReplacementModel()))->set_a2t(prev_a2t);
	67	LOG(5,<<"likelihood went down in optimizing a2t"<<endl<<"oldL = "<<_bestL);
	68	}
	69
	70	//optimize a2g
	71	newL = -brent(0.0, prev_a2g, upperBoundGTRParam,
	72	C_evalGTRParam(a2g,et,sc,sp,weights),
	73	epsilonLoglikelihoodForGTRParam,
	74	&_best_a2g);
	75	if (newL >= _bestL)
	76	{
	77	_bestL = newL;
	78	(static_cast<gtrModel*>(sp.getPijAccelerator()->getReplacementModel()))->set_a2g(_best_a2g);//safety
	79	}
	80	else
	81	{//likelihood went down!
	82	(static_cast<gtrModel*>(sp.getPijAccelerator()->getReplacementModel()))->set_a2g(prev_a2g);
	83	LOG(5,<<"likelihood went down in optimizing a2g"<<endl<<"oldL = "<<_bestL);
	84	}
	85
	86	//optimize c2g
	87	newL = -brent(0.0, prev_c2g, upperBoundGTRParam,
	88	C_evalGTRParam(c2g,et,sc,sp,weights),
	89	epsilonLoglikelihoodForGTRParam,
	90	&_best_c2g);
	91	if (newL >= _bestL)
	92	{
	93	_bestL = newL;
	94	(static_cast<gtrModel*>(sp.getPijAccelerator()->getReplacementModel()))->set_c2g(_best_c2g);//safety
	95	}
	96	else
	97	{//likelihood went down!
	98	(static_cast<gtrModel*>(sp.getPijAccelerator()->getReplacementModel()))->set_c2g(prev_c2g);
	99	LOG(5,<<"likelihood went down in optimizing c2g"<<endl<<"oldL = "<<_bestL);
	100	}
	101
	102	//optimize c2t
	103	newL = -brent(0.0, prev_c2t, upperBoundGTRParam,
	104	C_evalGTRParam(c2t,et,sc,sp,weights),
	105	epsilonLoglikelihoodForGTRParam,
	106	&_best_c2t);
	107	if (newL >= _bestL)
	108	{
	109	_bestL = newL;
	110	(static_cast<gtrModel*>(sp.getPijAccelerator()->getReplacementModel()))->set_c2t(_best_c2t);//safety
	111	}
	112	else
	113	{//likelihood went down!
	114	(static_cast<gtrModel*>(sp.getPijAccelerator()->getReplacementModel()))->set_c2t(prev_c2t);
	115	LOG(5,<<"likelihood went down in optimizing c2t"<<endl<<"oldL = "<<_bestL);
	116	}
	117
	118	//optimize g2t
	119	newL = -brent(0.0, prev_g2t, upperBoundGTRParam,
	120	C_evalGTRParam(g2t,et,sc,sp,weights),
	121	epsilonLoglikelihoodForGTRParam,
	122	&_best_g2t);
	123	if (newL >= _bestL)
	124	{
	125	_bestL = newL;
	126	(static_cast<gtrModel*>(sp.getPijAccelerator()->getReplacementModel()))->set_g2t(_best_g2t);//safety
	127	}
	128	else
	129	{//likelihood went down!
	130	(static_cast<gtrModel*>(sp.getPijAccelerator()->getReplacementModel()))->set_g2t(prev_g2t);
	131	LOG(5,<<"likelihood went down in optimizing g2t"<<endl<<"oldL = "<<_bestL);
	132	}
	133	if(optimizeAlpha)
	134	{
	135	newL = -brent(0.0, prevAlpha, upperBoundForAlpha,
	136	C_evalAlpha(et,sc,sp,weights),
	137	epsilonLoglikeForAlphaOptimization,
	138	&_bestAlpha);
	139	(static_cast<gammaDistribution*>(sp.distr()))->setAlpha(_bestAlpha);
	140
	141	if (newL >= _bestL)
	142	{
	143	_bestL = newL;
	144	(static_cast<gammaDistribution*>(sp.distr()))->setAlpha(_bestAlpha); //safety
	145	}
	146	else
	147	{//likelihood went down!
	148	(static_cast<gammaDistribution*>(sp.distr()))->setAlpha(prevAlpha);
	149	LOG(5,<<"likelihood went down in optimizing alpha"<<endl<<"oldL = "<<_bestL);
	150	}
	151	}
	152
	153	if(optimizeTree)
	154	{
	155	bblEM bblEM1(et,sc,sp,weights,maxBBLIt,epsilonLoglikeForBBL);
	156	_bestL = bblEM1.getTreeLikelihood();
	157	}
	158
	159
	160	// check for improvement in the likelihood
	161	if (_bestL > oldL+epsilonLikelihoodImprovment) {
	162	oldL = _bestL;
	163	prev_a2c = _best_a2c;
	164	prev_a2g = _best_a2g;
	165	prev_a2t = _best_a2t;
	166	prev_c2g = _best_c2g;
	167	prev_c2t = _best_c2t;
	168	prev_g2t = _best_g2t;
	169	prevAlpha = _bestAlpha;
	170	} else {
	171	break;
	172	}
	173	}
	174	}
	175
	176	bestGtrModelProportional::bestGtrModelProportional(tree& et, // find best Gtr Model Params under a proportional model
	177	vector<sequenceContainer>& sc,
	178	multipleStochasticProcess* msp,
	179	gammaDistribution* pProportionDist,
	180	Vdouble initLocalAlphas,
	181	Vdouble initLocala2cs,
	182	Vdouble initLocala2gs,
	183	Vdouble initLocala2ts,
	184	Vdouble initLocalc2gs,
	185	Vdouble initLocalc2ts,
	186	Vdouble initLocalg2ts,
	187	const MDOUBLE upperBoundOnLocalAlpha,
	188	const MDOUBLE initGlobalAlpha,
	189	const MDOUBLE upperBoundOnGlobalAlpha,
	190	const MDOUBLE upperBoundGTRParam,
	191	const int maxTotalIterations,
	192	const int maxBBLIterations,
	193	const bool optimizeSelectedBranches,
	194	const bool optimizeTree,
	195	const string branchLengthOptimizationMethod,
	196	const bool optimizeLocalParams,
	197	const bool optimizeGlobalAlpha,
	198	const Vdouble * weights,
	199	const MDOUBLE epsilonLikelihoodImprovment,
	200	const MDOUBLE epsilonLoglikelihoodForGTRParam,
	201	const MDOUBLE epsilonLoglikelihoodForLocalAlphaOptimization,
	202	const MDOUBLE epsilonLoglikelihoodForGlobalAlphaOptimization,
	203	const MDOUBLE epsilonLoglikelihoodForBBL){
	204	LOG(5,<<"Starting bestGtrModelProportional"<<endl);
	205	Vdouble current_a2cVec,current_a2gVec,current_a2tVec,current_c2gVec,current_c2tVec,current_g2tVec,currentLocalAlphaVec;
	206	MDOUBLE currentGlobalAlpha = initGlobalAlpha;
	207	currentLocalAlphaVec = initLocalAlphas;
	208	current_a2cVec = initLocala2cs;
	209	current_a2gVec = initLocala2gs;
	210	current_a2tVec = initLocala2ts;
	211	current_c2gVec = initLocalc2gs;
	212	current_c2tVec = initLocalc2ts;
	213	current_g2tVec = initLocalg2ts;
	214
	215	Vdouble newLvec;
	216	//doubleRep epsilonLoglikelihoodForGlobalAlphaOptimizationDR(epsilonLoglikelihoodForGlobalAlphaOptimization);//DR
	217	newLvec.resize(msp->getSPVecSize());
	218	//doubleRep oldL(VERYSMALL);//DR
	219	//doubleRep newL;//DR
	220	MDOUBLE oldL = VERYSMALL;
	221	MDOUBLE newL;
	222	_bestLvec.resize(msp->getSPVecSize(),0.0);
	223	_bestLocalAlphaVec = initLocalAlphas;
	224	_bestGlobalAlpha = initGlobalAlpha;
	225	int spIndex;
	226	_best_a2cVec = current_a2cVec;
	227	_best_a2gVec = current_a2gVec;
	228	_best_a2tVec = current_a2tVec;
	229	_best_c2gVec = current_c2gVec;
	230	_best_c2tVec = current_c2tVec;
	231	_best_g2tVec = current_g2tVec;
	232	pProportionDist->setAlpha(_bestGlobalAlpha);
	233	for(spIndex = 0;spIndex < msp->getSPVecSize();++spIndex){
	234	(static_cast<gammaDistribution*>(msp->getSp(spIndex)->distr()))->setAlpha(_bestLocalAlphaVec[spIndex]);
	235	(static_cast<gtrModel*>(msp->getSp(spIndex)->getPijAccelerator()->getReplacementModel()))->set_a2c(_best_a2cVec[spIndex]);
	236	(static_cast<gtrModel*>(msp->getSp(spIndex)->getPijAccelerator()->getReplacementModel()))->set_a2g(_best_a2gVec[spIndex]);
	237	(static_cast<gtrModel*>(msp->getSp(spIndex)->getPijAccelerator()->getReplacementModel()))->set_a2t(_best_a2tVec[spIndex]);
	238	(static_cast<gtrModel*>(msp->getSp(spIndex)->getPijAccelerator()->getReplacementModel()))->set_c2g(_best_c2gVec[spIndex]);
	239	(static_cast<gtrModel*>(msp->getSp(spIndex)->getPijAccelerator()->getReplacementModel()))->set_c2t(_best_c2tVec[spIndex]);
	240	(static_cast<gtrModel*>(msp->getSp(spIndex)->getPijAccelerator()->getReplacementModel()))->set_g2t(_best_g2tVec[spIndex]);
	241	}
	242	//first compute the likelihood;
	243	_bestLvec = likelihoodComputation::getTreeLikelihoodProportionalAllPosAlphTheSame(et,sc,msp,pProportionDist,weights);
	244
	245	MDOUBLE ax_local = 0.0;
	246	MDOUBLE c_GTRParam_x = upperBoundGTRParam;
	247	MDOUBLE c_localAlpha_x = upperBoundOnLocalAlpha;
	248
	249	for (int i=0; i < maxTotalIterations; ++i) {
	250	if(optimizeLocalParams){
	251	for(spIndex = 0;spIndex < msp->getSPVecSize();++spIndex){
	252	//optimize a2c
	253	MDOUBLE a2c_x = _best_a2cVec[spIndex];
	254	newLvec[spIndex] = -brent(ax_local,a2c_x,c_GTRParam_x,
	255	C_evalGTRParamProportional(a2c,et,sc[spIndex],*msp->getSp(spIndex),pProportionDist,weights),
	256	epsilonLoglikelihoodForGTRParam,
	257	&current_a2cVec[spIndex]);
	258	if (newLvec[spIndex] >= _bestLvec[spIndex])
	259	{
	260	_bestLvec[spIndex] = newLvec[spIndex];
	261	_best_a2cVec[spIndex] = current_a2cVec[spIndex];
	262	}
	263	else
	264	{//likelihood went down!
	265	LOG(2,<<"likelihood went down in optimizing a2c"<<endl<<"oldL = "<<sumVdouble(_bestLvec));
	266	}
	267	(static_cast<gtrModel*>(msp->getSp(spIndex)->getPijAccelerator()->getReplacementModel()))->set_a2c(_best_a2cVec[spIndex]);//safety
	268
	269	//optimize a2t
	270	MDOUBLE a2t_x = _best_a2tVec[spIndex];
	271	newLvec[spIndex] = -brent(ax_local,a2t_x,c_GTRParam_x,
	272	C_evalGTRParamProportional(a2t,et,sc[spIndex],*msp->getSp(spIndex),pProportionDist,weights),
	273	epsilonLoglikelihoodForGTRParam,
	274	&current_a2tVec[spIndex]);
	275	if (newLvec[spIndex] >= _bestLvec[spIndex])
	276	{
	277	_bestLvec[spIndex] = newLvec[spIndex];
	278	_best_a2tVec[spIndex] = current_a2tVec[spIndex];
	279	}
	280	else
	281	{//likelihood went down!
	282	LOG(2,<<"likelihood went down in optimizing a2t"<<endl<<"oldL = "<<sumVdouble(_bestLvec));
	283	}
	284	(static_cast<gtrModel*>(msp->getSp(spIndex)->getPijAccelerator()->getReplacementModel()))->set_a2t(_best_a2tVec[spIndex]);//safety
	285
	286	//optimize a2g
	287	MDOUBLE a2g_x = _best_a2gVec[spIndex];
	288	newLvec[spIndex] = -brent(ax_local,a2g_x,c_GTRParam_x,
	289	C_evalGTRParamProportional(a2g,et,sc[spIndex],*msp->getSp(spIndex),pProportionDist,weights),
	290	epsilonLoglikelihoodForGTRParam,
	291	&current_a2gVec[spIndex]);
	292	if (newLvec[spIndex] >= _bestLvec[spIndex])
	293	{
	294	_bestLvec[spIndex] = newLvec[spIndex];
	295	_best_a2gVec[spIndex] = current_a2gVec[spIndex];
	296	}
	297	else
	298	{//likelihood went down!
	299	LOG(2,<<"likelihood went down in optimizing a2g"<<endl<<"oldL = "<<sumVdouble(_bestLvec));
	300	}
	301	(static_cast<gtrModel*>(msp->getSp(spIndex)->getPijAccelerator()->getReplacementModel()))->set_a2g(_best_a2gVec[spIndex]);//safety
	302
	303	//optimize c2g
	304	MDOUBLE c2g_x = _best_c2gVec[spIndex];
	305	newLvec[spIndex] = -brent(ax_local,c2g_x,c_GTRParam_x,
	306	C_evalGTRParamProportional(c2g,et,sc[spIndex],*msp->getSp(spIndex),pProportionDist,weights),
	307	epsilonLoglikelihoodForGTRParam,
	308	&current_c2gVec[spIndex]);
	309	if (newLvec[spIndex] >= _bestLvec[spIndex])
	310	{
	311	_bestLvec[spIndex] = newLvec[spIndex];
	312	_best_c2gVec[spIndex] = current_c2gVec[spIndex];
	313	}
	314	else
	315	{//likelihood went down!
	316	LOG(2,<<"likelihood went down in optimizing c2g"<<endl<<"oldL = "<<sumVdouble(_bestLvec));
	317	}
	318	(static_cast<gtrModel*>(msp->getSp(spIndex)->getPijAccelerator()->getReplacementModel()))->set_c2g(_best_c2gVec[spIndex]);//safety
	319
	320	//optimize c2t
	321	MDOUBLE c2t_x = _best_c2tVec[spIndex];
	322	newLvec[spIndex] = -brent(ax_local,c2t_x,c_GTRParam_x,
	323	C_evalGTRParamProportional(c2t,et,sc[spIndex],*msp->getSp(spIndex),pProportionDist,weights),
	324	epsilonLoglikelihoodForGTRParam,
	325	&current_c2tVec[spIndex]);
	326	if (newLvec[spIndex] >= _bestLvec[spIndex])
	327	{
	328	_bestLvec[spIndex] = newLvec[spIndex];
	329	_best_c2tVec[spIndex] = current_c2tVec[spIndex];
	330	}
	331	else
	332	{//likelihood went down!
	333	LOG(2,<<"likelihood went down in optimizing c2t"<<endl<<"oldL = "<<sumVdouble(_bestLvec));
	334	}
	335	(static_cast<gtrModel*>(msp->getSp(spIndex)->getPijAccelerator()->getReplacementModel()))->set_c2t(_best_c2tVec[spIndex]);//safety
	336
	337	//optimize g2t
	338	MDOUBLE g2t_x = _best_g2tVec[spIndex];
	339	newLvec[spIndex] = -brent(ax_local,g2t_x,c_GTRParam_x,
	340	C_evalGTRParamProportional(g2t,et,sc[spIndex],*msp->getSp(spIndex),pProportionDist,weights),
	341	epsilonLoglikelihoodForGTRParam,
	342	&current_g2tVec[spIndex]);
	343	if (newLvec[spIndex] >= _bestLvec[spIndex])
	344	{
	345	_bestLvec[spIndex] = newLvec[spIndex];
	346	_best_g2tVec[spIndex] = current_g2tVec[spIndex];
	347	}
	348	else
	349	{//likelihood went down!
	350	LOG(2,<<"likelihood went down in optimizing g2t"<<endl<<"oldL = "<<sumVdouble(_bestLvec));
	351	}
	352	(static_cast<gtrModel*>(msp->getSp(spIndex)->getPijAccelerator()->getReplacementModel()))->set_g2t(_best_g2tVec[spIndex]);//safety
	353
	354	//optimize local alpha
	355	MDOUBLE localAlpha_x = _bestLocalAlphaVec[spIndex];
	356	newLvec[spIndex] = -brent(ax_local,localAlpha_x,c_localAlpha_x,
	357	C_evalLocalAlpha(et,sc[spIndex],*msp->getSp(spIndex),pProportionDist,weights),
	358	epsilonLoglikelihoodForLocalAlphaOptimization,
	359	&currentLocalAlphaVec[spIndex]);
	360	if (newLvec[spIndex] >= _bestLvec[spIndex])
	361	{
	362	_bestLvec[spIndex] = newLvec[spIndex];
	363	_bestLocalAlphaVec[spIndex] = currentLocalAlphaVec[spIndex];
	364	}
	365	else
	366	{//likelihood went down!
	367	LOG(2,<<"likelihood went down in optimizing local alpha"<<endl<<"oldL = "<<sumVdouble(_bestLvec));
	368	}
	369	(static_cast<gammaDistribution*>(msp->getSp(spIndex)->distr()))->setAlpha(_bestLocalAlphaVec[spIndex]); //safety
	370	}
	371	LOGnOUT(2,<<"Done with GTR local params optimization"<<endl<<"LL:"<<sumVdouble(_bestLvec)<<endl);
	372	LOGnOUT(2,<<"Local Params:"<<endl);
	373	LOGnOUT(2,<<"a2c:");
	374	for(spIndex = 0;spIndex < _best_a2cVec.size();++spIndex){
	375	LOGnOUT(2,<<_best_a2cVec[spIndex]<<",";);
	376	}
	377	LOGnOUT(2,<<endl);
	378	LOGnOUT(2,<<"a2g:");
	379	for(spIndex = 0;spIndex < _best_a2gVec.size();++spIndex){
	380	LOGnOUT(2,<<_best_a2gVec[spIndex]<<",";);
	381	}
	382	LOGnOUT(2,<<endl);
	383	LOGnOUT(2,<<"a2t:");
	384	for(spIndex = 0;spIndex < _best_a2tVec.size();++spIndex){
	385	LOGnOUT(2,<<_best_a2tVec[spIndex]<<",";);
	386	}
	387	LOGnOUT(2,<<endl);
	388	LOGnOUT(2,<<"c2g:");
	389	for(spIndex = 0;spIndex < _best_c2gVec.size();++spIndex){
	390	LOGnOUT(2,<<_best_c2gVec[spIndex]<<",";);
	391	}
	392	LOGnOUT(2,<<endl);
	393	LOGnOUT(2,<<"c2t:");
	394	for(spIndex = 0;spIndex < _best_c2tVec.size();++spIndex){
	395	LOGnOUT(2,<<_best_c2tVec[spIndex]<<",";);
	396	}
	397	LOGnOUT(2,<<endl);
	398	LOGnOUT(2,<<"g2t:");
	399	for(spIndex = 0;spIndex < _best_g2tVec.size();++spIndex){
	400	LOGnOUT(2,<<_best_g2tVec[spIndex]<<",";);
	401	}
	402	LOGnOUT(2,<<endl);
	403	LOGnOUT(2,<<"local alpha:");
	404	for(spIndex = 0;spIndex < _bestLocalAlphaVec.size();++spIndex){
	405	LOGnOUT(2,<<_bestLocalAlphaVec[spIndex]<<",";);
	406	}
	407	LOGnOUT(2,<<endl);
	408	}
	409	if(optimizeGlobalAlpha){
	410	//doubleRep ax_global(0.0);//DR
	411	//doubleRep c_globalAlpha_x(upperBoundOnGlobalAlpha);//DR
	412	//doubleRep minusOne(-1.0);//DR
	413	MDOUBLE ax_global = 0.0;
	414	MDOUBLE c_globalAlpha_x = upperBoundOnGlobalAlpha;
	415
	416	//optimize global alpha
	417	//doubleRep globalAlpha_x(prevGlobalAlpha);//DR
	418	MDOUBLE globalAlpha_x = _bestGlobalAlpha;
	419	//newL = minusOne*brentDoubleRep(ax_global,globalAlpha_x,c_globalAlpha_x,
	420	// C_evalGlobalAlpha(et,sc,msp,pProportionDist,weights),
	421	// epsilonLoglikelihoodForGlobalAlphaOptimizationDR,
	422	// &_bestGlobalAlpha);
	423	newL = -brent(ax_global,globalAlpha_x,c_globalAlpha_x,
	424	C_evalGlobalAlpha(et,sc,msp,pProportionDist,weights),
	425	epsilonLoglikelihoodForGlobalAlphaOptimization,
	426	&currentGlobalAlpha);
	427	if (newL >= sumVdouble(_bestLvec))
	428	{
	429	_bestGlobalAlpha = currentGlobalAlpha;
	430	}
	431	else
	432	{//likelihood went down!
	433	LOG(2,<<"likelihood went down in optimizing global alpha"<<endl<<"oldL = "<<sumVdouble(_bestLvec));
	434	}
	435	pProportionDist->setAlpha(_bestGlobalAlpha); //safety
	436	//whether or not likelihood has improved we need to update _bestLvec
	437	_bestLvec = likelihoodComputation::getTreeLikelihoodProportionalAllPosAlphTheSame(et,sc,msp,pProportionDist,weights);
	438	LOGnOUT(2,<<"Done with global alpha optimization"<<endl<<"LL:"<<sumVdouble(_bestLvec)<<endl);
	439	LOGnOUT(2,<<"Global Alpha:"<<_bestGlobalAlpha<<endl);
	440	}
	441
	442	if(optimizeTree)
	443	{
	444	if(branchLengthOptimizationMethod == "bblLS"){
	445	bblLSProportionalEB bblLSPEB1(et,sc,msp,pProportionDist,_bestLvec,optimizeSelectedBranches,maxBBLIterations,epsilonLoglikelihoodForBBL);
	446	_bestLvec = bblLSPEB1.getTreeLikelihoodVec();
	447	LOGnOUT(2,<<"Done with bblLS"<<endl<<"LL:"<<sumVdouble(_bestLvec)<<endl);
	448	}
	449	else if(branchLengthOptimizationMethod == "bblEM"){
	450	bblEMProportionalEB bblEMPEB1(et,sc,msp,pProportionDist,optimizeSelectedBranches,NULL,maxBBLIterations,epsilonLoglikelihoodForBBL);
	451	_bestLvec = bblEMPEB1.getTreeLikelihood();
	452	LOGnOUT(2,<<"Done with bblEM"<<endl<<"LL:"<<sumVdouble(_bestLvec)<<endl);
	453	}
	454	LOGnOUT(2,<<et.stringTreeInPhylipTreeFormat()<<endl);
	455	}
	456	// check for improvement in the likelihood
	457	if (sumVdouble(_bestLvec) > oldL+epsilonLikelihoodImprovment) {
	458	//all params have already been updated
	459	oldL = sumVdouble(_bestLvec);
	460	} else {
	461	break;
	462	}
	463	LOGnOUT(4,<<"Done with optimization iteration "<<i<<". LL: "<<sumVdouble(_bestLvec)<<endl);
	464	}
	465	}
	466

+217

-0

libs/phylogeny/bestGtrModelParams.h less more

	0	// $Id: bestGtrModelparams.h 2008-28-04 15:13:34Z nimrod $
	1
	2	#ifndef ___BEST_GTRMODEL_PARAMS
	3	#define ___BEST_GTRMODEL_PARAMS
	4
	5	#include "definitions.h"
	6
	7	#include "likelihoodComputation.h"
	8	#include "sequenceContainer.h"
	9	#include "stochasticProcess.h"
	10	#include "gammaDistribution.h"
	11	#include "generalGammaDistribution.h"
	12	#include "tree.h"
	13	#include "gtrModel.h"
	14
	// Identifies one of the six GTR exchangeability parameters.
	// Invalid (0) marks "no parameter"; the evaluators in this header report an
	// error when asked to set it.
	// Declared as a named enum without a trailing comma after the last
	// enumerator, so the header is also accepted by strict C++03 compilers.
	enum GTRParam
	{
		Invalid = 0,
		a2c,
		a2g,
		a2t,
		c2g,
		c2t,
		g2t
	};
	25
	26	#define maxBBLIt 10 // iteration cap passed to bblEM by bestGtrModel
	27	#define epsilonLoglikeForBBL 0.01 // epsilon passed to bblEM by bestGtrModel
	28	#define inAlpha 1.5 // NOTE(review): not referenced in the visible code; presumably an initial alpha - confirm
	29	#define epsilonLoglikeForAlphaOptimization 0.01 // brent tolerance for the alpha search in bestGtrModel
	30	#define upperBoundForAlpha 5.0 // upper bracket of the alpha search in bestGtrModel
	31
	32	class bestGtrModel {
	33	public:
	34	explicit bestGtrModel(tree& et, // find best Gtr Model Params
	35	const sequenceContainer& sc,
	36	stochasticProcess& sp,
	37	const Vdouble * weights=NULL,
	38	const int maxTotalIterations = 5,
	39	const MDOUBLE epsilonLikelihoodImprovment = 0.05,
	40	const MDOUBLE epsilonLoglikelihoodForGTRParam = 0.01,
	41	const MDOUBLE upperBoundGTRParam = 5.0,
	42	const bool optimizeTree = true,
	43	const bool optimizeAlpha = true);
	44	MDOUBLE getBesta2c() {return _best_a2c;}
	45	MDOUBLE getBesta2g() {return _best_a2g;}
	46	MDOUBLE getBesta2t() {return _best_a2t;}
	47	MDOUBLE getBestc2g() {return _best_c2g;}
	48	MDOUBLE getBestc2t() {return _best_c2t;}
	49	MDOUBLE getBestg2t() {return _best_g2t;}
	50	MDOUBLE getBestAlpha() {return _bestAlpha;}
	51	MDOUBLE getBestL() {return _bestL;}
	52	private:
	53	MDOUBLE _best_a2c;
	54	MDOUBLE _best_a2g;
	55	MDOUBLE _best_a2t;
	56	MDOUBLE _best_c2g;
	57	MDOUBLE _best_c2t;
	58	MDOUBLE _best_g2t;
	59	MDOUBLE _bestAlpha;
	60	MDOUBLE _bestL;
	61	};
	62
	63	class bestGtrModelProportional {
	64	public:
	65	explicit bestGtrModelProportional(tree& et, // find best Gtr Model Params under a proportional model
	66	vector<sequenceContainer>& sc,
	67	multipleStochasticProcess* msp,
	68	gammaDistribution* pProportionDist,
	69	Vdouble initLocalAlphas,
	70	Vdouble initLocala2cs,
	71	Vdouble initLocala2gs,
	72	Vdouble initLocala2ts,
	73	Vdouble initLocalc2gs,
	74	Vdouble initLocalc2ts,
	75	Vdouble initLocalg2ts,
	76	const MDOUBLE upperBoundOnLocalAlpha,
	77	const MDOUBLE initGlobalAlpha,
	78	const MDOUBLE upperBoundOnGlobalAlpha,
	79	const MDOUBLE upperBoundGTRParam,
	80	const int maxTotalIterations,
	81	const int maxBBLIterations,
	82	const bool optimizeSelectedBranches=false,
	83	const bool optimizeTree = true,
	84	const string branchLengthOptimizationMethod="bblLS",
	85	const bool optimizeLocalParams = true,
	86	const bool optimizeGlobalAlpha = true,
	87	const Vdouble * weights=NULL,
	88	const MDOUBLE epsilonLikelihoodImprovment = 0.05,
	89	const MDOUBLE epsilonLoglikelihoodForGTRParam = 0.01,
	90	const MDOUBLE epsilonLoglikelihoodForLocalAlphaOptimization= 0.01,
	91	const MDOUBLE epsilonLoglikelihoodForGlobalAlphaOptimization= 0.01,
	92	const MDOUBLE epsilonLoglikelihoodForBBL= 0.01);
	93	MDOUBLE getBesta2c(int spIndex) {return _best_a2cVec[spIndex];}
	94	MDOUBLE getBesta2g(int spIndex) {return _best_a2gVec[spIndex];}
	95	MDOUBLE getBesta2t(int spIndex) {return _best_a2tVec[spIndex];}
	96	MDOUBLE getBestc2g(int spIndex) {return _best_c2gVec[spIndex];}
	97	MDOUBLE getBestc2t(int spIndex) {return _best_c2tVec[spIndex];}
	98	MDOUBLE getBestg2t(int spIndex) {return _best_g2tVec[spIndex];}
	99	MDOUBLE getBestLocalAlpha(int spIndex) {return _bestLocalAlphaVec[spIndex];}
	100	MDOUBLE getBestGlobalAlpha() {return _bestGlobalAlpha;}
	101	Vdouble getBestL() {return _bestLvec;}
	102	private:
	103	Vdouble _best_a2cVec;
	104	Vdouble _best_a2gVec;
	105	Vdouble _best_a2tVec;
	106	Vdouble _best_c2gVec;
	107	Vdouble _best_c2tVec;
	108	Vdouble _best_g2tVec;
	109	Vdouble _bestLocalAlphaVec;
	110	MDOUBLE _bestGlobalAlpha;
	111	Vdouble _bestLvec;
	112	};
	113
	114	class C_evalGTRParam{
	115	public:
	116	C_evalGTRParam( const GTRParam param,
	117	const tree& et,
	118	const sequenceContainer& sc,
	119	stochasticProcess& sp,
	120	const Vdouble * weights = NULL)
	121	:_param(param), _et(et),_sc(sc),_weights(weights),_sp(sp){};
	122	private:
	123	const GTRParam _param;
	124	const tree& _et;
	125	const sequenceContainer& _sc;
	126	const Vdouble * _weights;
	127	stochasticProcess& _sp;
	128	public:
	129	MDOUBLE operator() (MDOUBLE paramVal) {
	130	switch (_param){
	131	case a2c:
	132	(static_cast<gtrModel*>(_sp.getPijAccelerator()->getReplacementModel()))->set_a2c(paramVal);
	133	break;
	134	case a2g:
	135	(static_cast<gtrModel*>(_sp.getPijAccelerator()->getReplacementModel()))->set_a2g(paramVal);
	136	break;
	137	case a2t:
	138	(static_cast<gtrModel*>(_sp.getPijAccelerator()->getReplacementModel()))->set_a2t(paramVal);
	139	break;
	140	case c2g:
	141	(static_cast<gtrModel*>(_sp.getPijAccelerator()->getReplacementModel()))->set_c2g(paramVal);
	142	break;
	143	case c2t:
	144	(static_cast<gtrModel*>(_sp.getPijAccelerator()->getReplacementModel()))->set_c2t(paramVal);
	145	break;
	146	case g2t:
	147	(static_cast<gtrModel*>(_sp.getPijAccelerator()->getReplacementModel()))->set_g2t(paramVal);
	148	break;
	149	default:
	150	errorMsg::reportError("Missing GTR parameter in C_evalGTRParam::operator ()");
	151	break;
	152	}
	153	MDOUBLE res = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(_et,_sc,_sp,_weights);
	154	LOG(5,<<" with " + int2string(_param) + " = "<<paramVal<<" logL = "<<res<<endl);
	155	return -res;
	156	}
	157	};
	158
	159	class C_evalGTRParamProportional{
	160	public:
	161	C_evalGTRParamProportional( const GTRParam param,
	162	const tree& et,
	163	const sequenceContainer& sc,
	164	stochasticProcess& sp,
	165	const gammaDistribution* pProportionDist,
	166	const Vdouble * weights = NULL)
	167	:_param(param), _et(et),_sc(sc),_sp(sp),_pProportionDist(pProportionDist),_weights(weights){};
	168	private:
	169	const GTRParam _param;
	170	const tree& _et;
	171	const sequenceContainer& _sc;
	172	const gammaDistribution* _pProportionDist;
	173	const Vdouble * _weights;
	174	stochasticProcess& _sp;
	175	public:
	176	MDOUBLE operator() (MDOUBLE paramVal) {
	177	switch (_param){
	178	case a2c:
	179	(static_cast<gtrModel*>(_sp.getPijAccelerator()->getReplacementModel()))->set_a2c(paramVal);
	180	break;
	181	case a2g:
	182	(static_cast<gtrModel*>(_sp.getPijAccelerator()->getReplacementModel()))->set_a2g(paramVal);
	183	break;
	184	case a2t:
	185	(static_cast<gtrModel*>(_sp.getPijAccelerator()->getReplacementModel()))->set_a2t(paramVal);
	186	break;
	187	case c2g:
	188	(static_cast<gtrModel*>(_sp.getPijAccelerator()->getReplacementModel()))->set_c2g(paramVal);
	189	break;
	190	case c2t:
	191	(static_cast<gtrModel*>(_sp.getPijAccelerator()->getReplacementModel()))->set_c2t(paramVal);
	192	break;
	193	case g2t:
	194	(static_cast<gtrModel*>(_sp.getPijAccelerator()->getReplacementModel()))->set_g2t(paramVal);
	195	break;
	196	default:
	197	errorMsg::reportError("Missing GTR parameter in C_evalGTRParamProportional::operator ()");
	198	break;
	199	}
	200	vector<sequenceContainer> tmpScVec;
	201	tmpScVec.push_back(_sc);
	202	vector<stochasticProcess> tmpSpVec;
	203	tmpSpVec.push_back(_sp);
	204	multipleStochasticProcess * tmpMsp = new multipleStochasticProcess();
	205	tmpMsp->setSpVec(tmpSpVec);
	206	Vdouble likeVec = likelihoodComputation::getTreeLikelihoodProportionalAllPosAlphTheSame(_et,tmpScVec,tmpMsp,_pProportionDist);
	207	MDOUBLE res = likeVec[0];
	208	delete(tmpMsp);
	209	LOG(5,<<" with " + int2string(_param) + " = "<<paramVal<<" logL = "<<res<<endl);
	210	return -res;
	211	}
	212	};
	213
	214	#endif
	215
	216

+324

-0

libs/phylogeny/bestHKYparam.cpp less more

	0	// $Id: bestHKYparam.cpp 10004 2011-11-13 04:40:13Z rubi $
	1
	2	#include "bestHKYparam.h"
	3	#include <iostream>
	4	using namespace std;
	5
	6	#include "bblEM.h"
	7	#include "bblEMProportionalEB.h"
	8	#include "bblLSProportionalEB.h"
	9	#include "numRec.h"
	10	#include "logFile.h"
	11	#include "bestAlpha.h"
	12
	13	bestHkyParamFixedTree::bestHkyParamFixedTree(const tree& et, //findBestHkyParamFixedTree
	14	const sequenceContainer& sc,
	15	stochasticProcess& sp,
	16	const Vdouble * weights,
	17	const MDOUBLE upperBoundOnHkyParam,
	18	const MDOUBLE epsilonHkyParamOptimization){
	19	LOG(5,<<"findBestHkyParamFixedTree"<<endl);
	20	MDOUBLE bestA=0;
	21	const MDOUBLE cx=upperBoundOnHkyParam;// left, midle, right limit on HkyParam
	22	const MDOUBLE bx=cx*0.3;
	23	const MDOUBLE ax=0;
	24
	25
	26	_bestL = -brent(ax,bx,cx,
	27	C_evalHkyParam(et,sc,sp,weights),
	28	epsilonHkyParamOptimization,
	29	&bestA);
	30	_bestHkyParam= bestA;
	31	(static_cast<hky*>(sp.getPijAccelerator()->getReplacementModel()))->changeTrTv(bestA);
	32	}
	33
	34	bestHkyParamAndBBL::bestHkyParamAndBBL(tree& et, //find Best HkyParam and best BBL
	35	const sequenceContainer& sc,
	36	stochasticProcess& sp,
	37	const Vdouble * weights,
	38	const MDOUBLE upperBoundOnHkyParam,
	39	const MDOUBLE epsilonHkyParamOptimization,
	40	const MDOUBLE epsilonLikelihoodImprovment,
	41	const int maxBBLIterations,
	42	const int maxTotalIterations){
	43	LOG(5,<<"find Best HkyParam and best BBL"<<endl);
	44	// LOG(5,<<" 1. bestHkyParam::findBestHkyParam"<<endl);
	45	// brLenOpt br1(et,pi,weights);
	46	MDOUBLE oldL = VERYSMALL;
	47	_bestL = VERYSMALL;
	48	const MDOUBLE bx=upperBoundOnHkyParam*0.3;
	49	const MDOUBLE ax=0.01;
	50	const MDOUBLE cx=upperBoundOnHkyParam;
	51	MDOUBLE bestA=0;
	52	for (int i=0; i < maxTotalIterations; ++i) {
	53	_bestL = -brent(ax,bx,cx,
	54	C_evalHkyParam(et,sc,sp,weights),
	55	epsilonHkyParamOptimization,
	56	&bestA);
	57
	58	if (_bestL > oldL+epsilonLikelihoodImprovment) {
	59	oldL = _bestL;
	60	}
	61	else {//LL converged
	62	if (_bestL > oldL)
	63	(static_cast<hky*>(sp.getPijAccelerator()->getReplacementModel()))->changeTrTv(bestA);
	64	else
	65	_bestL = oldL;
	66	break;
	67	}
	68	_bestHkyParam = bestA;
	69	(static_cast<hky*>(sp.getPijAccelerator()->getReplacementModel()))->changeTrTv(bestA);
	70	LOG(5,<<"bestHkyParamAndBBL: trtv = "<<_bestHkyParam<<endl);
	71	bblEM bblEM1(et,sc,sp,NULL,maxBBLIterations,epsilonLikelihoodImprovment);//maxIterations=1000
	72	_bestL =bblEM1.getTreeLikelihood();
	73	if (_bestL > oldL+epsilonLikelihoodImprovment) {
	74	oldL = _bestL;
	75	}
	76	else {
	77	_bestL = oldL;
	78	break;
	79	}
	80	}
	81	}
	82
	83	bestHkyParamAlphaAndBBL::bestHkyParamAlphaAndBBL( //find best TrTv (=HkyParam), Alpha and best branch lengths
	84	tree& et,
	85	const sequenceContainer& sc,
	86	stochasticProcess& sp,
	87	const Vdouble * weights,
	88	const int maxTotalIterations,
	89	const MDOUBLE epsilonLikelihoodImprovment,
	90	const MDOUBLE epsilonHkyParamOptimization,
	91	const MDOUBLE epsilonAlphaOptimization,
	92	const MDOUBLE epsilonBBL,
	93	const MDOUBLE upperBoundOnHkyParam,
	94	const int maxBBLIterations,
	95	const MDOUBLE initAlpha,
	96	const MDOUBLE upperBoundOnAlpha)
	97
	98	{
	99	MDOUBLE oldL = VERYSMALL;
	100	MDOUBLE newL = VERYSMALL;
	101
	102	// first guess for the parameters
	103	MDOUBLE prevHkyParam = static_cast<hky*>(sp.getPijAccelerator()->getReplacementModel())->getTrTv();
	104	MDOUBLE prevAlpha = initAlpha;
	105	tree prevTree;
	106
	107	for (int i=0; i < maxTotalIterations; ++i) {
	108
	109	// optimize HkyParam
	110	newL = -brent(0.0, prevHkyParam, upperBoundOnHkyParam,
	111	C_evalHkyParam(et,sc,sp,weights),
	112	epsilonHkyParamOptimization,
	113	&_bestHkyParam);
	114	(static_cast<hky*>(sp.getPijAccelerator()->getReplacementModel()))->changeTrTv(_bestHkyParam);
	115	LOG(5,<<"bestHkyParamAlphaAndBBL: trtv = "<<_bestHkyParam<<endl);
	116	// optimize Alpha
	117	newL = -brent(0.0, prevAlpha, upperBoundOnAlpha,
	118	C_evalAlpha(et,sc,sp,weights),
	119	epsilonAlphaOptimization,
	120	&_bestAlpha);
	121	(static_cast<gammaDistribution*>(sp.distr()))->setAlpha(_bestAlpha);
	122
	123	LOG(5,<<"# bestHkyParamAlphaAndBBL::bestHkyParamAlphaAndBBL iteration " << i << ": after param optimization:" <<endl
	124	<<"# old L = " << oldL << "\t"
	125	<<"# new L = " << newL << endl
	126	<<"# new hkyParam = " << _bestHkyParam << endl
	127	<<"# new Alpha = " << _bestAlpha << endl);
	128
	129	// optimize branch lengths
	130	bblEM bblEM1(et,sc,sp,NULL,maxBBLIterations,epsilonBBL);
	131	newL =bblEM1.getTreeLikelihood();
	132
	133	LOG(5,<<"# bestHkyParamAlphaAndBBL::bestHkyParamAlphaAndBBL iteration " << i << ": after branch lengths optimization:" <<endl
	134	<<"# After BBL new L = "<<newL<<" old L = "<<oldL<<endl
	135	<<"# The tree:" );
	136	LOGDO(5,et.output(myLog::LogFile()));
	137
	138	// check for improvement in the likelihood
	139	if (newL > oldL+epsilonLikelihoodImprovment) {
	140	oldL = newL;
	141	_bestL = newL;
	142	prevHkyParam = _bestHkyParam;
	143	prevAlpha = _bestAlpha;
	144	prevTree = et;
	145	} else {
	146	if (newL>oldL) {
	147	_bestL = newL;
	148	} else {
	149	_bestL = oldL;
	150	_bestHkyParam = prevHkyParam;
	151	et = prevTree;
	152	}
	153	break;
	154	}
	155	}
	156	}
	157
	158	bestHkyParamAlphaAndBBLProportional::bestHkyParamAlphaAndBBLProportional( //find best TrTv (=HkyParam), global Alpha, local Alpha, and best branch lengths
	159	tree& et,
	160	vector<sequenceContainer>& sc,
	161	multipleStochasticProcess* msp,
	162	gammaDistribution* pProportionDist,
	163	Vdouble initLocalAlphas,
	164	Vdouble initLocalKappas,
	165	const MDOUBLE upperBoundOnLocalAlpha,
	166	const MDOUBLE initGlobalAlpha,
	167	const MDOUBLE upperBoundOnGlobalAlpha,
	168	const MDOUBLE upperBoundOnHkyParam,
	169	const int maxTotalIterations,
	170	const int maxBBLIterations,
	171	const bool optimizeSelectedBranches,
	172	const bool optimizeTree,
	173	const string branchLengthOptimizationMethod,
	174	const bool optimizeLocalParams,
	175	const bool optimizeGlobalAlpha,
	176	const Vdouble * weights,
	177	const MDOUBLE epsilonLikelihoodImprovment,
	178	const MDOUBLE epsilonHkyParamOptimization,
	179	const MDOUBLE epsilonLocalAlphaOptimization,
	180	const MDOUBLE epsilonGlobalAlphaOptimization,
	181	const MDOUBLE epsilonBBL)
	182
	183	{
	184	LOG(5,<<"Starting bestHkyParamAlphaAndBBLProportional"<<endl);
	185	Vdouble current_HkyParamVec,currentLocalAlphaVec;
	186	MDOUBLE currentGlobalAlpha = initGlobalAlpha;
	187	current_HkyParamVec = initLocalKappas;
	188	currentLocalAlphaVec = initLocalAlphas;
	189	//doubleRep epsilonGlobalAlphaOptimizationDR(epsilonGlobalAlphaOptimization);//DR
	190	Vdouble newLvec;
	191	newLvec.resize(msp->getSPVecSize());
	192	//doubleRep oldL(VERYSMALL);//DR
	193	//doubleRep newL;
	194	MDOUBLE oldL = VERYSMALL;
	195	MDOUBLE newL;
	196	_bestLvec.resize(msp->getSPVecSize(),0.0);
	197	_bestLocalAlphaVec = initLocalAlphas;
	198	_bestGlobalAlpha = initGlobalAlpha;
	199	int spIndex;
	200	//initial HKY params
	201	_bestHkyParamVec = initLocalKappas;
	202	pProportionDist->setAlpha(_bestGlobalAlpha);
	203	for(spIndex = 0;spIndex < msp->getSPVecSize();++spIndex){
	204	(static_cast<hky*>(msp->getSp(spIndex)->getPijAccelerator()->getReplacementModel()))->changeTrTv(_bestHkyParamVec[spIndex]);
	205	(static_cast<gammaDistribution*>(msp->getSp(spIndex)->distr()))->setAlpha(_bestLocalAlphaVec[spIndex]);
	206	}
	207	//first compute the likelihood;
	208	_bestLvec = likelihoodComputation::getTreeLikelihoodProportionalAllPosAlphTheSame(et,sc,msp,pProportionDist,weights);
	209
	210	MDOUBLE ax_local = 0.0;
	211	MDOUBLE c_HKYParam_x = upperBoundOnHkyParam;
	212	MDOUBLE c_localAlpha_x = upperBoundOnLocalAlpha;
	213	for (int i=0; i < maxTotalIterations; ++i) {
	214	if(optimizeLocalParams){
	215	for(spIndex = 0;spIndex < msp->getSPVecSize();++spIndex){
	216	//optimize hky
	217	MDOUBLE hky_x(_bestHkyParamVec[spIndex]);
	218	newLvec[spIndex] = -brent(ax_local,hky_x,c_HKYParam_x,
	219	C_evalLocalHkyParam(et,sc[spIndex],*msp->getSp(spIndex),pProportionDist,weights),
	220	epsilonHkyParamOptimization,
	221	&current_HkyParamVec[spIndex]);
	222	if (newLvec[spIndex] >= _bestLvec[spIndex])
	223	{
	224	_bestLvec[spIndex] = newLvec[spIndex];
	225	_bestHkyParamVec[spIndex] = current_HkyParamVec[spIndex];
	226	}
	227	else
	228	{//likelihood went down!
	229	LOG(2,<<"likelihood went down in optimizing hky param"<<endl<<"oldL = "<<sumVdouble(_bestLvec));
	230	}
	231	(static_cast<hky*>(msp->getSp(spIndex)->getPijAccelerator()->getReplacementModel()))->changeTrTv(_bestHkyParamVec[spIndex]);//safety
	232
	233	//optimize local alpha
	234	MDOUBLE localAlpha_x(_bestLocalAlphaVec[spIndex]);
	235	newLvec[spIndex] = -brent(ax_local,localAlpha_x,c_localAlpha_x,
	236	C_evalLocalAlpha(et,sc[spIndex],*msp->getSp(spIndex),pProportionDist,weights),
	237	epsilonLocalAlphaOptimization,
	238	&currentLocalAlphaVec[spIndex]);
	239	if (newLvec[spIndex] >= _bestLvec[spIndex])
	240	{
	241	_bestLvec[spIndex] = newLvec[spIndex];
	242	_bestLocalAlphaVec[spIndex] = currentLocalAlphaVec[spIndex];
	243	}
	244	else
	245	{//likelihood went down!
	246	LOG(2,<<"likelihood went down in optimizing local alpha"<<endl<<"oldL = "<<sumVdouble(_bestLvec));
	247	}
	248	(static_cast<gammaDistribution*>(msp->getSp(spIndex)->distr()))->setAlpha(_bestLocalAlphaVec[spIndex]);
	249	}
	250	LOGnOUT(2,<<"Done with HKY local params optimization. LL: "<<sumVdouble(_bestLvec)<<endl);
	251	LOGnOUT(2,<<"Local Params:"<<endl);
	252	LOGnOUT(2,<<"HHY:");
	253	for(spIndex = 0;spIndex < _bestHkyParamVec.size();++spIndex){
	254	LOGnOUT(2,<<_bestHkyParamVec[spIndex]<<",";);
	255	}
	256	LOGnOUT(2,<<endl);
	257	LOGnOUT(2,<<"local alpha:");
	258	for(spIndex = 0;spIndex < _bestLocalAlphaVec.size();++spIndex){
	259	LOGnOUT(2,<<_bestLocalAlphaVec[spIndex]<<",";);
	260	}
	261	LOGnOUT(2,<<endl);
	262	_bestLvec = likelihoodComputation::getTreeLikelihoodProportionalAllPosAlphTheSame(et,sc,msp,pProportionDist,weights);
	263	LOGnOUT(2,<<"LL*: "<<sumVdouble(_bestLvec)<<endl);
	264
	265	}
	266	if(optimizeGlobalAlpha){
	267	//doubleRep ax_global(0.0);//DR
	268	//doubleRep c_globalAlpha_x(upperBoundOnGlobalAlpha);//DR
	269	//doubleRep minusOne(-1.0);//DR
	270	MDOUBLE ax_global = 0.0;
	271	MDOUBLE c_globalAlpha_x = upperBoundOnGlobalAlpha;
	272	//optimize global alpha
	273	//doubleRep globalAlpha_x(prevGlobalAlpha);//DR
	274	MDOUBLE globalAlpha_x = _bestGlobalAlpha;
	275	//newL = minusOne*brentDoubleRep(ax_global,globalAlpha_x,c_globalAlpha_x,
	276	// C_evalGlobalAlpha(et,sc,msp,pProportionDist,weights),
	277	// epsilonGlobalAlphaOptimizationDR,
	278	// &_bestGlobalAlpha);//DR
	279	newL = -brent(ax_global,globalAlpha_x,c_globalAlpha_x,
	280	C_evalGlobalAlpha(et,sc,msp,pProportionDist,weights),
	281	epsilonGlobalAlphaOptimization,
	282	&currentGlobalAlpha);
	283	if (newL >= sumVdouble(_bestLvec))
	284	{
	285	_bestGlobalAlpha = currentGlobalAlpha;
	286	}
	287	else
	288	{//likelihood went down!
	289	LOG(2,<<"likelihood went down in optimizing global alpha"<<endl<<"oldL = "<<sumVdouble(_bestLvec));
	290	}
	291	pProportionDist->setAlpha(_bestGlobalAlpha);
	292	//whether or not likelihood has improved we need to update _bestLvec
	293	_bestLvec = likelihoodComputation::getTreeLikelihoodProportionalAllPosAlphTheSame(et,sc,msp,pProportionDist,weights);
	294	LOGnOUT(2,<<"Done with global alpha optimization"<<endl<<"LL:"<<sumVdouble(_bestLvec)<<endl);
	295	LOGnOUT(2,<<"Global Alpha:"<<_bestGlobalAlpha<<endl);
	296	}
	297
	298	if(optimizeTree)
	299	{
	300	if(branchLengthOptimizationMethod == "bblLS"){
	301	bblLSProportionalEB bblLSPEB1(et,sc,msp,pProportionDist,_bestLvec,optimizeSelectedBranches,maxBBLIterations,epsilonBBL);
	302	_bestLvec = bblLSPEB1.getTreeLikelihoodVec();
	303	LOGnOUT(2,<<"Done with bblLS"<<endl<<"LL:"<<sumVdouble(_bestLvec)<<endl);
	304	}
	305	else if(branchLengthOptimizationMethod == "bblEM"){
	306	bblEMProportionalEB bblEMPEB1(et,sc,msp,pProportionDist,optimizeSelectedBranches,NULL,maxBBLIterations,epsilonBBL);
	307	_bestLvec = bblEMPEB1.getTreeLikelihood();
	308	LOGnOUT(2,<<"Done with bblEM"<<endl<<"LL:"<<sumVdouble(_bestLvec)<<endl);
	309	}
	310	LOGnOUT(2,<<et.stringTreeInPhylipTreeFormat()<<endl);
	311	}
	312
	313	// check for improvement in the likelihood
	314	if (sumVdouble(_bestLvec) > oldL+epsilonLikelihoodImprovment) {
	315	//all params have already been updated
	316	oldL = sumVdouble(_bestLvec);
	317	} else {
	318	break;
	319	}
	320	LOGnOUT(4,<<"Done with optimization iteration "<<i<<". LL: "<<sumVdouble(_bestLvec)<<endl);
	321	}
	322	}
	323

+173

-0

libs/phylogeny/bestHKYparam.h less more

	0	// $Id: bestHKYparam.h 9992 2011-11-08 03:57:29Z rubi $
	1
	2	#ifndef ___BEST_HKY_PARAM
	3	#define ___BEST_HKY_PARAM
	4
	5	#include "definitions.h"
	6
	7	#include "likelihoodComputation.h"
	8	#include "sequenceContainer.h"
	9	#include "stochasticProcess.h"
	10	#include "gammaDistribution.h"
	11	#include "tree.h"
	12	#include "hky.h"
	13	#include "multipleStochasticProcess.h"
	14
	15	class bestHkyParamFixedTree {
	16	public:
	17	explicit bestHkyParamFixedTree(const tree& et,
	18	const sequenceContainer& sc,
	19	stochasticProcess& sp,
	20	const Vdouble * weights=NULL,
	21	const MDOUBLE upperBoundOnHkyParam = 0.5,
	22	const MDOUBLE epsilonHkyParamOptimization = 0.01);
	23	MDOUBLE getBestHkyParam() {return _bestHkyParam;}
	24	MDOUBLE getBestL() {return _bestL;}
	25	private:
	26	MDOUBLE _bestHkyParam;
	27	MDOUBLE _bestL;
	28	};
	29
	30	class bestHkyParamAndBBL {
	31	public:
	32	explicit bestHkyParamAndBBL(tree& et, //find Best HkyParam and best BBL
	33	const sequenceContainer& sc,
	34	stochasticProcess& sp,
	35	const Vdouble * weights=NULL,
	36	const MDOUBLE upperBoundOnHkyParam = 5.0,
	37	const MDOUBLE epsilonHkyParamOptimization= 0.01,
	38	const MDOUBLE epsilonLikelihoodImprovment= 0.05,
	39	const int maxBBLIterations=10,
	40	const int maxTotalIterations=5);
	41	MDOUBLE getBestHkyParam() {return _bestHkyParam;}
	42	MDOUBLE getBestL() {return _bestL;}
	43	private:
	44	MDOUBLE _bestHkyParam;
	45	MDOUBLE _bestL;
	46	};
	47
	48	class C_evalHkyParam{
	49	public:
	50	C_evalHkyParam( const tree& et,
	51	const sequenceContainer& sc,
	52	stochasticProcess& sp,
	53	const Vdouble * weights = NULL)
	54	: _et(et),_sc(sc),_weights(weights),_sp(sp){};
	55	private:
	56	const tree& _et;
	57	const sequenceContainer& _sc;
	58	const Vdouble * _weights;
	59	stochasticProcess& _sp;
	60	public:
	61	MDOUBLE operator() (MDOUBLE HkyParam) {
	62	(static_cast<hky*>(_sp.getPijAccelerator()->getReplacementModel()))->changeTrTv(HkyParam);
	63
	64	MDOUBLE res = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(_et,_sc,_sp,_weights);
	65	//LOG(5,<<" with HkyParam = "<<HkyParam<<" logL = "<<res<<endl);
	66	return -res;
	67	}
	68	};
	69
	70	class C_evalLocalHkyParam{
	71	public:
	72	C_evalLocalHkyParam( const tree& et,
	73	const sequenceContainer& sc,
	74	stochasticProcess& sp,
	75	const gammaDistribution* pProportionDist,
	76	const Vdouble * weights = NULL)
	77	: _et(et),_sc(sc),_weights(weights),_sp(sp),_pProportionDist(pProportionDist){};
	78	private:
	79	const tree& _et;
	80	const sequenceContainer& _sc;
	81	const Vdouble * _weights;
	82	stochasticProcess& _sp;
	83	const gammaDistribution* _pProportionDist;
	84	public:
	85	MDOUBLE operator() (MDOUBLE HkyParam) {
	86	(static_cast<hky*>(_sp.getPijAccelerator()->getReplacementModel()))->changeTrTv(HkyParam);
	87	vector<sequenceContainer> tmpScVec;
	88	tmpScVec.push_back(_sc);
	89	vector<stochasticProcess> tmpSpVec;
	90	tmpSpVec.push_back(_sp);
	91	multipleStochasticProcess * tmpMsp = new multipleStochasticProcess();
	92	tmpMsp->setSpVec(tmpSpVec);
	93	Vdouble likeVec = likelihoodComputation::getTreeLikelihoodProportionalAllPosAlphTheSame(_et,tmpScVec,tmpMsp,_pProportionDist);
	94	MDOUBLE res = likeVec[0];
	95	delete(tmpMsp);
	96	LOG(5,<<" with HkyParam = "<<HkyParam<<" logL = "<<res<<endl);
	97	return -res;
	98	}
	99	};
	100
	101	class bestHkyParamAlphaAndBBL {
	102	public:
	103	explicit bestHkyParamAlphaAndBBL( //find best TrTv (=HkyParam), Alpha and best branch lengths
	104	tree& et,
	105	const sequenceContainer& sc,
	106	stochasticProcess& sp,
	107	const Vdouble * weights=NULL,
	108	const int maxTotalIterations=5,
	109	const MDOUBLE epsilonLikelihoodImprovment= 0.05,
	110	const MDOUBLE epsilonHkyParamOptimization= 0.01,
	111	const MDOUBLE epsilonAlphaOptimization= 0.01,
	112	const MDOUBLE epsilonBBL= 0.01,
	113	const MDOUBLE upperBoundOnHkyParam = 5.0,
	114	const int maxBBLIterations=10,
	115	const MDOUBLE initAlpha = 1.5,
	116	const MDOUBLE upperBoundOnAlpha = 5.0);
	117
	118	MDOUBLE getBestHkyParam() {return _bestHkyParam;}
	119	MDOUBLE getBestAlpha() {return _bestAlpha;}
	120	MDOUBLE getBestL() {return _bestL;}
	121	private:
	122	MDOUBLE _bestHkyParam;
	123	MDOUBLE _bestAlpha;
	124	MDOUBLE _bestL;
	125	};
	126
	127
	128	class bestHkyParamAlphaAndBBLProportional {
	129	public:
	130	explicit bestHkyParamAlphaAndBBLProportional( //find best Kappa (=HkyParam), global Alpha, local Alpha, and best branch lengths
	131	tree& et,
	132	vector<sequenceContainer>& sc,
	133	multipleStochasticProcess* msp,
	134	gammaDistribution* pProportionDist,
	135	Vdouble initLocalAlphas,
	136	Vdouble initLocalKappas,
	137	const MDOUBLE upperBoundOnLocalAlpha,
	138	const MDOUBLE initGlobalAlpha,
	139	const MDOUBLE upperBoundOnGlobalAlpha,
	140	const MDOUBLE upperBoundOnHkyParam,
	141	const int maxTotalIterations,
	142	const int maxBBLIterations,
	143	const bool optimizeSelectedBranches=false,
	144	const bool optimizeTree = true,
	145	const string branchLengthOptimizationMethod="bblLS",
	146	const bool optimizeLocalParams = true,
	147	const bool optimizeGlobalAlpha = true,
	148	const Vdouble * weights=NULL,
	149	const MDOUBLE epsilonLikelihoodImprovment= 0.05,
	150	const MDOUBLE epsilonHkyParamOptimization= 0.01,
	151	const MDOUBLE epsilonLocalAlphaOptimization= 0.01,
	152	const MDOUBLE epsilonGlobalAlphaOptimization= 0.01,
	153	const MDOUBLE epsilonBBL= 0.01);
	154
	155	MDOUBLE getBestHkyParam(int spIndex) {return _bestHkyParamVec[spIndex];}
	156	MDOUBLE getBestLocalAlpha(int spIndex) {return _bestLocalAlphaVec[spIndex];}
	157	MDOUBLE getBestGlobalAlpha(){return _bestGlobalAlpha;}
	158	Vdouble getBestL() {return _bestLvec;}
	159	private:
	160	Vdouble _bestHkyParamVec;
	161	Vdouble _bestLocalAlphaVec;
	162	MDOUBLE _bestGlobalAlpha;
	163	Vdouble _bestLvec;
	164	};
	165
	166
	167
	168
	169
	170	#endif
	171
	172

+474

-0

libs/phylogeny/bestParamUSSRV.cpp less more

	0	// $Id: bestParamUSSRV.cpp 4951 2008-09-24 11:16:58Z osnatz $
	1	#include "bestParamUSSRV.h"
	2
/* Structure of this method:
(1) Count the parameters to optimize, and decide how many parameter-optimization iterations
    and how many parameters+bbl iterations will be done.
(2) Loop over the parameters+bbl iterations:
    (2.1) Loop over the parameter-optimization iterations:
        (2.1.1) Optimize alpha
        (2.1.2) Optimize nu
        (2.1.3) Optimize f
        If the likelihood did not change during this loop --> parameters converged --> break
    (2.2) BBL
    If the likelihood did not change during this loop --> parameters+bbl converged --> break
(3) Return the likelihood
*/
	16
	17	// ***************
	18	// * USSRV *
	19	// ***************
	20
	21	MDOUBLE bestParamUSSRV::operator() (tree& et,
	22	const sequenceContainer& sc,
	23	const sequenceContainer& baseSc,
	24	ussrvModel& model,
	25	const Vdouble * weights /* =NULL */,
	26	const MDOUBLE AlphaUpperBound /* = 15 */,
	27	const MDOUBLE NuUpperBound /* = 15 */,
	28	const MDOUBLE FUpperBound /* = 1 */,
	29	const MDOUBLE epsilonParamOptimization /* = 0.01 */,
	30	const MDOUBLE epsilonLikelihoodImprovment /* = 0.01 */,
	31	const int maxIterations /* = 50 */,
	32	const int maxOfParametersAndBblIterations /* = 40 */)
	33	{
	34	_bestL = VERYSMALL;
	35	MDOUBLE newL = VERYSMALL;
	36
	37	bestAlphaFixedTreeUSSRV alphaOptimization;
	38	bestNuFixedTreeUSSRV nuOptimization;
	39	bestFFixedTreeUSSRV fOptimization;
	40
	41	int it, bblIt;
	42	int numberOfIterations(maxIterations);
	43	int numberOfParametersAndBblIterations(maxOfParametersAndBblIterations);
	44
	45	// if only one parameter is optimize (only Alpha or only Nu or only F) then we need only one iteration.
	46	// if we only do bbl, without any optimization of the parameters, then we don't need iterations at all.
	47	int countParameters2Optimize(0);
	48	if (_AlphaOptimizationFlag) countParameters2Optimize++;
	49	if (_NuOptimizationFlag) countParameters2Optimize++;
	50	if (_FOptimizationFlag) countParameters2Optimize++;
	51
	52	if (countParameters2Optimize==0)
	53	{
	54	numberOfIterations=0;
	55	numberOfParametersAndBblIterations=1;
	56	}
	57	else if (countParameters2Optimize==1)
	58	numberOfIterations=1;
	59
	60	if (_bblOptimizationFlag == false)
	61	numberOfParametersAndBblIterations = 1;
	62
	63	_bestAlpha = model.getAlpha();
	64	_bestNu = model.getNu();
	65	_bestF = model.getF();
	66
	67	bool changes(false);
	68	bool bblChanges(false);
	69	for (bblIt=0; bblIt < numberOfParametersAndBblIterations; ++bblIt)
	70	{
	71	LOG(8,<<"bestParamUSSRV, params+bbl, iteration: " << bblIt << endl);
	72	bblChanges = false;
	73	// parameters optimizations (without bbl)
	74	// in each iteration : optimization of Alpha and then optimization of Nu, and then of F.
	75	for (it=0; it < numberOfIterations; ++it)
	76	{
	77	changes = false;
	78	// Alpha optimization
	79	if (_AlphaOptimizationFlag)
	80	{
	81	LOGDO(5,printTime(myLog::LogFile()));
	82	newL = alphaOptimization(et,sc,baseSc,model,weights,AlphaUpperBound,epsilonParamOptimization);
	83
	84	//the improvement in Likelihood is smaller than epsilon
	85	if (newL < _bestL)
	86	{
	87	LOG(5,<<"likelihood went down in LS! (Alpha optimization)"<<endl<<"oldL = "<<_bestL<<" newL= "<<newL<<endl);
	88	//go back to previous alpha
	89	alphaOptimization.setAlpha(_bestAlpha,model);
	90	alphaOptimization.setBestL(_bestL); // @@@@ maybe this is unnecessary
	91	//break;
	92	}
	93	else
	94	{// update of likelihood and model.
	95	if (newL > _bestL+epsilonLikelihoodImprovment)
	96	{
	97	changes = true;
	98	bblChanges = true;
	99	}
	100	LOG(9,<<"newL = " << newL << " _bestL = " << _bestL << " epsilonLikelihoodImprovment = " << epsilonLikelihoodImprovment << endl);
	101	_bestL = newL;
	102	_bestAlpha = alphaOptimization.getBestAlpha();
	103	LOG(5,<<"new L = " << _bestL<<" new Alpha = " << _bestAlpha<<endl);
	104	}
	105	}
	106
	107	// Nu optimization
	108	if (_NuOptimizationFlag)
	109	{
	110	LOGDO(5,printTime(myLog::LogFile()));
	111	newL = nuOptimization(et,sc,baseSc,model,weights,NuUpperBound,epsilonParamOptimization);
	112
	113	//the improvement in Likelihood is smaller than epsilon
	114	if (newL < _bestL)
	115	{
	116	LOG(5,<<"likelihood went down in LS! (Nu optimization)"<<endl<<"oldL = "<<_bestL<<" newL= "<<newL<<endl);
	117	//go back to previous Nu
	118	nuOptimization.setNu(_bestNu,model);
	119	nuOptimization.setBestL(_bestL); // @@@@ maybe this is unnecessary
	120	//break;
	121	}
	122	else
	123	{// update of likelihood and model.
	124	if (newL > _bestL+epsilonLikelihoodImprovment)
	125	{
	126	changes = true;
	127	bblChanges = true;
	128	}
	129	LOG(9,<<"newL = " << newL << " _bestL = " << _bestL << " epsilonLikelihoodImprovment = " << epsilonLikelihoodImprovment << endl);
	130	_bestL = newL;
	131	_bestNu = nuOptimization.getBestNu();
	132	LOG(5,<<"new L = " << _bestL<<" new Nu = " << _bestNu<<endl);
	133	}
	134	}
	135
	136	// F optimization
	137	if (_FOptimizationFlag)
	138	{
	139	LOGDO(5,printTime(myLog::LogFile()));
	140	newL = fOptimization(et,sc,baseSc,model,weights,FUpperBound,epsilonParamOptimization);
	141
	142	//the improvement in Likelihood is smaller than epsilon
	143	if (newL < _bestL)
	144	{
	145	LOG(5,<<"likelihood went down in LS! (F optimization)"<<endl<<"oldL = "<<_bestL<<" newL= "<<newL<<endl);
	146	//go back to previous F
	147	fOptimization.setF(_bestF,model);
	148	fOptimization.setBestL(_bestL); // @@@@ maybe this is unnecessary
	149	//break;
	150	}
	151	else
	152	{// update of likelihood and model.
	153	if (newL > _bestL+epsilonLikelihoodImprovment )
	154	{
	155	changes = true;
	156	bblChanges = true;
	157	}
	158	LOG(9,<<"newL = " << newL << " _bestL = " << _bestL << " epsilonLikelihoodImprovment = " << epsilonLikelihoodImprovment << endl);
	159	_bestL = newL;
	160	_bestF = fOptimization.getBestF();
	161	LOG(5,<<"new L = " << _bestL<<" new F = " << _bestF<<endl);
	162	}
	163	}
	164	if (changes == false)
	165	{
	166	LOG(5,<<"bestParamUSSRV parameters alpha,nu,f converged!"<<endl);
	167	break;
	168	}
	169	}
	170
	171	if (changes == true)
	172	LOG(5,<<"bestParamUSSRV parameters alpha, nu, f, did not converge after " << numberOfIterations << " iterations"<<endl);
	173
	174
	175	// BBL
	176	if (_bblOptimizationFlag == true)
	177	{
	178	LOGDO(5,printTime(myLog::LogFile()));
	179	bblEM2USSRV bbl(et,sc,baseSc,model,weights,maxIterations);
	180	newL = bbl.getTreeLikelihood();
	181	LOG(5,<<"current best L= "<<_bestL<<endl);
	182	LOG(5,<<"new L After BBL = " << newL<< " = "<< bbl.getTreeLikelihood() <<endl);
	183	LOG(5,<<"The new tree is: " << endl);
	184	if (5 <= myLog::LogLevel())
	185	et.output(myLog::LogFile());
	186	LOG(5,<<endl);
	187	if (newL > _bestL+epsilonLikelihoodImprovment)
	188	bblChanges = true;
	189	if (newL < _bestL){
	190	LOG(5,<<"likelihood went down in LS! (BBL)"<<endl<<"oldL = "<<_bestL);
	191	LOG(5,<<" newL= "<<newL<<endl) ;
	192	}
	193	else
	194	_bestL = newL;
	195	}
	196
	197	if (bblChanges == false)
	198	{
	199	LOG(5,<<"bestParamUSSRV bbl and parameters converged!"<<endl);
	200	break;
	201	}
	202	}
	203
	204	if (bblIt == numberOfParametersAndBblIterations)
	205	LOG(5,<<"bestParamUSSRV bbl and parameters alpha did not converge after " << numberOfParametersAndBblIterations << "iterations"<<endl);
	206
	207	LOGDO(5,printTime(myLog::LogFile()));
	208	return _bestL;
	209	}
	210
	211
	212
	213	// ***************
	214	// * SSRV *
	215	// ***************
	216
	217	MDOUBLE bestParamSSRV::operator() (tree& et,
	218	const sequenceContainer& sc,
	219	stochasticProcessSSRV& ssrvSp,
	220	const Vdouble * weights /* =NULL */,
	221	const MDOUBLE AlphaUpperBound /* = 15 */,
	222	const MDOUBLE NuUpperBound /* = 15 */,
	223	const MDOUBLE TrTvUpperBound /* = 10 */,
	224	const MDOUBLE epsilonParamOptimization /* = 0.01 */,
	225	const MDOUBLE epsilonLikelihoodImprovment /* = 0.01 */,
	226	const MDOUBLE epsilonBbl /= 0.05 /,
	227	const int maxIterations /* = 50 */,
	228	const int maxOfParametersAndBblIterations /* = 40 */)
	229	{
	230	_bestL = VERYSMALL;
	231	MDOUBLE newL = VERYSMALL;
	232
	233	bestAlphaFixedTreeSSRV alphaOptimization;
	234	bestNuFixedTreeSSRV nuOptimization;
	235	bestTamura92ParamFixedTreeSSRV tamura92Optimization;
	236
	237	int it, bblIt;
	238	int numberOfIterations(maxIterations);
	239	int numberOfParametersAndBblIterations(maxOfParametersAndBblIterations);
	240
	241	// if only one parameter is optimize (only Alpha or only Nu or only tamura92) then we need only one iteration.
	242	// if we only do bbl, without any optimization of the parameters, then we don't need iterations at all.
	243	int countParameters2Optimize(0);
	244	if (_AlphaOptimizationFlag) countParameters2Optimize++;
	245	if (_NuOptimizationFlag) countParameters2Optimize++;
	246	if (_tamura92OptimizationFlag) countParameters2Optimize++;
	247
	248
	249	if (countParameters2Optimize==0)
	250	{
	251	numberOfIterations=0;
	252	numberOfParametersAndBblIterations=1;
	253	}
	254	else if (countParameters2Optimize==1)
	255	numberOfIterations=1;
	256
	257	if (_bblOptimizationFlag == false)
	258	numberOfParametersAndBblIterations = 1;
	259
	260	replacementModelSSRV* pMulRM = static_cast<replacementModelSSRV*>(ssrvSp.getPijAccelerator()->getReplacementModel());
	261	gammaDistribution* gammaDist = static_cast<gammaDistribution*>(pMulRM->getDistribution());
	262	_bestAlpha = gammaDist->getAlpha();
	263	_bestNu = pMulRM->getRateOfRate();
	264
	265
	266	bool changes(false);
	267	bool bblChanges(false);
	268
	269	for (bblIt=0; bblIt < numberOfParametersAndBblIterations; ++bblIt)
	270	{
	271	bblChanges = false;
	272
	273	// Set initial values of lower/upper bounds for params
	274	MDOUBLE AlphaLowerBoundCur = 0.0;
	275	MDOUBLE AlphaUpperBoundCur = AlphaUpperBound;
	276	MDOUBLE NuLowerBoundCur = 0.0;
	277	MDOUBLE NuUpperBoundCur = NuUpperBound;
	278	MDOUBLE TrTvLowerBoundCur = 0.0;
	279	MDOUBLE TrTvUpperBoundCur = TrTvUpperBound;
	280	MDOUBLE ThetaLowerBoundCur = 0.0;
	281	MDOUBLE ThetaUpperBoundCur = 1.0;
	282	// And for epsilon
	283	MDOUBLE epsilonParamOptimizationCur = epsilonParamOptimization;
	284
	285	// parameters optimizations (without bbl)
	286	// in each iteration : optimization of Alpha and then optimization of Nu, and then of F.
	287	for (it=0; it < numberOfIterations; ++it)
	288	{
	289	LOG(8,<<"bestParamUSSRV, params+bbl, iteration: " << bblIt << endl);
	290	changes = false;
	291	// Alpha optimization
	292	if (_AlphaOptimizationFlag)
	293	{
	294	LOGDO(5,printTime(myLog::LogFile()));
	295	newL = alphaOptimization(et,sc,ssrvSp,weights,AlphaLowerBoundCur,AlphaUpperBoundCur,epsilonParamOptimizationCur);
	296
	297	//the improvement in Likelihood is smaller than epsilon
	298	if (newL < _bestL)
	299	{
	300	LOG(5,<<"likelihood went down in LS! (Alpha optimization)"<<endl<<"oldL = "<<_bestL<<" newL= "<<newL<<endl);
	301	//go back to previous alpha
	302	alphaOptimization.setAlpha(_bestAlpha,ssrvSp);
	303	alphaOptimization.setBestL(_bestL); // @@@@ maybe this is unnecessary
	304	//break;
	305	}
	306	else
	307	{// update of likelihood and model.
	308	if (newL > _bestL+epsilonLikelihoodImprovment)
	309	{
	310	changes = true;
	311	bblChanges = true;
	312	}
	313	LOG(9,<<"newL = " << newL << " _bestL = " << _bestL << " epsilonLikelihoodImprovment = " << epsilonLikelihoodImprovment << endl);
	314	_bestL = newL;
	315	_bestAlpha = alphaOptimization.getBestAlpha();
	316	LOG(5,<<"new L = " << _bestL<<" new Alpha = " << _bestAlpha<<endl);
	317	}
	318
	319	// Narrow search range between lower/upper bounds
	320	AlphaLowerBoundCur = (AlphaLowerBoundCur + 2*_bestAlpha) / 3;
	321	AlphaUpperBoundCur = (AlphaUpperBoundCur + 2*_bestAlpha) / 3;
	322	}
	323
	324	// Nu optimization
	325	if (_NuOptimizationFlag)
	326	{
	327	LOGDO(5,printTime(myLog::LogFile()));
	328	newL = nuOptimization(et,sc,ssrvSp,weights,NuLowerBoundCur,NuUpperBoundCur,epsilonParamOptimizationCur);
	329
	330	//the improvement in Likelihood is smaller than epsilon
	331	if (newL < _bestL)
	332	{
	333	LOG(5,<<"likelihood went down in LS! (Nu optimization)"<<endl<<"oldL = "<<_bestL<<" newL= "<<newL<<endl);
	334	//go back to previous Nu
	335	nuOptimization.setNu(_bestNu,ssrvSp);
	336	nuOptimization.setBestL(_bestL); // @@@@ maybe this is unnecessary
	337	//break;
	338	}
	339	else
	340	{// update of likelihood and model.
	341	if (newL > _bestL+epsilonLikelihoodImprovment)
	342	{
	343	changes = true;
	344	bblChanges = true;
	345	}
	346	LOG(9,<<"newL = " << newL << " _bestL = " << _bestL << " epsilonLikelihoodImprovment = " << epsilonLikelihoodImprovment << endl);
	347	_bestL = newL;
	348	_bestNu = nuOptimization.getBestNu();
	349	LOG(5,<<"new L = " << _bestL<<" new Nu = " << _bestNu<<endl);
	350	}
	351
	352	// Narrow search range between lower/upper bounds
	353	NuLowerBoundCur = (NuLowerBoundCur + 2*_bestNu) / 3;
	354	NuUpperBoundCur = (NuUpperBoundCur + 2*_bestNu) / 3;
	355	}
	356
	357	// tamura92 optimization
	358	if (_tamura92OptimizationFlag)
	359	{
	360	LOGDO(5,printTime(myLog::LogFile()));
	361	newL = tamura92Optimization(
	362	et,sc,ssrvSp,weights,5,epsilonLikelihoodImprovment,
	363	TrTvLowerBoundCur,TrTvUpperBoundCur,ThetaLowerBoundCur,ThetaUpperBoundCur,
	364	epsilonParamOptimizationCur,epsilonParamOptimizationCur);
	365	MDOUBLE bestTrTv = tamura92Optimization.getBestTrTv();
	366	MDOUBLE bestTheta = tamura92Optimization.getBestTheta();
	367
	368	//the improvement in Likelihood is smaller than epsilon
	369	if (newL < _bestL)
	370	{
	371	LOG(5,<<"likelihood went down in LS! (tamura92 optimization)"<<endl<<"oldL = "<<_bestL<<" newL= "<<newL<<endl);
	372	}
	373	else
	374	{// update of likelihood and model.
	375	if (newL > _bestL+epsilonLikelihoodImprovment)
	376	{
	377	changes = true;
	378	bblChanges = true;
	379	}
	380	LOG(9,<<"newL = " << newL << " _bestL = " << _bestL << " epsilonLikelihoodImprovment = " << epsilonLikelihoodImprovment << endl);
	381	_bestL = newL;
	382	LOG(5,<<"new L = " << _bestL
	383	<<" new TrTv = " << bestTrTv
	384	<<" new Theta = " << bestTheta <<endl);
	385	}
	386
	387	// Narrow search range between lower/upper bounds
	388	TrTvLowerBoundCur = (TrTvLowerBoundCur + 2*bestTrTv) / 3;
	389	TrTvUpperBoundCur = (TrTvUpperBoundCur + 2*bestTrTv) / 3;
	390
	391	ThetaLowerBoundCur = (ThetaLowerBoundCur + 2*bestTheta) / 3;
	392	ThetaUpperBoundCur = (ThetaUpperBoundCur + 2*bestTheta) / 3;
	393	}
	394
	395	if (changes == false)
	396	{
	397	LOG(5,<<"bestParamSSRV parameters alpha,nu, and tamura92 params converged!"<<endl);
	398	break;
	399	}
	400
	401	// Reduce epsilonParamOptimizationCur
	402	epsilonParamOptimizationCur /= 2;
	403	}
	404
	405	if (changes == true)
	406	LOG(5,<<"bestParamSSRV parameters alpha, nu, and tamura92 params did not converge after " << numberOfIterations << " iterations"<<endl);
	407
	408
	409	// BBL
	410	if (_bblOptimizationFlag == true)
	411	{
	412	LOGDO(5,printTime(myLog::LogFile()));
	413	bblEM bbl(et,sc,ssrvSp,weights,maxIterations,epsilonBbl);
	414	newL = bbl.getTreeLikelihood();
	415	LOG(5,<<" current best L= "<<_bestL<<endl);
	416	LOG(5,<<"new L After BBL = " << newL<< " = "<< bbl.getTreeLikelihood() <<endl);
	417	LOG(5,<<"The new tree is: " << endl);
	418	if (5 <= myLog::LogLevel())
	419	et.output(myLog::LogFile());
	420	LOG(5,<<endl);
	421	if (newL > _bestL+epsilonLikelihoodImprovment)
	422	bblChanges = true;
	423	if (newL < _bestL){
	424	LOG(5,<<"likelihood went down in LS! (BBL)"<<endl<<"oldL = "<<_bestL);
	425	LOG(5,<<" newL= "<<newL<<endl) ;
	426	}
	427	else
	428	_bestL = newL;
	429	}
	430
	431	if (bblChanges == false)
	432	{
	433	LOG(5,<<"bestParamSSRV bbl and parameters converged!"<<endl);
	434	break;
	435	}
	436	}
	437
	438	if (bblIt == numberOfParametersAndBblIterations)
	439	LOG(5,<<"bestParamSSRV bbl and parameters alpha did not converge after " << numberOfParametersAndBblIterations << "iterations"<<endl);
	440
	441	LOGDO(5,printTime(myLog::LogFile()));
	442	return _bestL;
	443	}
	444
	445
	446
	447	// Variant that can work on a const tree - only if we're not doing BBL
	448	// WARNING: Running this with bblOptimization==true will give a fatal error
	449	MDOUBLE bestParamSSRV::operator() (const tree& et,
	450	const sequenceContainer& sc,
	451	stochasticProcessSSRV& ssrvSp,
	452	const Vdouble * weights /* =NULL */,
	453	const MDOUBLE AlphaUpperBound /* = 15 */,
	454	const MDOUBLE NuUpperBound /* = 15 */,
	455	const MDOUBLE TrTvUpperBound /* = 10 */,
	456	const MDOUBLE epsilonParamOptimization /* = 0.01 */,
	457	const MDOUBLE epsilonLikelihoodImprovment /* = 0.01 */,
	458	const MDOUBLE epsilonBbl /= 0.05 /,
	459	const int maxIterations /* = 50 */,
	460	const int maxOfParametersAndBblIterations /* = 40 */)
	461	{
	462	if (_bblOptimizationFlag == true)
	463	errorMsg::reportError("bestParamSSRV::operator(): Can't work on const tree if bblOptimization was requested");
	464
	465	tree etNotConst(et);
	466	return operator()(etNotConst, sc, ssrvSp, weights,
	467	AlphaUpperBound, NuUpperBound,
	468	epsilonParamOptimization, epsilonLikelihoodImprovment,
	469	epsilonBbl, maxIterations,
	470	maxOfParametersAndBblIterations);
	471	}
	472
	473

+130

-0

libs/phylogeny/bestParamUSSRV.h less more

	0	// $Id: bestParamUSSRV.h 1975 2007-04-22 13:47:28Z privmane $
	1	#ifndef BEST_PARAM_USSRV
	2	#define BEST_PARAM_USSRV
	3
	4	#include "definitions.h"
	5	#include "sequenceContainer.h"
	6	#include "stochasticProcess.h"
	7	#include "gammaDistribution.h"
	8	#include "tree.h"
	9	#include "replacementModelSSRV.h"
	10	#include "stochasticProcessSSRV.h"
	11	#include "C_evalParamUSSRV.h"
	12	#include "bestAlpha.h"
	13	#include "numRec.h"
	14	#include "bblEM.h"
	15	#include "logFile.h"
	16	#include "bestAlphaAndNu.h"
	17	#include "bblEM2USSRV.h"
	18	#include "someUtil.h"
	19	#include <ctime>
	20
	21	// ***************
	22	// * USSRV *
	23	// ***************
	24
	25	class bestParamUSSRV
	26	{
	27	public:
	28	explicit bestParamUSSRV(bool AlphaOptimization, bool NuOptimization,
	29	bool FOptimization, bool bblOptimization):
	30	_AlphaOptimizationFlag(AlphaOptimization),
	31	_NuOptimizationFlag(NuOptimization),
	32	_FOptimizationFlag(FOptimization),
	33	_bblOptimizationFlag(bblOptimization) {}
	34
	35	MDOUBLE operator() (tree& et,
	36	const sequenceContainer& sc,
	37	const sequenceContainer& baseSc,
	38	ussrvModel& model,
	39	const Vdouble * weights=NULL,
	40	const MDOUBLE AlphaUpperBound = 15,
	41	const MDOUBLE NuUpperBound = 15,
	42	const MDOUBLE FUpperBound = 1,
	43	const MDOUBLE epsilonParamOptimization = 0.01,
	44	const MDOUBLE epsilonLikelihoodImprovment = 0.01,
	45	const int maxIterations = 50,
	46	const int maxOfParametersAndBblIterations = 40);
	47
	48	MDOUBLE getBestAlpha() {return _bestAlpha;}
	49	MDOUBLE getBestNu() {return _bestNu;}
	50	MDOUBLE getBestF() {return _bestF;}
	51	MDOUBLE getBestL() {return _bestL;}
	52
	53	private:
	54	MDOUBLE _bestAlpha;
	55	MDOUBLE _bestNu;
	56	MDOUBLE _bestF;
	57	MDOUBLE _bestL;
	58
	59	// flags
	60	bool _AlphaOptimizationFlag;
	61	bool _NuOptimizationFlag;
	62	bool _FOptimizationFlag;
	63	bool _bblOptimizationFlag;
	64	};
	65
	66	// ***************
	67	// * SSRV *
	68	// ***************
	69
	70	class bestParamSSRV
	71	{
	72	public:
	73	explicit bestParamSSRV(bool AlphaOptimization, bool NuOptimization, bool tamura92Optimization,
	74	bool bblOptimization):
	75	_AlphaOptimizationFlag(AlphaOptimization),
	76	_NuOptimizationFlag(NuOptimization),
	77	_tamura92OptimizationFlag(tamura92Optimization),
	78	_bblOptimizationFlag(bblOptimization) {}
	79
	80	MDOUBLE operator() (tree& et,
	81	const sequenceContainer& sc,
	82	stochasticProcessSSRV& ssrvSp,
	83	const Vdouble * weights=NULL,
	84	const MDOUBLE AlphaUpperBound = 15,
	85	const MDOUBLE NuUpperBound = 15,
	86	const MDOUBLE TrTvUpperBound = 10,
	87	const MDOUBLE epsilonParamOptimization = 0.01,
	88	const MDOUBLE epsilonLikelihoodImprovment = 0.01,
	89	const MDOUBLE epsilonBbl = 0.05,
	90	const int maxIterations = 50,
	91	const int maxOfParametersAndBblIterations = 40);
	92
	93	// Variant that can work on a const tree - only if we're not doing BBL
	94	// WARNING: Running this with bblOptimization==true will give a fatal error
	95	MDOUBLE operator() (const tree& et,
	96	const sequenceContainer& sc,
	97	stochasticProcessSSRV& ssrvSp,
	98	const Vdouble * weights=NULL,
	99	const MDOUBLE AlphaUpperBound = 15,
	100	const MDOUBLE NuUpperBound = 15,
	101	const MDOUBLE TrTvUpperBound = 10,
	102	const MDOUBLE epsilonParamOptimization = 0.01,
	103	const MDOUBLE epsilonLikelihoodImprovment = 0.01,
	104	const MDOUBLE epsilonBbl = 0.05,
	105	const int maxIterations = 50,
	106	const int maxOfParametersAndBblIterations = 40);
	107
	108	MDOUBLE getBestAlpha() {return _bestAlpha;}
	109	MDOUBLE getBestNu() {return _bestNu;}
	110	MDOUBLE getBestTrTv() {return _bestTrTv;}
	111	MDOUBLE getBestTheta() {return _bestTheta;}
	112	MDOUBLE getBestL() {return _bestL;}
	113
	114	private:
	115	MDOUBLE _bestAlpha;
	116	MDOUBLE _bestNu;
	117	MDOUBLE _bestTrTv;
	118	MDOUBLE _bestTheta;
	119	MDOUBLE _bestL;
	120
	121	// flags
	122	bool _AlphaOptimizationFlag;
	123	bool _NuOptimizationFlag;
	124	bool _tamura92OptimizationFlag;
	125	bool _bblOptimizationFlag;
	126	};
	127
	128	#endif // BEST_PARAM_USSRV
	129

+398

-0

libs/phylogeny/bestTamura92param.cpp less more

	0	// $Id: bestTamura92param.cpp 962 2006-11-07 15:13:34Z privmane $
	1
	2	#include "bestTamura92param.h"
	3	#include <iostream>
	4	using namespace std;
	5
	6	#include "bblEM.h"
	7	#include "bblEMProportionalEB.h"
	8	#include "bblLSProportionalEB.h"
	9	#include "numRec.h"
	10	#include "logFile.h"
	11	#include "bestAlpha.h"
	12
	13	bestTamura92ParamFixedTree::bestTamura92ParamFixedTree(const tree& et, // find best TrTv and theta
	14	const sequenceContainer& sc,
	15	stochasticProcess& sp,
	16	const Vdouble * weights,
	17	const int maxTotalIterations,
	18	const MDOUBLE epsilonLikelihoodImprovment,
	19	const MDOUBLE epsilonLoglikelihoodForTrTvOptimization,
	20	const MDOUBLE epsilonLoglikelihoodForThetaOptimization,
	21	const MDOUBLE upperBoundOnTrTv) {
	22	LOG(5,<<"Starting bestTamura92ParamFixedTree: find Best TrTv and theta"<<endl);
	23	MDOUBLE oldL = VERYSMALL;
	24	MDOUBLE newL = VERYSMALL;
	25
	26	// first guess for the parameters
	27	MDOUBLE prevTrTv = upperBoundOnTrTv*0.3;
	28	MDOUBLE prevTheta = 0.5;
	29
	30	for (int i=0; i < maxTotalIterations; ++i) {
	31	// optimize TrTv
	32	newL = -brent(0.0, prevTrTv, upperBoundOnTrTv,
	33	C_evalTrTvParam(et,sc,sp,weights),
	34	epsilonLoglikelihoodForTrTvOptimization,
	35	&_bestTrTv);
	36
	37	// optimize Theta
	38	newL = -brent(0.0, prevTheta, 1.0,
	39	C_evalTheta(et,sc,sp,weights),
	40	epsilonLoglikelihoodForThetaOptimization,
	41	&_bestTheta);
	42
	43	// check for improvement in the likelihood
	44	if (newL > oldL+epsilonLikelihoodImprovment) {
	45	prevTrTv = _bestTrTv;
	46	prevTheta = _bestTheta;
	47	oldL = newL;
	48	_bestL = newL;
	49	} else {
	50	if (newL>oldL) {
	51	_bestL = newL;
	52	} else {
	53	_bestL = oldL;
	54	_bestTrTv = prevTrTv;
	55	_bestTheta = prevTheta;
	56	}
	57	break;
	58	}
	59	}
	60	}
	61
	62	bestTamura92ParamAndBBL::bestTamura92ParamAndBBL(tree& et, //find best TrTv, theta and best BBL
	63	const sequenceContainer& sc,
	64	stochasticProcess& sp,
	65	const Vdouble * weights,
	66	const int maxTotalIterations,
	67	const MDOUBLE epsilonLikelihoodImprovment,
	68	const MDOUBLE epsilonLoglikelihoodForTrTvOptimization,
	69	const MDOUBLE epsilonLoglikelihoodForThetaOptimization,
	70	const MDOUBLE epsilonLoglikelihoodForBBL,
	71	const MDOUBLE upperBoundOnTrTv,
	72	const int maxBBLIterations){
	73	LOG(5,<<"Starting bestTamura92ParamAndBBL: find best TrTv, theta and BBL"<<endl);
	74	MDOUBLE oldL = VERYSMALL;
	75	MDOUBLE newL = VERYSMALL;
	76
	77	// first guess for the parameters
	78	MDOUBLE prevTrTv = upperBoundOnTrTv*0.3;
	79	MDOUBLE prevTheta = 0.5;
	80	tree prevTree;
	81
	82	for (int i=0; i < maxTotalIterations; ++i) {
	83	// optimize TrTv
	84	newL = -brent(0.0, prevTrTv, upperBoundOnTrTv,
	85	C_evalTrTvParam(et,sc,sp,weights),
	86	epsilonLoglikelihoodForTrTvOptimization,
	87	&_bestTrTv);
	88	(static_cast<tamura92*>(sp.getPijAccelerator()->getReplacementModel()))->changeTrTv(_bestTrTv);
	89
	90	// optimize Theta
	91	newL = -brent(0.0, prevTheta, 1.0,
	92	C_evalTheta(et,sc,sp,weights),
	93	epsilonLoglikelihoodForThetaOptimization,
	94	&_bestTheta);
	95	(static_cast<tamura92*>(sp.getPijAccelerator()->getReplacementModel()))->changeTheta(_bestTheta);
	96
	97	// optimize branch lengths
	98	bblEM bblEM1(et,sc,sp,NULL,maxBBLIterations,epsilonLoglikelihoodForBBL);//maxIterations=1000
	99	newL =bblEM1.getTreeLikelihood();
	100
	101	// check for improvement in the likelihood
	102	if (newL > oldL+epsilonLikelihoodImprovment) {
	103	prevTrTv = _bestTrTv;
	104	prevTheta = _bestTheta;
	105	oldL = newL;
	106	_bestL = newL;
	107	prevTree = et;
	108	} else {
	109	if (newL>oldL) {
	110	_bestL = newL;
	111	} else {
	112	_bestL = oldL;
	113	_bestTrTv = prevTrTv;
	114	_bestTheta = prevTheta;
	115	et = prevTree;
	116	}
	117	break;
	118	}
	119	}
	120	}
	121
	122	bestTamura92ParamAlphaAndBBL::bestTamura92ParamAlphaAndBBL( //find best TrTv, theta, Alpha and best branch lengths
	123	tree& et,
	124	const sequenceContainer& sc,
	125	stochasticProcess& sp,
	126	const Vdouble * weights,
	127	const int maxTotalIterations,
	128	const MDOUBLE epsilonLikelihoodImprovment,
	129	const MDOUBLE epsilonLoglikelihoodForTrTvOptimization,
	130	const MDOUBLE epsilonLoglikelihoodForThetaOptimization,
	131	const MDOUBLE epsilonLoglikelihoodForAlphaOptimization,
	132	const MDOUBLE epsilonLoglikelihoodForBBL,
	133	const MDOUBLE upperBoundOnTrTv,
	134	const int maxBBLIterations,
	135	const MDOUBLE initAlpha,
	136	const MDOUBLE upperBoundOnAlpha)
	137
	138	{
	139	MDOUBLE oldL = VERYSMALL;
	140	MDOUBLE newL = VERYSMALL;
	141
	142	// first guess for the parameters
	143	MDOUBLE prevTrTv = static_cast<tamura92*>(sp.getPijAccelerator()->getReplacementModel())->getTrTv();
	144	MDOUBLE prevTheta = static_cast<tamura92*>(sp.getPijAccelerator()->getReplacementModel())->getTheta();
	145	MDOUBLE prevAlpha = initAlpha;
	146	tree prevTree;
	147
	148	for (int i=0; i < maxTotalIterations; ++i) {
	149
	150	// optimize TrTv
	151	newL = -brent(0.0, prevTrTv, upperBoundOnTrTv,
	152	C_evalTrTvParam(et,sc,sp,weights),
	153	epsilonLoglikelihoodForTrTvOptimization,
	154	&_bestTrTv);
	155	(static_cast<tamura92*>(sp.getPijAccelerator()->getReplacementModel()))->changeTrTv(_bestTrTv);
	156
	157	// optimize Theta
	158	newL = -brent(0.0, prevTheta, 1.0,
	159	C_evalTheta(et,sc,sp,weights),
	160	epsilonLoglikelihoodForThetaOptimization,
	161	&_bestTheta);
	162	(static_cast<tamura92*>(sp.getPijAccelerator()->getReplacementModel()))->changeTheta(_bestTheta);
	163
	164	// optimize Alpha
	165	newL = -brent(0.0, prevAlpha, upperBoundOnAlpha,
	166	C_evalAlpha(et,sc,sp,weights),
	167	epsilonLoglikelihoodForAlphaOptimization,
	168	&_bestAlpha);
	169	(static_cast<gammaDistribution*>(sp.distr()))->setAlpha(_bestAlpha);
	170
	171	LOG(5,<<"# bestTamura92ParamAlphaAndBBL::bestTamura92ParamAlphaAndBBL iteration " << i << ": after param optimization:" <<endl
	172	<<"# old L = " << oldL << "\t"
	173	<<"# new L = " << newL << endl
	174	<<"# new Alpha = " << _bestAlpha << endl);
	175
	176	// optimize branch lengths
	177	bblEM bblEM1(et,sc,sp,NULL,maxBBLIterations,epsilonLoglikelihoodForBBL);//maxIterations=1000
	178	newL =bblEM1.getTreeLikelihood();
	179
	180	LOG(5,<<"# bestTamura92ParamAlphaAndBBL::bestTamura92ParamAlphaAndBBL iteration " << i << ": after branch lengths optimization:" <<endl
	181	<<"# After BBL new L = "<<newL<<" old L = "<<oldL<<endl
	182	<<"# The tree:" );
	183	LOGDO(5,et.output(myLog::LogFile()));
	184
	185	// check for improvement in the likelihood
	186	if (newL > oldL+epsilonLikelihoodImprovment) {
	187	oldL = newL;
	188	_bestL = newL;
	189	prevTrTv = _bestTrTv;
	190	prevTheta = _bestTheta;
	191	prevAlpha = _bestAlpha;
	192	prevTree = et;
	193	} else {
	194	if (newL>oldL) {
	195	_bestL = newL;
	196	} else {
	197	_bestL = oldL;
	198	_bestTrTv = prevTrTv;
	199	_bestTheta = prevTheta;
	200	et = prevTree;
	201	}
	202	break;
	203	}
	204	}
	205	}
	206
	207	bestTamura92ParamAlphaAndBBLProportional::bestTamura92ParamAlphaAndBBLProportional( //find best TrTv, theta, loca Alpha for each gene, global Alpha and best branch lengths
	208	tree& et,
	209	vector<sequenceContainer>& sc,
	210	multipleStochasticProcess* msp,
	211	gammaDistribution* pProportionDist,
	212	Vdouble initLocalAlphas,
	213	Vdouble initLocalKappas,
	214	Vdouble initLocalThetas,
	215	const MDOUBLE upperBoundOnLocalAlpha,
	216	const MDOUBLE initGlobalAlpha,
	217	const MDOUBLE upperBoundOnGlobalAlpha,
	218	const MDOUBLE upperBoundOnTrTv,
	219	const int maxTotalIterations,
	220	const int maxBBLIterations,
	221	const bool optimizeSelectedBranches,
	222	const bool optimizeTree,
	223	const string branchLengthOptimizationMethod,
	224	const bool optimizeLocalParams,
	225	const bool optimizeGlobalAlpha,
	226	const Vdouble * weights,
	227	const MDOUBLE epsilonLikelihoodImprovment,
	228	const MDOUBLE epsilonLoglikelihoodForLocalTrTvOptimization,
	229	const MDOUBLE epsilonLoglikelihoodForLocalThetaOptimization,
	230	const MDOUBLE epsilonLoglikelihoodForLocalAlphaOptimization,
	231	const MDOUBLE epsilonLoglikelihoodForGlobalAlphaOptimization,
	232	const MDOUBLE epsilonLoglikelihoodForBBL)
	233
	234	{
	235	LOG(5,<<"Starting bestTamura92ParamAlphaAndBBLProportional"<<endl);
	236	Vdouble currentTrTvVec,currentThetaVec,currentLocalAlphaVec;
	237	MDOUBLE currentGlobalAlpha = initGlobalAlpha;
	238	currentTrTvVec = initLocalKappas;
	239	currentThetaVec = initLocalThetas;
	240	currentLocalAlphaVec = initLocalAlphas;
	241
	242	Vdouble newLvec;
	243	newLvec.resize(msp->getSPVecSize());
	244	//doubleRep oldL(VERYSMALL);//DR
	245	//doubleRep newL;//DR
	246	MDOUBLE oldL = VERYSMALL;
	247	MDOUBLE newL;
	248	//doubleRep epsilonLoglikelihoodForGlobalAlphaOptimizationDR(epsilonLoglikelihoodForGlobalAlphaOptimization);//DR
	249	_bestLvec.resize(msp->getSPVecSize(),0.0);
	250	_bestLocalAlphaVec = initLocalAlphas;
	251	_bestGlobalAlpha = initGlobalAlpha;
	252	int spIndex;
	253	_bestTrTvVec = currentTrTvVec;
	254	_bestThetaVec = currentThetaVec;
	255	pProportionDist->setAlpha(_bestGlobalAlpha);
	256	for(spIndex = 0;spIndex < msp->getSPVecSize();++spIndex){
	257	(static_cast<tamura92*>(msp->getSp(spIndex)->getPijAccelerator()->getReplacementModel()))->changeTheta(_bestThetaVec[spIndex]);//safety
	258	(static_cast<tamura92*>(msp->getSp(spIndex)->getPijAccelerator()->getReplacementModel()))->changeTrTv(_bestTrTvVec[spIndex]);
	259	(static_cast<gammaDistribution*>(msp->getSp(spIndex)->distr()))->setAlpha(_bestLocalAlphaVec[spIndex]);
	260	}
	261	//first compute the likelihood;
	262	_bestLvec = likelihoodComputation::getTreeLikelihoodProportionalAllPosAlphTheSame(et,sc,msp,pProportionDist,weights);
	263
	264	MDOUBLE ax_local = 0.0;
	265	MDOUBLE c_TrTv_x = upperBoundOnTrTv;
	266	MDOUBLE c_theta_x = 1.0;
	267	MDOUBLE c_localAlpha_x = upperBoundOnLocalAlpha;
	268	for (int i=0; i < maxTotalIterations; ++i) {
	269	if(optimizeLocalParams){
	270	for(spIndex = 0;spIndex < msp->getSPVecSize();++spIndex){
	271	//optimize Theta
	272	MDOUBLE theta_x(_bestThetaVec[spIndex]);
	273	newLvec[spIndex] = -brent(ax_local,theta_x,c_theta_x,
	274	C_evalLocalTheta(et,sc[spIndex],*msp->getSp(spIndex),pProportionDist,weights),
	275	epsilonLoglikelihoodForLocalThetaOptimization,
	276	&currentThetaVec[spIndex]);
	277	if (newLvec[spIndex] >= _bestLvec[spIndex])
	278	{
	279	_bestLvec[spIndex] = newLvec[spIndex];
	280	_bestThetaVec[spIndex] = currentThetaVec[spIndex];
	281	}
	282	else
	283	{//likelihood went down!
	284	LOG(2,<<"likelihood went down in optimizing TrTv param"<<endl<<"oldL = "<<sumVdouble(_bestLvec));
	285	}
	286	(static_cast<tamura92*>(msp->getSp(spIndex)->getPijAccelerator()->getReplacementModel()))->changeTheta(_bestThetaVec[spIndex]);//safety
	287
	288	//optimize TrTv
	289	MDOUBLE TrTv_x(_bestTrTvVec[spIndex]);
	290	newLvec[spIndex] = -brent(ax_local,TrTv_x,c_TrTv_x,
	291	C_evalLocalTrTvParam(et,sc[spIndex],*msp->getSp(spIndex),pProportionDist,weights),
	292	epsilonLoglikelihoodForLocalTrTvOptimization,
	293	&currentTrTvVec[spIndex]);
	294	if (newLvec[spIndex] >= _bestLvec[spIndex])
	295	{
	296	_bestLvec[spIndex] = newLvec[spIndex];
	297	_bestTrTvVec[spIndex] = currentTrTvVec[spIndex];
	298	}
	299	else
	300	{//likelihood went down!
	301	LOG(2,<<"likelihood went down in optimizing TrTv param"<<endl<<"oldL = "<<sumVdouble(_bestLvec));
	302	}
	303	(static_cast<tamura92*>(msp->getSp(spIndex)->getPijAccelerator()->getReplacementModel()))->changeTrTv(_bestTrTvVec[spIndex]);//safety
	304
	305	//optimize local alpha
	306	MDOUBLE localAlpha_x(_bestLocalAlphaVec[spIndex]);
	307	newLvec[spIndex] = -brent(ax_local,localAlpha_x, c_localAlpha_x,
	308	C_evalLocalAlpha(et,sc[spIndex],*msp->getSp(spIndex),pProportionDist,weights),
	309	epsilonLoglikelihoodForLocalAlphaOptimization,
	310	&currentLocalAlphaVec[spIndex]);
	311	if (newLvec[spIndex] >= _bestLvec[spIndex])
	312	{
	313	_bestLvec[spIndex] = newLvec[spIndex];
	314	_bestLocalAlphaVec[spIndex] = currentLocalAlphaVec[spIndex];
	315	}
	316	else
	317	{//likelihood went down!
	318	LOG(2,<<"likelihood went down in optimizing local alpha"<<endl<<"oldL = "<<sumVdouble(_bestLvec));
	319	}
	320	(static_cast<gammaDistribution*>(msp->getSp(spIndex)->distr()))->setAlpha(_bestLocalAlphaVec[spIndex]); //safety
	321	}
	322	LOGnOUT(2,<<"Done with Tamura92 local params optimization. LL: "<<sumVdouble(_bestLvec)<<endl);
	323	LOGnOUT(2,<<"Local Params:"<<endl);
	324	LOGnOUT(2,<<"TrTv:");
	325	for(spIndex = 0;spIndex < _bestTrTvVec.size();++spIndex){
	326	LOGnOUT(2,<<_bestTrTvVec[spIndex]<<",";);
	327	}
	328	LOGnOUT(2,<<endl);
	329	LOGnOUT(2,<<"Theta:");
	330	for(spIndex = 0;spIndex < _bestThetaVec.size();++spIndex){
	331	LOGnOUT(2,<<_bestThetaVec[spIndex]<<",";);
	332	}
	333	LOGnOUT(2,<<endl);
	334	LOGnOUT(2,<<"local alpha:");
	335	for(spIndex = 0;spIndex < _bestLocalAlphaVec.size();++spIndex){
	336	LOGnOUT(2,<<_bestLocalAlphaVec[spIndex]<<",";);
	337	}
	338	LOGnOUT(2,<<endl);
	339	}
	340	if(optimizeGlobalAlpha){
	341	//doubleRep ax_global(0.0);//DR
	342	//doubleRep c_globalAlpha_x(upperBoundOnGlobalAlpha);//DR
	343	//doubleRep minusOne(-1.0);//DR
	344	MDOUBLE ax_global = 0.0;
	345	MDOUBLE c_globalAlpha_x = upperBoundOnGlobalAlpha;
	346
	347	//optimize global alpha
	348	//doubleRep globalAlpha_x(prevGlobalAlpha);//DR
	349	MDOUBLE globalAlpha_x = _bestGlobalAlpha;
	350	//newL = minusOne*brentDoubleRep(ax_global,globalAlpha_x,c_globalAlpha_x,
	351	// C_evalGlobalAlpha(et,sc,msp,pProportionDist,weights),
	352	// epsilonLoglikelihoodForGlobalAlphaOptimizationDR,
	353	// &_bestGlobalAlpha);//DR
	354	newL = -brent(ax_global,globalAlpha_x,c_globalAlpha_x,
	355	C_evalGlobalAlpha(et,sc,msp,pProportionDist,weights),
	356	epsilonLoglikelihoodForGlobalAlphaOptimization,
	357	&currentGlobalAlpha);
	358	if (newL >= sumVdouble(_bestLvec))
	359	{
	360	_bestGlobalAlpha = currentGlobalAlpha;
	361	}
	362	else
	363	{//likelihood went down!
	364	LOG(2,<<"likelihood went down in optimizing global alpha"<<endl<<"oldL = "<<sumVdouble(_bestLvec));
	365	}
	366	pProportionDist->setAlpha(_bestGlobalAlpha); //safety
	367	//whether or not likelihood has improved we need to update _bestLvec
	368	_bestLvec = likelihoodComputation::getTreeLikelihoodProportionalAllPosAlphTheSame(et,sc,msp,pProportionDist,weights);
	369	LOGnOUT(2,<<"Done with global alpha optimization"<<endl<<"LL:"<<sumVdouble(_bestLvec)<<endl);
	370	LOGnOUT(2,<<"Global Alpha:"<<_bestGlobalAlpha<<endl);
	371	}
	372
	373	if(optimizeTree)
	374	{
	375	if(branchLengthOptimizationMethod == "bblLS"){
	376	bblLSProportionalEB bblLSPEB1(et,sc,msp,pProportionDist,_bestLvec,optimizeSelectedBranches,maxBBLIterations,epsilonLoglikelihoodForBBL);
	377	_bestLvec = bblLSPEB1.getTreeLikelihoodVec();
	378	LOGnOUT(2,<<"Done with bblLS"<<endl<<"LL:"<<sumVdouble(_bestLvec)<<endl);
	379	}
	380	else if(branchLengthOptimizationMethod == "bblEM"){
	381	bblEMProportionalEB bblEMPEB1(et,sc,msp,pProportionDist,optimizeSelectedBranches,NULL,maxBBLIterations,epsilonLoglikelihoodForBBL);
	382	_bestLvec = bblEMPEB1.getTreeLikelihood();
	383	LOGnOUT(2,<<"Done with bblEM. LL: "<<sumVdouble(_bestLvec)<<endl);
	384	}
	385	LOGnOUT(2,<<et.stringTreeInPhylipTreeFormat()<<endl);
	386	}
	387	// check for improvement in the likelihood
	388	if (sumVdouble(_bestLvec) > oldL+epsilonLikelihoodImprovment) {
	389	//all params have already been updated
	390	oldL = sumVdouble(_bestLvec);
	391	} else {
	392	break;
	393	}
	394	LOGnOUT(4,<<"Done with optimization iteration "<<i<<". LL: "<<sumVdouble(_bestLvec)<<endl);
	395	}
	396	}
	397

+238

-0

libs/phylogeny/bestTamura92param.h less more

	0	// $Id: bestTamura92param.h 962 2006-11-07 15:13:34Z privmane $
	1
	2	#ifndef ___BEST_TAMURA92_PARAM
	3	#define ___BEST_TAMURA92_PARAM
	4
	5	#include "definitions.h"
	6
	7	#include "likelihoodComputation.h"
	8	#include "sequenceContainer.h"
	9	#include "stochasticProcess.h"
	10	#include "multipleStochasticProcess.h"
	11	#include "gammaDistribution.h"
	12	#include "tree.h"
	13	#include "tamura92.h"
	14
	15
	16	class bestTamura92ParamFixedTree {
	17	public:
	18	explicit bestTamura92ParamFixedTree(const tree& et, // find best TrTv and theta
	19	const sequenceContainer& sc,
	20	stochasticProcess& sp,
	21	const Vdouble * weights,
	22	const int maxTotalIterations = 5,
	23	const MDOUBLE epsilonLikelihoodImprovment = 0.05,
	24	const MDOUBLE epsilonLoglikelihoodForTrTvOptimization = 0.01,
	25	const MDOUBLE epsilonLoglikelihoodForThetaOptimization = 0.01,
	26	const MDOUBLE upperBoundOnTrTv = 5.0);
	27	MDOUBLE getBestTrTv() {return _bestTrTv;}
	28	MDOUBLE getBestTheta() {return _bestTheta;}
	29	MDOUBLE getBestL() {return _bestL;}
	30	private:
	31	MDOUBLE _bestTrTv;
	32	MDOUBLE _bestTheta;
	33	MDOUBLE _bestL;
	34	};
	35
	36	class bestTamura92ParamAndBBL{
	37	public:
	38	explicit bestTamura92ParamAndBBL(tree& et, //find best TrTv, theta and best BBL
	39	const sequenceContainer& sc,
	40	stochasticProcess& sp,
	41	const Vdouble * weights=NULL,
	42	const int maxTotalIterations=5,
	43	const MDOUBLE epsilonLikelihoodImprovment=0.05,
	44	const MDOUBLE epsilonLoglikelihoodForTrTvOptimization=0.01,
	45	const MDOUBLE epsilonLoglikelihoodForThetaOptimization=0.01,
	46	const MDOUBLE epsilonLoglikelihoodForBBL=0.01,
	47	const MDOUBLE upperBoundOnTrTv=5.0,
	48	const int maxBBLIterations=10);
	49	MDOUBLE getBestTrTv() {return _bestTrTv;}
	50	MDOUBLE getBestTheta(int spIndex) {return _bestTheta;}
	51	MDOUBLE getBestL() {return _bestL;}
	52	private:
	53	MDOUBLE _bestTrTv;
	54	MDOUBLE _bestTheta;
	55	MDOUBLE _bestL;
	56	};
	57
	58
	59	class bestTamura92ParamAlphaAndBBL {
	60	public:
	61	explicit bestTamura92ParamAlphaAndBBL( //find best TrTv, theta, Alpha and best branch lengths
	62	tree& et,
	63	const sequenceContainer& sc,
	64	stochasticProcess& sp,
	65	const Vdouble * weights=NULL,
	66	const int maxTotalIterations=5,
	67	const MDOUBLE epsilonLikelihoodImprovment= 0.05,
	68	const MDOUBLE epsilonLoglikelihoodForTrTvOptimization= 0.01,
	69	const MDOUBLE epsilonLoglikelihoodForThetaOptimization= 0.01,
	70	const MDOUBLE epsilonLoglikelihoodForAlphaOptimization= 0.01,
	71	const MDOUBLE epsilonLoglikelihoodForBBL= 0.01,
	72	const MDOUBLE upperBoundOnTrTv = 5.0,
	73	const int maxBBLIterations=10,
	74	const MDOUBLE initAlpha = 1.5,
	75	const MDOUBLE upperBoundOnAlpha = 5.0);
	76	MDOUBLE getBestTrTv() {return _bestTrTv;}
	77	MDOUBLE getBestTheta() {return _bestTheta;}
	78	MDOUBLE getBestAlpha() {return _bestAlpha;}
	79	MDOUBLE getBestL() {return _bestL;}
	80	private:
	81	MDOUBLE _bestTrTv;
	82	MDOUBLE _bestTheta;
	83	MDOUBLE _bestAlpha;
	84	MDOUBLE _bestL;
	85	};
	86
	87	class bestTamura92ParamAlphaAndBBLProportional {
	88	public:
	89	explicit bestTamura92ParamAlphaAndBBLProportional( //find best TrTv, theta, loca Alpha for each gene, global Alpha and best branch lengths
	90	tree& et,
	91	vector<sequenceContainer>& sc,
	92	multipleStochasticProcess* msp,
	93	gammaDistribution* pProportionDist,
	94	Vdouble initLocalAlphas,
	95	Vdouble initLocalKappas,
	96	Vdouble initLocalThetas,
	97	const MDOUBLE upperBoundOnLocalAlpha,
	98	const MDOUBLE initGlobalAlpha,
	99	const MDOUBLE upperBoundOnGlobalAlpha,
	100	const MDOUBLE upperBoundOnTrTv,
	101	const int maxTotalIterations,
	102	const int maxBBLIterations,
	103	const bool optimizeSelectedBranches=false,
	104	const bool optimizeTree = true,
	105	const string branchLengthOptimizationMethod="bblLS",
	106	const bool optimizeLocalParams = true,
	107	const bool optimizeGlobalAlpha = true,
	108	const Vdouble * weights=NULL,
	109	const MDOUBLE epsilonLikelihoodImprovment= 0.05,
	110	const MDOUBLE epsilonLoglikelihoodForLocalTrTvOptimization= 0.01,
	111	const MDOUBLE epsilonLoglikelihoodForLocalThetaOptimization= 0.01,
	112	const MDOUBLE epsilonLoglikelihoodForLocalAlphaOptimization= 0.01,
	113	const MDOUBLE epsilonLoglikelihoodForGlobalAlphaOptimization= 0.01,
	114	const MDOUBLE epsilonLoglikelihoodForBBL= 0.01);
	115	MDOUBLE getBestTrTv(int spIndex) {return _bestTrTvVec[spIndex];}
	116	MDOUBLE getBestTheta(int spIndex) {return _bestThetaVec[spIndex];}
	117	MDOUBLE getBestLocalAlpha(int spIndex) {return _bestLocalAlphaVec[spIndex];}
	118	MDOUBLE getBestGlobalAlpha() {return _bestGlobalAlpha;}
	119	Vdouble getBestL() {return _bestLvec;}
	120	private:
	121	Vdouble _bestTrTvVec;
	122	Vdouble _bestThetaVec;
	123	Vdouble _bestLocalAlphaVec;
	124	MDOUBLE _bestGlobalAlpha;
	125	Vdouble _bestLvec;
	126	};
	127
	128
	129	class C_evalTrTvParam{
	130	public:
	131	C_evalTrTvParam( const tree& et,
	132	const sequenceContainer& sc,
	133	stochasticProcess& sp,
	134	const Vdouble * weights = NULL)
	135	: _et(et),_sc(sc),_weights(weights),_sp(sp){};
	136	private:
	137	const tree& _et;
	138	const sequenceContainer& _sc;
	139	const Vdouble * _weights;
	140	stochasticProcess& _sp;
	141	public:
	142	MDOUBLE operator() (MDOUBLE TrTv) {
	143	(static_cast<tamura92*>(_sp.getPijAccelerator()->getReplacementModel()))->changeTrTv(TrTv);
	144
	145	MDOUBLE res = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(_et,_sc,_sp,_weights);
	146	LOG(5,<<" with TrTv = "<<TrTv<<" logL = "<<res<<endl);
	147	return -res;
	148	}
	149	};
	150
	151	class C_evalLocalTrTvParam{
	152	public:
	153	C_evalLocalTrTvParam( const tree& et,
	154	const sequenceContainer& sc,
	155	stochasticProcess& sp,
	156	gammaDistribution* pProportionDist,
	157	const Vdouble * weights = NULL)
	158	: _et(et),_sc(sc),_weights(weights),_sp(sp),_pProportionDist(pProportionDist){};
	159	private:
	160	const tree& _et;
	161	const sequenceContainer& _sc;
	162	const Vdouble * _weights;
	163	stochasticProcess& _sp;
	164	gammaDistribution* _pProportionDist;
	165	public:
	166	MDOUBLE operator() (MDOUBLE TrTv) {
	167	(static_cast<tamura92*>(_sp.getPijAccelerator()->getReplacementModel()))->changeTrTv(TrTv);
	168	vector<sequenceContainer> tmpScVec;
	169	tmpScVec.push_back(_sc);
	170	vector<stochasticProcess> tmpSpVec;
	171	tmpSpVec.push_back(_sp);
	172	multipleStochasticProcess * tmpMsp = new multipleStochasticProcess();
	173	tmpMsp->setSpVec(tmpSpVec);
	174	Vdouble likeVec = likelihoodComputation::getTreeLikelihoodProportionalAllPosAlphTheSame(_et,tmpScVec,tmpMsp,_pProportionDist);
	175	MDOUBLE res = likeVec[0];
	176	delete(tmpMsp);
	177	LOG(5,<<" with TrTv = "<<TrTv<<" logL = "<<res<<endl);
	178	return -res;
	179	}
	180	};
	181
	182	class C_evalLocalTheta{
	183	public:
	184	C_evalLocalTheta( const tree& et,
	185	const sequenceContainer& sc,
	186	stochasticProcess& sp,
	187	gammaDistribution* pProportionDist,
	188	const Vdouble * weights = NULL)
	189	: _et(et),_sc(sc),_weights(weights),_sp(sp),_pProportionDist(pProportionDist){};
	190	private:
	191	const tree& _et;
	192	const sequenceContainer& _sc;
	193	const Vdouble * _weights;
	194	stochasticProcess& _sp;
	195	gammaDistribution* _pProportionDist;
	196	public:
	197	MDOUBLE operator() (MDOUBLE theta) {
	198	(static_cast<tamura92*>(_sp.getPijAccelerator()->getReplacementModel()))->changeTheta(theta);
	199	vector<sequenceContainer> tmpScVec;
	200	tmpScVec.push_back(_sc);
	201	vector<stochasticProcess> tmpSpVec;
	202	tmpSpVec.push_back(_sp);
	203	multipleStochasticProcess * tmpMsp = new multipleStochasticProcess();
	204	tmpMsp->setSpVec(tmpSpVec);
	205	Vdouble likeVec = likelihoodComputation::getTreeLikelihoodProportionalAllPosAlphTheSame(_et,tmpScVec,tmpMsp,_pProportionDist);
	206	MDOUBLE res = likeVec[0];
	207	delete(tmpMsp);
	208	LOG(5,<<" with Theta = "<<theta<<" logL = "<<res<<endl);
	209	return -res;
	210	}
	211	};
	212
	213	class C_evalTheta{
	214	public:
	215	C_evalTheta( const tree& et,
	216	const sequenceContainer& sc,
	217	stochasticProcess& sp,
	218	const Vdouble * weights = NULL)
	219	: _et(et),_sc(sc),_weights(weights),_sp(sp){};
	220	private:
	221	const tree& _et;
	222	const sequenceContainer& _sc;
	223	const Vdouble * _weights;
	224	stochasticProcess& _sp;
	225	public:
	226	MDOUBLE operator() (MDOUBLE theta) {
	227	(static_cast<tamura92*>(_sp.getPijAccelerator()->getReplacementModel()))->changeTheta(theta);
	228
	229	MDOUBLE res = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(_et,_sc,_sp,_weights);
	230	LOG(5,<<" with theta = "<<theta<<" logL = "<<res<<endl);
	231	return -res;
	232	}
	233	};
	234
	235	#endif
	236
	237

+139

-0

libs/phylogeny/betaDistribution.cpp less more

	0	// $Id: betaDistribution.cpp 3985 2008-05-11 11:00:44Z adido $
	1
	2	#include "betaDistribution.h"
	3	#include "gammaUtilities.h"
	4	#include "betaUtilities.h"
	5	#include "errorMsg.h"
	6	#include "logFile.h"
	7	#include <cmath>
	8
	9
	10	betaDistribution::betaDistribution()
	11	{
	12	_alpha = 0.0;
	13	_beta = 0.0;
	14	_boundary.resize(0,0);
	15	_rates.resize(0,0);
	16	_ratesProb.resize(0,0);
	17	_globalRate = 1;//??? 0.5 or 1
	18	_discretizationType = MEDIAN;
	19	}
	20
	21	// note that the order of initalization makes a diffrence.
	22	betaDistribution::betaDistribution(const betaDistribution& other) :
	23	_boundary(other._boundary),
	24	_alpha(other._alpha),
	25	_beta(other._beta),
	26	_rates(other._rates),
	27	_ratesProb(other._ratesProb),
	28	_globalRate(other._globalRate),
	29	_discretizationType(other._discretizationType){
	30	}
	31
	32	betaDistribution::betaDistribution(MDOUBLE alpha,MDOUBLE beta,int in_number_of_categories,discretizationType in_discretizationType) :distribution(){
	33	_globalRate=1.0;
	34	_discretizationType = in_discretizationType;
	35	setBetaParameters(in_number_of_categories,alpha,beta);
	36	}
	37
	38	betaDistribution::~betaDistribution() {
	39	_boundary.clear();
	40	_rates.clear();
	41	_ratesProb.clear();
	42	}
	43
	44	void betaDistribution::setAlpha(MDOUBLE in_alpha) {
	45	if (in_alpha == _alpha)
	46	return;
	47	setBetaParameters(categories(), in_alpha, _beta);
	48	}
	49
	50	void betaDistribution::setBeta(MDOUBLE in_beta) {
	51	if (in_beta == _beta)
	52	return;
	53	setBetaParameters( categories(), _alpha, in_beta);
	54	}
	55
	56	void betaDistribution::setDiscretizationType(discretizationType in_discretizationType) {
	57	if (in_discretizationType == _discretizationType)
	58	return;
	59	_discretizationType = in_discretizationType;
	60	if (categories() > 1)
	61	fill_rates();
	62
	63	}
	64	void betaDistribution::change_number_of_categories(int in_number_of_categories) {
	65	if (in_number_of_categories == categories())
	66	return;
	67	setBetaParameters( in_number_of_categories, _alpha, _beta);
	68	}
	69
	70	void betaDistribution::setBetaParameters(int in_number_of_categories, MDOUBLE in_alpha, MDOUBLE in_beta) {
	71	if ((in_alpha == _alpha) && (in_beta == _beta) && (in_number_of_categories == categories()))
	72	return;
	73
	74
	75	if (in_alpha < MINIMUM_ALPHA_PARAM)
	76	in_alpha = MINIMUM_ALPHA_PARAM;// when alpha is very small there are underflaw problems
	77	if (in_beta < MINIMUM_ALPHA_PARAM)
	78	in_beta = MINIMUM_ALPHA_PARAM;// when beta is very small there are underflaw problems
	79
	80	_alpha = in_alpha;
	81	_beta = in_beta;
	82	_rates.clear();
	83	_rates.resize(in_number_of_categories);
	84	_ratesProb.clear();
	85	_ratesProb.resize(in_number_of_categories, 1.0/in_number_of_categories);
	86	_boundary.clear();
	87	_boundary.resize(in_number_of_categories+1);
	88	if (in_number_of_categories==1) {
	89	_rates[0] = 1.0;
	90	return;
	91	}
	92	if (categories() > 1) {
	93	fill_rates();
	94	return ;
	95	}
	96
	97	}
	98	int betaDistribution::fill_rates() {
	99	fill_boundaries();
	100	int i;
	101	//LOG(5,<<endl<<" alpha = "<<_alpha<<" beta = "<< _beta<<endl);
	102	//for (i=0; i<=categories(); ++i) cout<<endl<<_boundary[i];
	103	//LOG(5,<<"\n====== the r categories are =====\n");
	104	for (i=0; i<categories(); ++i) {
	105	if (_discretizationType == MEAN)
	106	_rates[i]=computeAverage_r(_boundary[i], _boundary[i+1], _alpha, _beta, categories());
	107	else //_discretizationType == MEDIAN
	108	_rates[i] =inverseCDFBeta(_alpha, _beta,static_cast<MDOUBLE>(i2 +1)/(2categories()));
	109	//LOG(5,<<_rates[i]<<endl);
	110	}
	111	//LOG(5,<<endl<<_alpha<<endl);
	112	return 0;
	113	}
	114
	115	int betaDistribution::fill_boundaries() {
	116	int i;
	117	//LOG(5,<<endl<<"========BOUNDARY============="<<endl);
	118	for (i=1; i<categories(); ++i)
	119	{
	120	_boundary[i]=inverseCDFBeta(_alpha, _beta,static_cast<MDOUBLE>(i)/categories());
	121	//LOG(5,<<"_boundary[ "<<i<<"] ="<<_boundary[i]<<endl);
	122	}
	123	_boundary[0]=0;
	124	_boundary[i]=1;
	125
	126	return 0;
	127	}
	128
	129
	130	const MDOUBLE betaDistribution::getCumulativeProb(const MDOUBLE x) const
	131	{//
	132	//since r~gamma(alpha, beta) then beta*r~ gamma(alpha,1)=gammp
	133	//here we assume alpha=beta
	134	return incompleteBeta(_alpha,_beta,x);
	135	}
	136
	137
	138

+61

-0

libs/phylogeny/betaDistribution.h less more

	0	// $Id: betaDistribution.h 5803 2009-01-20 09:17:05Z adido $
	1
	2	#ifndef ___BETA_DIST
	3	#define ___BETA_DIST
	4	/************************************************************
	5	This distribution can take several forms depending on its free parameters alpha,beta
	6	For an extensive explanation of this distribution
	7	see http://mathworld.wolfram.com/BetaDistribution.html
	8	************************************************************/
	9	#include "definitions.h"
	10	#include "distribution.h"
	11
	12	class betaDistribution : public distribution {
	13
	14	public:
	15	enum discretizationType{MEAN, MEDIAN};
	16	explicit betaDistribution(MDOUBLE alpha, MDOUBLE beta, int in_number_of_categories,discretizationType in_discretizationType = MEDIAN);
	17	explicit betaDistribution(const betaDistribution& other);
	18	explicit betaDistribution();
	19	virtual ~betaDistribution();
	20	virtual void setBetaParameters(int numOfCategories ,MDOUBLE alpha, MDOUBLE beta);
	21
	22	virtual const int categories() const {return _rates.size();}
	23	virtual const MDOUBLE rates(const int i) const {return _rates[i]*_globalRate;}
	24	virtual const MDOUBLE ratesProb(const int i) const {return _ratesProb[i];}
	25	virtual distribution* clone() const { return new betaDistribution(*this); }
	26	virtual void setGlobalRate(const MDOUBLE x) {_globalRate = x;}
	27	virtual MDOUBLE getGlobalRate()const {return _globalRate;}
	28	virtual const MDOUBLE getCumulativeProb(const MDOUBLE x) const;
	29	virtual void setAlpha(MDOUBLE newAlpha);
	30	virtual MDOUBLE getAlpha() const {return _alpha;};
	31	virtual void setBeta(MDOUBLE newBeta);
	32	virtual MDOUBLE getBeta() const {return _beta;};
	33	virtual void setDiscretizationType(discretizationType in_discretizationType);
	34	virtual discretizationType getDiscretizationType() const {return _discretizationType;};
	35
	36	virtual void change_number_of_categories(int in_number_of_categories);
	37	virtual MDOUBLE getBorder(const int i) const {return _boundary[i];} //return the ith border. Note: _bonderi[0] = 0, _bondery[categories()] = infinite
	38
	39
	40	private:
	41	int fill_rates();
	42	int fill_boundaries();
	43
	44
	45	protected:
	46	MDOUBLE _alpha;
	47	MDOUBLE _beta;
	48
	49	vector<MDOUBLE> _rates;
	50	vector<MDOUBLE> _ratesProb;
	51	MDOUBLE _globalRate;
	52	discretizationType _discretizationType;
	53	vector<MDOUBLE> _boundary;
	54
	55	};
	56
	57
	58
	59	#endif
	60

+158

-0

libs/phylogeny/betaDistributionFixedCategories.cpp less more

	0	#include "betaDistributionFixedCategories.h"
	1	#include "errorMsg.h"
	2	#include "gammaUtilities.h"
	3
	4
	5	betaDistributionFixedCategories::betaDistributionFixedCategories(const Vdouble& fixedBoundaries, MDOUBLE alpha, MDOUBLE beta) :
	6	betaDistribution()
	7	{
	8	_alpha = alpha;
	9	_beta = beta;
	10	setFixedCategories(fixedBoundaries);
	11	}
	12
	13
	14	betaDistributionFixedCategories::betaDistributionFixedCategories(const Vdouble& fixedRates, const Vdouble& boundaries, MDOUBLE alpha, MDOUBLE beta) :
	15	betaDistribution()
	16	{
	17	if ((fixedRates.size() + 1) != boundaries.size())
	18	errorMsg::reportError("error in betaDistributionFixedCategories constructor");
	19	_alpha = alpha;
	20	_beta = beta;
	21	_rates = fixedRates;
	22	_boundary = boundaries;
	23	computeRatesProbs();
	24	}
	25
	26
	27
	28	betaDistributionFixedCategories::betaDistributionFixedCategories(MDOUBLE alpha, MDOUBLE beta, int catNum)
	29	: betaDistribution()
	30	{
	31	_alpha = alpha;
	32	_beta = beta;
	33	setDefaultBoundaries(catNum);
	34	}
	35
	36	betaDistributionFixedCategories::betaDistributionFixedCategories()
	37	: betaDistribution()
	38	{
	39	_alpha = 0.5;
	40	_beta = 0.5;
	41	setDefaultBoundaries(10);
	42	}
	43
	44	betaDistributionFixedCategories::betaDistributionFixedCategories(const betaDistributionFixedCategories& other)
	45	: betaDistribution(other)
	46	{}
	47	void betaDistributionFixedCategories::change_number_of_categories(int in_number_of_categories)
	48	{
	49	setDefaultBoundaries(in_number_of_categories);
	50	}
	51
	52
	53	void betaDistributionFixedCategories::setFixedCategories(const Vdouble& fixedBoundaries){
	54
	55	if (fixedBoundaries.size()<2)
	56	errorMsg::reportError("Error in generalGammaDistributionFixedCategories::setFixedCategories : at least two boundaries are required");
	57	if (fixedBoundaries[0] > 0.0)
	58	errorMsg::reportError("Error in generalGammaDistributionFixedCategories::setFixedCategories : first boundary should be zero");
	59
	60	_boundary = fixedBoundaries;
	61	if (_boundary[_boundary.size()] > VERYBIG/10000.0)
	62	_boundary[_boundary.size()] = VERYBIG/10000.0; // to avoid overflow
	63
	64	setFixedCategories();
	65	}
	66
	67	void betaDistributionFixedCategories::setFixedCategories() {
	68	fill_mean();
	69	computeRatesProbs();
	70	}
	71
	72	void betaDistributionFixedCategories::fill_mean()
	73	{
	74	int numOfCategories = _boundary.size()-1;
	75	if (numOfCategories == 0)
	76	errorMsg::reportError("Error in gammaDistributionFixedCategories::fill_mean, fixed boundaries must be first initialized");
	77	_rates.clear();
	78	_rates.resize(numOfCategories,0.0);
	79	int cat;
	80	for (cat=0; cat<numOfCategories; ++cat) {
	81	_rates[cat] = (_boundary[cat]+_boundary[cat+1])/2.0;
	82	}
	83
	84	}
	85
	86
	87	// this function is here to override the inherited function
	88	// note that the rates themselves and the boundaries do not change.
	89	// the number of categories cannot be changed, since fixed categories must be given before
	90	void betaDistributionFixedCategories::setBetaParameters (int in_number_of_categories, MDOUBLE in_alpha, MDOUBLE in_beta) {
	91	if (in_number_of_categories==1) {
	92	_rates[0] = 1.0;
	93	return;
	94	}
	95	if (in_number_of_categories != categories())
	96	errorMsg::reportError("betaDistributionFixedCategories::setGammaParameters: the number of categories cannot be changed, first call setFixedCategories");
	97	if ((in_alpha == _alpha) && (in_beta == _beta))
	98	return;
	99
	100	if (in_alpha < MINIMUM_ALPHA_PARAM)
	101	in_alpha = MINIMUM_ALPHA_PARAM;// when alpha is very small there are underflow problems
	102	if (in_beta < MINIMUM_ALPHA_PARAM)
	103	in_beta = MINIMUM_ALPHA_PARAM;// when beta is very small there are underflaw problems
	104
	105	_alpha = in_alpha;
	106	_beta = in_beta;
	107	computeRatesProbs();
	108	}
	109
	110	void betaDistributionFixedCategories::computeRatesProbs(){
	111	MDOUBLE totalProb = 0.0;
	112	MDOUBLE catProb = 0.0;
	113	MDOUBLE lowerBoundaryProb = 0.0;
	114	MDOUBLE upperBoundaryProb = 0.0;
	115	int cat;
	116	_ratesProb.clear();
	117	_ratesProb.resize(categories());
	118	for (cat = 0; cat < categories()-1; ++cat) {
	119	upperBoundaryProb = getCumulativeProb(_boundary[cat+1]);
	120	catProb = upperBoundaryProb - lowerBoundaryProb;
	121	_ratesProb[cat] = catProb;
	122	totalProb += catProb;
	123	lowerBoundaryProb = upperBoundaryProb;
	124	}
	125	_ratesProb[cat] = 1.0 - totalProb;
	126	}
	127
	128	void betaDistributionFixedCategories::setDefaultBoundaries(int catNum)
	129	{
	130	_boundary.clear();
	131	_boundary.resize(catNum+1,0.0);
	132	_boundary[0] = 0;
	133	_boundary[catNum] = 1.0;
	134	switch (catNum)
	135	{
	136	case 1:
	137	break;
	138	case 2:
	139	_boundary[1] = 0.5;
	140	break;
	141	case 10:
	142	_boundary[1] = 0.1;
	143	_boundary[2] = 0.2;
	144	_boundary[3] = 0.3;
	145	_boundary[4] = 0.4;
	146	_boundary[5] = 0.5;
	147	_boundary[6] = 0.6;
	148	_boundary[7] = 0.7;
	149	_boundary[8] = 0.8;
	150	_boundary[9] = 0.9;
	151	break;
	152	default:
	153	errorMsg::reportError("error in betaDistributionFixedCategories::setDefaultBoundaries");
	154	}
	155
	156	setFixedCategories();
	157	}

+37

-0

libs/phylogeny/betaDistributionFixedCategories.h less more

	0	#ifndef ___BETA_FIXED_CATEGORIES_CATEGORIES
	1	#define ___BETA_FIXED_CATEGORIES_CATEGORIES
	2	/************************************************************
	3	This class differs from the regular betaDistribution in that
	4	the rate categories are fixed according to the user's decision.
	5	Thus, only the probability of each category changes for each specific alpha and beta value, but
	6	the rate categories themselves are constant.
	7	************************************************************/
	8	#include "definitions.h"
	9	#include "betaDistribution.h"
	10	#include "errorMsg.h"
// A betaDistribution whose category boundaries are fixed up front; changing alpha/beta
// only changes the probability assigned to each category.
class betaDistributionFixedCategories : public betaDistribution {

public:
	explicit betaDistributionFixedCategories(const Vdouble& fixedBoundaries, MDOUBLE alpha, MDOUBLE beta);
	explicit betaDistributionFixedCategories(const Vdouble& fixedRates, const Vdouble& boundaries, MDOUBLE alpha, MDOUBLE beta);
	explicit betaDistributionFixedCategories(MDOUBLE alpha, MDOUBLE beta, int catNum);
	explicit betaDistributionFixedCategories(const betaDistributionFixedCategories& other);
	explicit betaDistributionFixedCategories();
	virtual ~betaDistributionFixedCategories() {}
	// Returns a new heap-allocated copy of this object.
	virtual distribution* clone() const { return new betaDistributionFixedCategories(*this); }
	// Resets the boundaries to the built-in defaults for the given number of categories.
	virtual void change_number_of_categories(int in_number_of_categories);
	// Updates alpha and beta; the number of categories must match the current one.
	virtual void setBetaParameters(int numOfCategories ,MDOUBLE alpha, MDOUBLE beta);
	// Installs user-supplied boundaries and recomputes rates and probabilities.
	virtual void setFixedCategories(const Vdouble& fixedBoundaries);

protected:
	// Installs the built-in boundaries for catNum categories.
	virtual void setDefaultBoundaries(int catNum);
	// Recomputes rates and probabilities from the current boundaries.
	virtual void setFixedCategories();
	// Sets each category rate to the midpoint of its boundaries.
	virtual void fill_mean();
	// Recomputes the probability of each category from the cumulative distribution.
	virtual void computeRatesProbs();

};
	32
	33
	34
	35	#endif
	36

+52

-0

libs/phylogeny/betaDistributionFixedCategoriesWithOmegaUniform.cpp less more

	0	#include "betaDistributionFixedCategoriesWithOmegaUniform.h"
	1	#include "errorMsg.h"
	2	#include "gammaUtilities.h"
	3	#include "matrixUtils.h"
	4
	5
	6	betaDistributionFixedCategoriesOmegaUniform::betaDistributionFixedCategoriesOmegaUniform(const betaDistributionFixedCategoriesOmegaUniform& other)
	7	: _betaDistr(other._betaDistr),_omegaDistr(other._omegaDistr){
	8
	9	}
	10
	11	betaDistributionFixedCategoriesOmegaUniform::betaDistributionFixedCategoriesOmegaUniform(int betaDistrCatNum,MDOUBLE alpha,MDOUBLE beta,
	12	int omegaCatNum,MDOUBLE omegaLowerBound,MDOUBLE omegaUpperBound)
	13	{
	14	_betaDistr.setBetaParameters(betaDistrCatNum,alpha,beta);
	15	_omegaDistr.setGlobalRate(1.0);
	16	_omegaDistr.setUniformParameters(omegaCatNum,omegaLowerBound,omegaUpperBound);
	17
	18	}
	19
	20	void betaDistributionFixedCategoriesOmegaUniform::setBetaParameters(int in_number_of_categories, MDOUBLE alpha, MDOUBLE beta)
	21	{
	22	_betaDistr.setBetaParameters(in_number_of_categories,alpha,beta);
	23	}
	24
	25
	26
	27	void betaDistributionFixedCategoriesOmegaUniform::change_number_of_categories(int in_number_of_categories)
	28	{
	29	_betaDistr.change_number_of_categories(in_number_of_categories);
	30	}
	31
	32
	33	const MDOUBLE betaDistributionFixedCategoriesOmegaUniform::ratesProb(const int i_rate) const {
	34	int noBetaDistCat = _betaDistr.categories();
	35	if (i_rate < _betaDistr.categories())
	36	return _betaDistr.ratesProb(i_rate);
	37	else return _omegaDistr.ratesProb(i_rate - noBetaDistCat); //omega prob
	38	}
	39
	40
	41	const MDOUBLE betaDistributionFixedCategoriesOmegaUniform::rates(const int i) const {
	42	int noBetaDistCat = _betaDistr.categories();
	43	if (i < noBetaDistCat)
	44	return _betaDistr.rates(i);
	45	else return _omegaDistr.rates(i - noBetaDistCat); //omega
	46
	47	}
	48
	49	const MDOUBLE betaDistributionFixedCategoriesOmegaUniform::getCumulativeProb(const MDOUBLE x) const {
	50	return _betaDistr.getCumulativeProb(x);
	51	}⏎

+53

-0

libs/phylogeny/betaDistributionFixedCategoriesWithOmegaUniform.h less more

	0	#ifndef ___BETA_DISTR_FIXED_CATEGORIES_OMEGA_UNIFORM
	1	#define ___BETA_DISTR_FIXED_CATEGORIES_OMEGA_UNIFORM
	2	/************************************************************
	3	This class differs from the regular betaOmegaDistribution in that
	4	the rate categories are fixed according to the user's decision.
	5	Thus, only the probability of each category changes for each specific alpha value, but
	6	the rate categories themselves are constant.
	7	************************************************************/
	8	#include "definitions.h"
	9	#include "betaDistributionFixedCategories.h"
	10	#include "uniformDistribution.h"
	11	#include "errorMsg.h"
	12
	13
// A distribution made of two parts: a beta distribution with fixed categories
// (_betaDistr) followed by a uniform distribution for omega (_omegaDistr).
// Category indices first enumerate the beta categories and then the omega categories.
class betaDistributionFixedCategoriesOmegaUniform : public distribution {
public:

	explicit betaDistributionFixedCategoriesOmegaUniform(const betaDistributionFixedCategoriesOmegaUniform& other);
	explicit betaDistributionFixedCategoriesOmegaUniform(int betaDistrCatNum,MDOUBLE alpha,MDOUBLE beta,
		int omegaCatNum =10,MDOUBLE omegaLowerBound = 1,MDOUBLE omegaUpperBound = 11);
	explicit betaDistributionFixedCategoriesOmegaUniform() {};
	virtual ~betaDistributionFixedCategoriesOmegaUniform() {};
	// Returns a new heap-allocated copy of this object.
	virtual distribution* clone() const { return new betaDistributionFixedCategoriesOmegaUniform(*this); }
	// Both setters below affect the beta part only.
	virtual void change_number_of_categories(int in_number_of_categories);
	virtual void setBetaParameters(int numOfCategories ,MDOUBLE alpha, MDOUBLE beta);

	// Total number of categories: beta categories plus omega categories.
	virtual const int categories() const {return _betaDistr.categories()+ _omegaDistr.categories();}
	// Number of categories of the beta part alone.
	virtual const int betaCategories()const {return _betaDistr.categories();};
	virtual const MDOUBLE rates(const int i) const;
	virtual const MDOUBLE ratesProb(const int i_rate) const;
	// The global rate, alpha, beta, cumulative probability and borders are all
	// forwarded to the beta part.
	virtual void setGlobalRate(const MDOUBLE x) {_betaDistr.setGlobalRate(x);}
	virtual MDOUBLE getGlobalRate()const {return _betaDistr.getGlobalRate();}
	virtual const MDOUBLE getCumulativeProb(const MDOUBLE x) const;
	virtual void setAlpha(MDOUBLE newAlpha){ _betaDistr.setAlpha(newAlpha);}
	virtual MDOUBLE getAlpha() const {return _betaDistr.getAlpha();};
	virtual void setBeta(MDOUBLE newBeta){_betaDistr.setBeta(newBeta);}
	virtual MDOUBLE getBeta() const {return _betaDistr.getBeta();};
	// Returns the i-th border of the beta part.
	// NOTE(review): the original note said border[0] = 0 and border[categories()] = infinite;
	// the default boundaries of the fixed-category beta part end at 1.0 - confirm.
	virtual MDOUBLE getBorder(const int i) const {return _betaDistr.getBorder(i);}
	//virtual MDOUBLE getOmegai() const ;
	//virtual MDOUBLE getBetaProbi() const ;
	//virtual void setOmegai(MDOUBLE omega);
	//virtual void setBetaProbi(MDOUBLE betaProb);


private:
	betaDistributionFixedCategories _betaDistr; //10 fixed cat 0.05, 0.15, 0.25 ...,0.95
	uniformDistribution _omegaDistr; // w ~ U(1,11) with 10 cat
};
	48
	49
	50
	51	#endif
	52

+61

-0

libs/phylogeny/betaOmegaDistribution.cpp less more

	0	// $Id: betaOmegaDistribution.cpp 962 2006-11-07 15:13:34Z privmane $
	1
	2	#include "betaOmegaDistribution.h"
	3	#include "gammaUtilities.h"
	4	#include "betaUtilities.h"
	5	#include "errorMsg.h"
	6	#include "logFile.h"
	7	#include <cmath>
	8
	9
	10	betaOmegaDistribution::betaOmegaDistribution()
	11	{
	12	_omega=1;
	13	_betaProb = 0.5;
	14	}
	15
	16	// note that the order of initalization makes a diffrence.
	17	betaOmegaDistribution::betaOmegaDistribution(const betaOmegaDistribution& other) :
	18	_betaDistr(other._betaDistr),
	19	_omega(other._omega),
	20	_betaProb(other._betaProb){
	21	}
	22
	23	betaOmegaDistribution::betaOmegaDistribution(MDOUBLE alpha,MDOUBLE beta,int in_number_of_categories,MDOUBLE betaProb,MDOUBLE omega) :distribution(){
	24	_omega = omega;
	25	_betaProb = betaProb;
	26	_betaDistr.setGlobalRate(1.0);
	27	_betaDistr.setBetaParameters(in_number_of_categories,alpha,beta);
	28	}
	29
	30	betaOmegaDistribution::~betaOmegaDistribution() {}
	31
	32
	33	void betaOmegaDistribution::setBetaOmegaParameters(int in_number_of_categories,MDOUBLE alpha, MDOUBLE beta,MDOUBLE betaProb,MDOUBLE omega){
	34	_omega = omega;
	35	_betaProb = betaProb;
	36	_betaDistr.setBetaParameters(in_number_of_categories, alpha, beta);
	37
	38	}
	39	const MDOUBLE betaOmegaDistribution::ratesProb(const int i) const {
	40	if (i < _betaDistr.categories())
	41	return _betaDistr.ratesProb(i)*_betaProb;
	42	else return (1-_betaProb); //omega prob
	43	}
	44
	45
	46	const MDOUBLE betaOmegaDistribution::rates(const int i) const {
	47	if (i < _betaDistr.categories())
	48	return _betaDistr.rates(i);
	49	else return _omega; //omega
	50	}
	51
	52
	53
	54	const MDOUBLE betaOmegaDistribution::getCumulativeProb(const MDOUBLE x) const
	55	{ return _betaDistr.getCumulativeProb(x);
	56	}
	57
	58
	59
	60

+56

-0

libs/phylogeny/betaOmegaDistribution.h less more

	0	// $Id: betaOmegaDistribution.h 962 2006-11-07 15:13:34Z privmane $
	1
	2	#ifndef ___BETA_OMEGA_DIST
	3	#define ___BETA_OMEGA_DIST
	4	/************************************************************
	5	This distribution can take several forms depending on its free parameters alpha and beta.
	6	For an extensive explanation of this distribution
	7	see http://mathworld.wolfram.com/BetaDistribution.html
	8	************************************************************/
	9	#include "definitions.h"
	10	#include "distribution.h"
	11	#include "betaDistribution.h"
	12
	13	#include "logFile.h"
	14
	15	using namespace std;
	16
	17
// A beta distribution (_betaDistr) plus one extra category whose rate is _omega.
// The beta categories share a total probability of _betaProb; the omega category
// has probability 1-_betaProb.
class betaOmegaDistribution : public distribution {

public:
	explicit betaOmegaDistribution(MDOUBLE alpha, MDOUBLE beta, int in_number_of_categories,MDOUBLE betaProb,MDOUBLE omega);
	explicit betaOmegaDistribution(const betaOmegaDistribution& other);
	explicit betaOmegaDistribution();
	virtual ~betaOmegaDistribution();
	// Sets omega, the beta probability and the beta part's parameters in one call.
	virtual void setBetaOmegaParameters(int in_number_of_categories,MDOUBLE alpha, MDOUBLE beta,MDOUBLE betaProb,MDOUBLE omega);
	// Sets the beta part's parameters only.
	virtual void setBetaParameters(int numOfCategories ,MDOUBLE alpha, MDOUBLE beta){_betaDistr.setBetaParameters(numOfCategories,alpha,beta);}

	// Number of beta categories plus one for the omega category.
	virtual const int categories() const {return _betaDistr.categories()+1;}
	virtual const MDOUBLE rates(const int i) const;
	virtual const MDOUBLE ratesProb(const int i) const;
	// Returns a new heap-allocated copy of this object.
	virtual distribution* clone() const { return new betaOmegaDistribution(*this); }
	// The accessors below are forwarded to the beta part.
	virtual void setGlobalRate(const MDOUBLE x) {_betaDistr.setGlobalRate(x);}
	virtual MDOUBLE getGlobalRate()const {return _betaDistr.getGlobalRate();}
	virtual const MDOUBLE getCumulativeProb(const MDOUBLE x) const;
	virtual void setAlpha(MDOUBLE newAlpha){ _betaDistr.setAlpha(newAlpha);}
	virtual MDOUBLE getAlpha() const {return _betaDistr.getAlpha();};
	virtual void setBeta(MDOUBLE newBeta){_betaDistr.setBeta(newBeta);}
	virtual MDOUBLE getBeta() const {return _betaDistr.getBeta();};
	virtual void change_number_of_categories(int in_number_of_categories){_betaDistr.change_number_of_categories(in_number_of_categories);}
	// Returns the i-th border of the beta part.
	// NOTE(review): the original note said border[0] = 0 and border[categories()] = infinite;
	// that wording looks inherited from a gamma distribution - confirm for the beta case.
	virtual MDOUBLE getBorder(const int i) const {return _betaDistr.getBorder(i);}
	virtual MDOUBLE getOmega() const {return _omega;}
	virtual MDOUBLE getBetaProb() const {return _betaProb;};
	virtual void setOmega(MDOUBLE omega) { _omega = omega;};
	virtual void setBetaProb(MDOUBLE betaProb) { _betaProb = betaProb;};

private:
	betaDistribution _betaDistr; // the beta part
	MDOUBLE _omega;              // rate of the extra (omega) category
	MDOUBLE _betaProb;           // total probability assigned to the beta part
};
	51
	52
	53
	54	#endif
	55

+174

-0

libs/phylogeny/betaUtilities.cpp less more

	0	// $Id: betaUtilities.cpp 962 2006-11-07 15:13:34Z privmane $
	1	#include "definitions.h"
	2	#include "betaUtilities.h"
	3	#include "gammaUtilities.h"
	4	#include "logFile.h"
	5	#include "errorMsg.h"
	6	#include <cmath>
	7
	8	/******************************
	9	Computes the inverse of the beta CDF: given a prob. value, calculates the x for which
	10	the integral over 0 to x of beta CDF = prob.
	11	Adapted from:
	12	1. Majumder and Bhattacharjee (1973) App. Stat. 22(3) 411-414
	13	and the corrections:
	14	2. Cran et al. (1977) App. Stat. 26(1) 111-114
	15	3. Berry et al. (1990) App. Stat. 39(2) 309-310
	16	and another adaptation made in the code of Yang (tools.c)
	17	****************************/
	18	MDOUBLE inverseCDFBeta(MDOUBLE a, MDOUBLE b, MDOUBLE prob){
	19	if(a<0 \|\| b<0 \|\| prob<0 \|\| prob>1) {
	20	errorMsg::reportError("error in inverseCDFBeta,illegal parameter");
	21	}
	22	if (prob == 0 \|\| prob == 1)
	23	return prob;
	24
	25	int maxIter=100;
	26	MDOUBLE epsilonLow=1e-300;
	27	MDOUBLE fpu=3e-308;
	28
	29	/****** changing the tail direction (prob=1-prob)*/
	30	bool tail=false;
	31	MDOUBLE probA=prob;
	32	if (prob > 0.5) {
	33	prob = 1.0 - prob;
	34	tail = true;
	35	MDOUBLE tmp=a;
	36	a=b;
	37	b=tmp;
	38	}
	39	MDOUBLE lnBetaVal=betaln(a,b);
	40	MDOUBLE x;
	41
	42	/****** calculating chi square evaluator */
	43	MDOUBLE r = sqrt(-log(prob * prob));
	44	MDOUBLE y = r - (2.30753+0.27061r)/(1.+ (0.99229+0.04481r) * r);
	45
	46	MDOUBLE chiSquare = 1.0/(9.0 * b);
	47	chiSquare = b2 pow(1.0 - chiSquare + y * sqrt(chiSquare), 3.0);
	48	// MDOUBLE chiSquare2=gammq(b,prob/2.0); //chi square valued of prob with 2q df
	49	MDOUBLE T=(4.0a+2.0b-2)/chiSquare;
	50
	51
	52	/****** initializing x0 */
	53	if (a > 1.0 && b > 1.0) {
	54	r = (y * y - 3.) / 6.;
	55	MDOUBLE s = 1. / (a*2. - 1.);
	56	MDOUBLE t = 1. / (b*2. - 1.);
	57	MDOUBLE h = 2. / (s + t);
	58	MDOUBLE w = y * sqrt(h + r) / h - (t - s) * (r + 5./6. - 2./(3.*h));
	59	x = a / (a + b * exp(w + w));
	60	}
	61	else {
	62	if (chiSquare<0){
	63	x=exp((log(b*(1-prob))+lnBetaVal)/b);
	64	}
	65	else if (T<1){
	66	x=exp((log(prob*a)+lnBetaVal)/a);
	67	}
	68	else {
	69	x=(T-1.0)/(T+1.0);
	70	}
	71	}
	72
	73	if(x<=fpu \|\| x>=1-2.22e-16) x=(prob+0.5)/2; // 0<x<1 but to avoid underflow a little smaller
	74
	75	/****** iterating with a modified version of newton-raphson */
	76	MDOUBLE adj, newX=x, prev=0;
	77	MDOUBLE yprev = 0.;
	78	adj = 1.;
	79
	80	MDOUBLE eps = pow(10., -13. - 2.5/(probA * probA) - 0.5/(probA *probA));
	81	eps = (eps>epsilonLow?eps:epsilonLow);
	82
	83	for (int i=0; i<maxIter; i++) {
	84	y = incompleteBeta(a,b,x);
	85	y = (y - prob) *
	86	exp(lnBetaVal + (1.0-a) * log(x) + (1.0-b) * log(1.0 - x)); //the classical newton-raphson formula
	87	if (y * yprev <= 0)
	88	prev = (fabs(adj)>fpu?fabs(adj):fpu);
	89	MDOUBLE g = 1;
	90	for (int j=0; j<maxIter; j++) {
	91	adj = g * y;
	92	if (fabs(adj) < prev) {
	93	newX = x - adj; // new x
	94	if (newX >= 0. && newX <= 1.) {
	95	if (prev <= eps \|\| fabs(y) <= eps) return(tail?1.0-x:x);;
	96	if (newX != 0. && newX != 1.0) break;
	97	}
	98	}
	99	g /= 3.;
	100	}
	101	if (fabs(newX-x)<fpu)
	102	return (tail?1.0-x:x);;
	103	x = newX;
	104	yprev = y;
	105	}
	106	return (tail?1.0-x:x);
	107	}
	108
	109
	110	/******************************
	111	Computes the average r value in percentile k whose boundaries are leftBound and rightBound
	112	****************************/
	113	MDOUBLE computeAverage_r(MDOUBLE leftBound, MDOUBLE rightBound, MDOUBLE alpha, MDOUBLE beta, int k){
	114	MDOUBLE tmp;
	115	tmp= incompleteBeta(alpha+1,beta,rightBound) - incompleteBeta(alpha+1,beta,leftBound);
	116	tmp= (tmpalpha/(alpha+beta))k;
	117	return tmp;
	118	}
	119	/******************************
	120	Computes the integral from 0 to x over the beta CDF:
	121	(1/Beta(alpha,beta))x^(alpha-1)*(1-x)^(beta-1) where
	122	Beta(a,b)=Gamma(a)*Gamma(b)/Gamma(a+b)
	123	****************************/
	124	MDOUBLE incompleteBeta(MDOUBLE alpha, MDOUBLE beta, MDOUBLE x){
	125	MDOUBLE tmp;
	126	if (x<0 \|\| x>1) {
	127	LOG(5,<<"Error in function incompleteBeta : invalid x = "<<x<<" alpha = "<<alpha<<" beta= "<<beta<<endl);
	128	errorMsg::reportError("Error in function incompleteBeta : invalid x");
	129	}
	130	if (x==0 \|\| x==1) tmp=0.0;
	131	else tmp=exp(alphalog(x)+betalog(1-x)-betaln(alpha,beta));
	132
	133	if (x<((alpha+1)/(alpha+beta+2))) return tmp*betacf(alpha,beta,x)/alpha;
	134	return 1-tmp*betacf(beta,alpha,1-x)/beta;
	135	}
	136	MDOUBLE betacf(MDOUBLE a, MDOUBLE b, MDOUBLE x){
	137	int m, m2;
	138	MDOUBLE aa,c,d,del,h,qab,qam,qap;
	139	qab = a+b;
	140	qap = a+1;
	141	qam = a-1;
	142	c=1;
	143	d=1-qab*x/qap;
	144	if (fabs(d)<FPMIN) d=FPMIN;
	145	d=1.0/d;
	146	h=d;
	147	for(m=1;m<=ITMAX;m++){
	148	m2=2*m;
	149	aa=m(b-m)x/((qam+m2)*(a+m2));
	150	d = 1.0+aa*d;
	151	if (fabs(d)<FPMIN) d = FPMIN;
	152	c=1.0 + aa/c;
	153	if (fabs(c)<FPMIN) c = FPMIN;
	154	d = 1.0/d;
	155	h = dc;
	156	aa = -(a+m)(qab+m)x/((a+m2)*(qap+m2));
	157	d = 1.0+aa*d;
	158	if (fabs(d)<FPMIN) d = FPMIN;
	159	c = 1.0 + aa/c;
	160	if (fabs(c)<FPMIN) c = FPMIN;
	161	d = 1.0/d;
	162	del = d*c;
	163	h*=del;
	164	if (fabs(del-1.0) <= EPS) break;
	165	}
	166	if (m > ITMAX) LOG(5,<<"Error in function betacf : alpha \|\| beta big \|\|MAXIT small"<<endl);
	167	return h;
	168	}
	169
	170	MDOUBLE betaln(MDOUBLE alpha, MDOUBLE beta){
	171	return gammln(alpha)+gammln(beta)-gammln(alpha+beta);
	172	}
	173

+21

-0

libs/phylogeny/betaUtilities.h less more

	0	// $Id: betaUtilities.h 962 2006-11-07 15:13:34Z privmane $
	1	#ifndef ___BETA_UTILITIES
	2	#define ___BETA_UTILITIES
	3
	4	#include "definitions.h"
	5	#include "numRec.h"
	6
	7	/******************************************************************************
	8	beta utilities include calculating inverse of the beta cdf and calculation of mean values
	9	used mainly in building the gamma function and creating categories within it
	10	******************************************************************************/
	11
	12	MDOUBLE inverseCDFBeta(MDOUBLE a, MDOUBLE b, MDOUBLE prob);
	13	MDOUBLE computeAverage_r(MDOUBLE leftBound, MDOUBLE rightBound, MDOUBLE alpha, MDOUBLE beta, int k);
	14	MDOUBLE incompleteBeta(MDOUBLE alpha, MDOUBLE beta, MDOUBLE x);
	15	MDOUBLE betacf(MDOUBLE a, MDOUBLE b, MDOUBLE x);
	16	MDOUBLE betaln(MDOUBLE alpha, MDOUBLE beta);
	17
	18
	19
	20	#endif

+227

-0

libs/phylogeny/bootstrap.cpp less more

	0	// $Id: bootstrap.cpp 962 2006-11-07 15:13:34Z privmane $
	1
	2	#include "definitions.h"
	3	#include "someUtil.h"
	4	#include "bootstrap.h"
	5	#include "splitTreeUtil.h"
	6	#include <algorithm>
	7	#include <set>
	8	using namespace std;
	9
	10	// -----------------------------------------------------------------------------------------
	11	// ----------------------------- The constructor and its related functions -----------------
	12	// -----------------------------------------------------------------------------------------
	13
	14	bootstrap::bootstrap(const treeVec& treevect):_numTrees(0), _nTaxa(0){
	15	fillFromTreeVec(treevect);
	16	}
	17	bootstrap::bootstrap (const string& filename):_numTrees(0), _nTaxa(0){
	18	fillFromTreeVec(getStartingTreeVecFromFile(filename));
	19	}
	20
	21	void bootstrap::fillFromTreeVec(const treeVec& treevect) {
	22	// for each tree, we compute the set of all splits.
	23	// we update for each split in each tree the split-map.
	24	// so we have the frequency of each split.
	25	for (treeVec::const_iterator i=treevect.begin();i!=treevect.end();++i)
	26	splitTree(*i);
	27	}
	28
	29	// takes a tree, computes all splits and
	30	// enter them into the Splits map
	31	void bootstrap::splitTree(const tree& T){
	32	_numTrees++;
	33	updateNtaxaAndNameMapAndValidateConsistency(T);
	34	splitSubTreeRecursivly(T.getRoot(), true); // the true because we call the recursion with the root. Otherwise it is false;
	35	}
	36
	37	void bootstrap::updateNtaxaAndNameMapAndValidateConsistency(const tree& T) {
	38	if (!_nTaxa) { // only for the first tree, this part intializes the _nameMap and the _nTaxa
	39	_sequenceNames = getSequencesNames(T);
	40	for (_nTaxa=0;_nTaxa<_sequenceNames.size();++_nTaxa) {
	41	_nameMap[_sequenceNames[_nTaxa]] =_nTaxa;
	42	}
	43	}
	44	else {
	45	vector<string> namesInT1 = getSequencesNames(T);
	46	if (namesInT1.size() < _nameMap.size()) {
	47	string errMs1 = "Not all trees have the same number of sequences. ";
	48	errMs1 += "tree number 1 has: ";
	49	errMs1 += int2string(_nameMap.size());
	50	errMs1 += " while tree number: ";
	51	errMs1 += int2string(_numTrees);
	52	errMs1 += " has ";
	53	errMs1 += int2string(namesInT1.size());
	54	errMs1 += "\nError in function bootstrap::splitTree";
	55	errorMsg::reportError(errMs1);
	56	}
	57	for (int i=0; i < namesInT1.size(); ++i) {
	58	if (_nameMap.count(namesInT1[i])==0) {
	59	string errMs = "The taxa ";
	60	errMs += namesInT1[i];
	61	errMs += " found in tree number ";
	62	errMs += int2string(_numTrees);
	63	errMs += " is not present in the first tree. Error in function bootstrap::splitTree";
	64	errorMsg::reportError(errMs);
	65	}
	66	}
	67	}
	68	}
	69
	70	set<int> bootstrap::splitSubTreeRecursivly(const tree::nodeP &n,
	71	const bool isRoot) {//false
	72	// this function assumes that the root of the tree is not a leaf
	73	set<int> s; // the id of all leaves of the subtree of the nodeP n.
	74	for(int i=0; i<n->getNumberOfSons() ;++i) {
	75	set<int> sonSet(splitSubTreeRecursivly(n->getSon(i)));
	76	set<int>::iterator it = sonSet.begin();
	77	for (; it != sonSet.end(); ++it) s.insert(*it);
	78	}
	79	if(isRoot) return s;
	80	if (n->isLeaf()) {
	81	s.insert(idFromName(n->name()));
	82	} else { // this avoids keeping track of trivial splits.
	83	set<int>::const_iterator sBeg(s.begin());
	84	set<int>::const_iterator sEnd(s.end());
	85	split sp(sBeg,sEnd,_nTaxa);
	86	_Splits.add(sp);
	87	}
	88	return(s);
	89	}
	90
	91	// -----------------------------------------------------------------------------------------
	92	// ----------------------------- getWeightsForTree -----------------------------------------
	93	// -----------------------------------------------------------------------------------------
	94
	95	map<int, MDOUBLE> bootstrap::getWeightsForTree(const tree& inTree) const {
	96	map<int, MDOUBLE> v;
	97	recursivelyBuiltBPMap(inTree.getRoot(), v);
	98	return (v);
	99	}
	100
	101	// the function returns the ids of the leaves in the subtree defined by rootOfSubtree.
	102	set<int> bootstrap::recursivelyBuiltBPMap(const tree::nodeP &rootOfSubtree, map<int, MDOUBLE> &v) const {
	103	set<int> s;
	104	for(int i=0;i<rootOfSubtree->getNumberOfSons();++i) {
	105	set<int> sonSet(recursivelyBuiltBPMap(rootOfSubtree->getSon(i),v));
	106	set<int>::iterator it = sonSet.begin();
	107	for (; it != sonSet.end(); ++it) s.insert(*it);
	108	}
	109	if (rootOfSubtree->isLeaf()) {
	110	s.insert(idFromName(rootOfSubtree->name()));
	111	}
	112	set<int>::const_iterator sBeg(s.begin());
	113	set<int>::const_iterator sEnd(s.end());
	114	split sp(sBeg,sEnd,_nTaxa);
	115	v[rootOfSubtree->id()]=(static_cast<MDOUBLE>(_Splits.counts(sp)))/_numTrees;
	116	return(s);
	117	}
	118
	119	// We get different trees, and the id's are not consistent among different trees.
	120	// here, we map a name to a single id.
	121	int bootstrap::idFromName(const string & name) const {
	122	NameMap_t::const_iterator i(_nameMap.find(name));
	123	if (i==_nameMap.end()) {
	124	string s="Can not find an Id for the taxa name:";
	125	s+=name;
	126	s+="\n error in function bootstrap::idFromName\n";
	127	errorMsg::reportError(s);
	128	}
	129	return(i->second);
	130	}
	131
	132	// -----------------------------------------------------------------------------------------
	133	// ----------------------------- Printing the bp ------------------------------------------
	134	// -----------------------------------------------------------------------------------------
	135
	136	void bootstrap::print(ostream& sout){// = cout
	137	_Splits.print(sout);
	138	}
	139
	140	void bootstrap::printTreeWithBPvalues(ostream &out, const tree &t, const map<int, MDOUBLE> & v, const bool printBranchLenght) const{
	141	recursivlyPrintTreeWithBPvalues(out,t.getRoot(),v, printBranchLenght);
	142	out<<";";
	143	}
	144
	145	void bootstrap::recursivlyPrintTreeWithBPvalues(ostream &out,
	146	const tree::nodeP &myNode,
	147	const map<int, MDOUBLE> &v,
	148	const bool printBranchLenght) const {
	149	if (myNode->isLeaf()) {
	150	out << myNode->name();
	151	if (printBranchLenght) out << ":"<<myNode->dis2father();
	152	return;
	153	} else {
	154	out <<"(";
	155	for (int i=0;i<myNode->getNumberOfSons();++i) {
	156	if (i>0) out <<",";
	157	recursivlyPrintTreeWithBPvalues(out, myNode->getSon(i),v, printBranchLenght);
	158	}
	159	out <<")";
	160	if (myNode->isRoot()==false) {
	161	if (printBranchLenght) out<<":"<<myNode->dis2father();
	162	map<int,MDOUBLE>::const_iterator val=v.find(myNode->id());
	163	if ((val!=v.end()) && val->second>0.0) {
	164	out << "["<<val->second<<"]";
	165	}
	166	}
	167	}
	168	}
	169
	170	// for DEBUGGING ONLY:
	171	void bootstrap::print_names(ostream &out) const {
	172	NameMap_t::const_iterator i(_nameMap.begin());
	173	for (;i!=_nameMap.end();++i)
	174	out << "{"<<i->first<<" = "<<i->second<<"}"<<endl;
	175	}
	176
	177	// -----------------------------------------------------------------------------------------
	178	// ----------------------------- Building consensus tree ----------------------------------
	179	// -----------------------------------------------------------------------------------------
	180	// returns the bp values of the consensus tree.
	181	// the idea is to start from the split map, extract a split at a time.
	182	// first, the splits with the highest bp (i.e., in a sorted way).
	183	// Each splits is checked for compatibility with the consensus tree constructed so far.
	184	// if it is compatible, it is added to the consensus.
	185	// Otherwise - it is discarded.
	186	// returns the consensus tree
	187	tree bootstrap::consensusTree(const MDOUBLE threshold) const {// =0.5
	188	// 1. get the names of the sequences
	189	vector<string> names;
	190	for (NameMap_t::const_iterator i(_nameMap.begin());i!=_nameMap.end();++i)
	191	names.push_back(i->first);
	192
	193	// 2. create a star tree
	194	tree res = starTree(names);
	195
	196	// 3. get the sorted vector of the splits from which the consensus is to be built.
	197	vector<pair<split,int> > sortedSplits = _Splits.sortSplits();
	198	// 4. get a list of compatible splits
	199	MDOUBLE thresholdForNumTrees = threshold * _numTrees;
	200
	201	vector<split> consensus;
	202	for (int k=0; k < sortedSplits.size(); ++k) {
	203	bool compatible = true;
	204	if (sortedSplits[k].second < thresholdForNumTrees) break;
	205
	206	for (vector<split>::const_iterator j=consensus.begin(); j != consensus.end(); ++j) {
	207	if (!(sortedSplits[k].first.compatible(*j))) {
	208	compatible=false;
	209	break;
	210	}
	211	}
	212	if (compatible) {
	213	consensus.push_back(sortedSplits[k].first);
	214	}
	215	}
	216
	217	// 5. Now we build a tree from all the compatible splits
	218
	219	for (vector<split>::iterator i1 = consensus.begin();i1!=consensus.end();++i1) {
	220	applySplit(res,*i1,_nameMap);
	221	}
	222	res.create_names_to_internal_nodes();
	223	res.makeSureAllBranchesArePositive();
	224
	225	return (res);
	226	}

+82

-0

libs/phylogeny/bootstrap.h less more

	0	// $Id: bootstrap.h 962 2006-11-07 15:13:34Z privmane $
	1
	2	#ifndef ___BOOTSTRAP
	3	#define ___BOOTSTRAP
	4
	5	#include "definitions.h"
	6	#include "split.h"
	7	#include "splitMap.h"
	8	#include "tree.h"
	9	#include "treeUtil.h"
	10	#include <sstream>
	11	using namespace std;
	12
	13	// This class gets as input many trees and can answer questions such as
	14	// 1. the bootstrap value (bp) of a tree
	15	// 2. the bp of a split
	16	// 3. can reconstruct a multifurcating consensus tree.
	17	// We note that 3 can always be done if done only on those splits with bp > 50%.
	18	// In this case there is only one tree.
	19	// If the threshold value is <= 50% there might be more than one tree for which
	20	// all splits on this tree have bp >= threshold.
	21	// In this case we want to give the tree with the highest sum of bp.
	22	// This is probably NP hard, and we use a greedy search to choose
	23	// this tree.
	24
// Collects the splits of a set of trees and answers bootstrap-value queries on them.
class bootstrap {
public:
	typedef vector<tree> treeVec;
	explicit bootstrap(const treeVec& treevect); // constructor

	// this construction is the same as above, but it reads the trees from
	// an input file.
	explicit bootstrap (const string& filename);

	// give a tree and return a map from each edge to a bp value.
	// edge 5 is the edge between node id 5 and its father.
	map<int, MDOUBLE> getWeightsForTree(const tree& inTree) const;


	// give a threshold >= 0.5 and get a consensus tree with all splits
	// that are more confident than the threshold.
	tree consensusTree(const MDOUBLE threshold = 0.5) const;

	// prints the split map
	void print(ostream& sout = cout);
	// prints tree t with the bp values of v attached to its internal nodes
	void printTreeWithBPvalues(ostream &os, const tree &t, const map<int, MDOUBLE> & v, const bool printBranchLenght=true) const;

	// prints the name-to-id map (debugging aid)
	void print_names(ostream &os) const;


private:




	// processes every tree of the vector with splitTree
	void fillFromTreeVec(const treeVec& treevect);
	// maps a taxon name to its id in _nameMap
	int idFromName (const string & name) const;


	// returns the leaf ids below rootOfSubtree and fills v with the bp value of every node
	set<int> recursivelyBuiltBPMap(const tree::nodeP &rootOfSubtree, map<int, MDOUBLE> &v) const;
	set<int> splitSubTreeRecursivly(const tree::nodeP &n, const bool isRoot=false); // this function assumes that the tree is rooted not in a leaf
	// take tree, compute all splits and enter them into the Splits map
	void splitTree(const tree& T);
	void recursivlyPrintTreeWithBPvalues(ostream &os,
		const tree::nodeP &nP,
		const map<int, MDOUBLE> &v,
		const bool printBranchLenght) const;
	void getTreeNodes(const tree& t) const ; // note that _allTree_nodes is mutable
	// initializes the name map from the first tree and validates every later tree against it
	void updateNtaxaAndNameMapAndValidateConsistency(const tree& T);

	int _numTrees; // total number of trees
	splitMap _Splits; // all splits seen so far, with their counts
	typedef map<string,int> NameMap_t;
	NameMap_t _nameMap; // this is a map from the names of the sequences to integers.
	int _nTaxa; // number of taxa; zero until the first tree has been processed
	mutable vector<int> _id2TreeId, _treeId2Id;
	vector<string> _sequenceNames; // the names of the sequences.
};
	77
	78
	79
	80	#endif // ___BOOTSTRAP
	81

+212

-0

libs/phylogeny/chebyshevAccelerator.cpp less more

	0	// $Id: chebyshevAccelerator.cpp 962 2006-11-07 15:13:34Z privmane $
	1
	2	#include "chebyshevAccelerator.h"
	3	#include <cmath>
	4	#include <cassert>
	5
	6	chebyshevAccelerator::chebyshevAccelerator(const chebyshevAccelerator& other):
	7	_alphabetSize(other._alphabetSize),
	8	_totalNumOfCoef(other._totalNumOfCoef),
	9	_usingNumberOfCoef(other._usingNumberOfCoef),
	10	_pb(NULL),
	11	_rightRange(other._rightRange),
	12	_leftRange(other._leftRange){
	13	if (other._pb != NULL) _pb = other._pb->clone();
	14	chebi_coff=other.chebi_coff;
	15	chebi_dervation_coff=other.chebi_dervation_coff;
	16	chebi_sec_dervation_coff=other.chebi_sec_dervation_coff;
	17	}
	18
	19	chebyshevAccelerator::chebyshevAccelerator(
	20	replacementModel* pb,
	21	const int alphanetSize,
	22	const int totalNumOfCoef,
	23	const int usingNumberOfCoef,
	24	const MDOUBLE rightRange,
	25	const MDOUBLE leftRange
	26	): _alphabetSize(alphanetSize),
	27	_totalNumOfCoef(totalNumOfCoef), _usingNumberOfCoef(usingNumberOfCoef),_pb(pb->clone()), _rightRange(rightRange), _leftRange(leftRange)
	28	//----------------------------------------------------------------------------------
	29	//input: non
	30	//output: non
	31	//doing: filling the member chebi_coff[][][]; chebi_coff[1][2][4] is the forth
	32	// chebichev coefficient in the chebichev polynom of the function
	33	// slow_pij(1,2,t);
	34	//----------------------------------------------------------------------------------
	35	{
	36	int tmp, tmp1;
	37	for (tmp = 0; tmp < _alphabetSize ; tmp ++) {
	38
	39	chebi_coff.resize(_alphabetSize);
	40	chebi_dervation_coff.resize(_alphabetSize);
	41	chebi_sec_dervation_coff.resize(_alphabetSize);
	42
	43	for (tmp1 = 0; tmp1 < _alphabetSize ; tmp1 ++) {
	44	chebi_coff[tmp].resize(_alphabetSize);
	45	chebi_dervation_coff[tmp].resize(_alphabetSize);
	46	chebi_sec_dervation_coff[tmp].resize(_alphabetSize);
	47	for (tmp1 = 0; tmp1 < _alphabetSize ; tmp1 ++) {
	48	chebi_coff[tmp][tmp1].resize(_totalNumOfCoef);
	49	chebi_dervation_coff[tmp][tmp1].resize(_totalNumOfCoef);
	50	chebi_sec_dervation_coff[tmp][tmp1].resize(_totalNumOfCoef);
	51	}
	52	}
	53	}
	54
	55
	56	Vdouble coffij(_totalNumOfCoef);
	57	Vdouble coffij_of_derviation(_totalNumOfCoef);
	58	Vdouble coffij_of_second_derivation(_totalNumOfCoef);
	59
	60
	61	for (int from_aa =0; from_aa<_alphabetSize ; ++ from_aa)
	62	{
	63	for (int to_aa =0; to_aa<_alphabetSize ; ++ to_aa)
	64	{
	65	chebft(coffij,_totalNumOfCoef,from_aa,to_aa);
	66	chder(coffij,coffij_of_derviation,_totalNumOfCoef);
	67	chder(coffij_of_derviation,coffij_of_second_derivation,_totalNumOfCoef);
	68
	69	for (int tmp=0; tmp<_totalNumOfCoef;++tmp)
	70	{
	71	chebi_coff[from_aa][to_aa][tmp] = coffij[tmp];
	72	chebi_dervation_coff[from_aa][to_aa][tmp] = coffij_of_derviation[tmp];
	73	chebi_sec_dervation_coff[from_aa][to_aa][tmp] = coffij_of_second_derivation[tmp];
	74	}
	75
	76	}
	77	}
	78	}
	79
	80
	81	void chebyshevAccelerator::chebft(Vdouble& c, int n, int from_aa, int to_aa) {
	82	//----------------------------------------------------------------------------------
	83	//input: c[] is the vector where the cofficient will be
	84	// from aa and to_aa are for chosing the right function to be developed
	85	//output: non
	86	//doing: calculating the chebichev coefficient in the chebichev polynom of the function
	87	// slow_pij(from_aa,to_aa,t), and put them in the c[] vector
	88	//----------------------------------------------------------------------------------
	89	int k,j;
	90	MDOUBLE fac,bpa,bma;
	91
	92	Vdouble f;
	93	f.resize(n);
	94	bma=0.5*(_rightRange-_leftRange);
	95	bpa=0.5*(_rightRange+_leftRange);
	96	for (k=0;k<n;k++) {
	97	MDOUBLE y=cos(3.141592653589793*(k+0.5)/n);
	98	f[k]= _pb->Pij_t(from_aa,to_aa,ybma+bpa); //(func)(y*bma+bpa);
	99	}
	100	fac=2.0/n;
	101	for (j=0;j<n;j++) {
	102	MDOUBLE sum=0.0;
	103	for (k=0;k<n;k++)
	104	sum += f[k]cos(3.141592653589793j*(k+0.5)/n);
	105	c[j]=fac*sum;
	106	}
	107
	108	}
	109
	110
	111	const MDOUBLE chebyshevAccelerator::Pij_t(const int from_aa, const int to_aa, const MDOUBLE x) const
	112	//----------------------------------------------------------------------------------
	113	//input: like pij_t
	114	//output: the probabilty
	115	//doing: calculating with the polinom of chebi and via eigenvalue decomposition
	116	//----------------------------------------------------------------------------------
	117	{
	118
	119	MDOUBLE d=0.0,dd=0.0,sv,y,y2,check;
	120	int j;
	121
	122	if ((x-_leftRange)*(x-_rightRange) > 0.0) {
	123	return _pb->Pij_t(from_aa,to_aa,x);
	124	// errorMsg::reportError("x not in range in routine fast_Pij_t");// also quit the program
	125	}
	126
	127	y2=2.0(y=(2.0x-_leftRange-_rightRange)/(_rightRange-_leftRange));
	128	for (j=_usingNumberOfCoef;j>0;j--) {
	129	sv=d;
	130	d=y2*d-dd+chebi_coff[from_aa][to_aa][j];
	131	dd=sv;
	132	}
	133	check = yd-dd+0.5chebi_coff[from_aa][to_aa][0];
	134	if ((check>1) \|\| (check<=0)) check = _pb->Pij_t(from_aa,to_aa,x);
	135	assert(check<=1);
	136	assert(check>=0);
	137	return check;
	138	}
	139
	140
	141	const MDOUBLE chebyshevAccelerator::dPij_dt(const int from_aa, const int to_aa, const MDOUBLE x) const
	142	//----------------------------------------------------------------------------------
	143	//input: like pij_t
	144	//output: the derivation of probabilty
	145	//doing: calculating with the polinom of chebi and via eigenvalue decomposition
	146	//----------------------------------------------------------------------------------
	147	{
	148
	149	MDOUBLE d=0.0,dd=0.0,sv,y,y2;
	150	int j;
	151
	152	if ((x-_leftRange)*(x-_rightRange) > 0.0) {
	153	return _pb->dPij_dt(from_aa,to_aa,x);
	154	}
	155	y2=2.0(y=(2.0x-_leftRange-_rightRange)/(_rightRange-_leftRange));
	156	for (j=_usingNumberOfCoef;j>0;j--) {
	157	sv=d;
	158	d=y2*d-dd+chebi_dervation_coff[from_aa][to_aa][j];
	159	dd=sv;
	160	}
	161	return yd-dd+0.5chebi_dervation_coff[from_aa][to_aa][0];
	162	}
	163
	164
	165	const MDOUBLE chebyshevAccelerator::d2Pij_dt2(const int from_aa, const int to_aa, const MDOUBLE x) const {
	166	//----------------------------------------------------------------------------------
	167	//input: like pij_t
	168	//output: the second derivation of the probabilty
	169	//doing: calculating with the polynom of chebi and via eigenvalue decomposition
	170	//----------------------------------------------------------------------------------
	171	MDOUBLE d=0.0,dd=0.0,sv,y,y2;
	172	int j;
	173
	174	if ((x-_leftRange)*(x-_rightRange) > 0.0) {
	175	return _pb->d2Pij_dt2(from_aa,to_aa,x);
	176	}
	177	y2=2.0(y=(2.0x-_leftRange-_rightRange)/(_rightRange-_leftRange));
	178	for (j=_usingNumberOfCoef;j>0;j--) {
	179	sv=d;
	180	d=y2*d-dd+chebi_sec_dervation_coff[from_aa][to_aa][j];
	181	dd=sv;
	182	}
	183	return yd-dd+0.5chebi_sec_dervation_coff[from_aa][to_aa][0];
	184	}
	185
	186
	187
	188
	189	void chebyshevAccelerator::chder(Vdouble &c, Vdouble &cder, int n) {
	190	//----------------------------------------------------------------------------------
	191	//input: chebicev coff of f(x) i.e. in c[]. n is the vector size
	192	//output: chebicev coff of df(x)/dx i.e. in cder[]
	193	//doing: calculating the coff of the dervation from the coff of f.
	194	//reference:numercal recepies in c, pg 195.
	195	//----------------------------------------------------------------------------------
	196	int j;
	197	MDOUBLE con;
	198
	199	cder[n-1]=0.0;
	200	cder[n-2]=2(n-1)c[n-1];
	201	for (j=n-3;j>=0;j--)
	202	cder[j]=cder[j+2]+2(j+1)c[j+1];
	203	con=2.0f/(_rightRange-_leftRange);
	204	for (j=0;j<n;j++)
	205	cder[j] *= con;
	206	}
	207
	208
	209
	210
	211

+48

-0

libs/phylogeny/chebyshevAccelerator.h less more

	0	// $Id: chebyshevAccelerator.h 962 2006-11-07 15:13:34Z privmane $
	1
	2	#ifndef ___CHEBYSHEV_ACCELERATOR
	3	#define ___CHEBYSHEV_ACCELERATOR
	4
	5	#include "pijAccelerator.h"
	6	#include "replacementModel.h"
	7
	8	class chebyshevAccelerator : public pijAccelerator {
	9	public:
	10
	11	explicit chebyshevAccelerator( replacementModel* pb,
	12	const int alphanetSize=20,
	13	const int totalNumOfCoef=60,
	14	const int usingNumberOfCoef=13,
	15	const MDOUBLE rightRange=0,const MDOUBLE leftRange=2);
	16	chebyshevAccelerator(const chebyshevAccelerator& other);
	17	const MDOUBLE Pij_t(const int i,const int j, const MDOUBLE d) const;
	18	const MDOUBLE dPij_dt(const int i,const int j, const MDOUBLE d) const;
	19	const MDOUBLE d2Pij_dt2(const int i,const int j, const MDOUBLE d) const;
	20	const MDOUBLE freq(const int i) const {return _pb->freq(i);}
	21	virtual pijAccelerator* clone() const { return new chebyshevAccelerator(*this); }
	22	virtual ~chebyshevAccelerator() {delete _pb;}
	23	virtual replacementModel* getReplacementModel() const {return (_pb);}
	24	virtual const int alphabetSize() const {return _pb->alphabetSize();}
	25
	26	private:
	27	VVVdouble chebi_coff;//[N_ABC][N_ABC][NUMBER_OF_TOTAL_COFF+1];
	28	VVVdouble chebi_dervation_coff;//[N_ABC][N_ABC][NUMBER_OF_TOTAL_COFF+1];
	29	VVVdouble chebi_sec_dervation_coff;//[N_ABC][N_ABC][NUMBER_OF_TOTAL_COFF+1];
	30
	31	const int _alphabetSize;
	32	const int _totalNumOfCoef;
	33	const int _usingNumberOfCoef;
	34
	35	replacementModel* _pb;
	36
	37	void chebft(Vdouble& c, int n, int from_aa, int to_aa);
	38	void chder(Vdouble &c, Vdouble &cder, int n);
	39
	40	const MDOUBLE _rightRange;
	41	const MDOUBLE _leftRange;
	42
	43	};
	44
	45	// This is an accelerator of the Pij(t) calculation, using a Chebyshev polynomial approximation.
	46	#endif
	47

+106

-0

libs/phylogeny/checkcovFanctors.h less more

	0	// $Id: checkcovFanctors.h 6634 2009-07-20 07:00:05Z osnatz $
	1
	2	#ifndef ____CHECKCOV__FANCTORS
	3	#define ____CHECKCOV__FANCTORS
	4	#include "definitions.h"
	5	#include "tree.h"
	6
	7	#include "likelihoodComputation.h"
	8	using namespace likelihoodComputation;
	9	#include "sequenceContainer.h"
	10	#include "stochasticProcess.h"
	11	#include "logFile.h"
	12
	13	#include <cmath>
	14
	15	//#define VERBOS
	16
	17	#ifdef VERBOS
	18	#include <iostream>
	19	using namespace std;
	20	#endif
	21
	22	class Cevaluate_L_given_r{
	23	public:
	24	explicit Cevaluate_L_given_r( const sequenceContainer& sd,
	25	const tree& t1,
	26	const stochasticProcess& sp,
	27	const int pos)
	28	:_sd(sd),_t1(t1),_pos(pos), _sp(sp) {}
	29	private:
	30	const sequenceContainer& _sd;
	31	const tree& _t1;
	32	const int _pos;
	33	const stochasticProcess& _sp;
	34	public:
	35	MDOUBLE operator() (const MDOUBLE r) {
	36
	37	MDOUBLE tmp1= convert(getLofPos(_pos,_t1,_sd,_sp,r));
	38	#ifdef VERBOS
	39	LOG(5,<<" r = "<<r<<" l = "<<tmp1<<endl);
	40	#else
	41	LOG(12,<<" r = "<<r<<" l = "<<tmp1<<endl);
	42	#endif
	43	return -tmp1;
	44	}
	45	};
	46
	47	// THIS FUNCTION IS USED ONLY BY ITAY MAYROSE AND ONLY HE KNOWS WHAT IS INSIDE...
	48	// ONE DAY HE WILL WRITE .DOC FILES...
	49	class Cevaluate_Posterior_given_r {
	50	public:
	51	explicit Cevaluate_Posterior_given_r( const sequenceContainer& seqContainer,
	52	const tree& t1,
	53	const stochasticProcess& sp,
	54	const MDOUBLE alpha,
	55	const int pos)
	56	:m_seqContainer(seqContainer), m_alpha(alpha),m_tree(t1), m_pos(pos), m_sp(sp) {}
	57	public:
	58	MDOUBLE operator() (const MDOUBLE r)
	59	{
	60
	61	MDOUBLE l= convert(getLofPos(m_pos, m_tree, m_seqContainer, m_sp, r));
	62	#ifdef VERBOS
	63	LOG(5,<<" r = "<<r<<" l = "<<l<<endl);
	64	#endif
	65	MDOUBLE prior = exp((-m_alpha) * r) * pow(r, m_alpha - 1);
	66	return -(l * prior);
	67	}
	68
	69	private:
	70	const sequenceContainer& m_seqContainer;
	71	const MDOUBLE m_alpha;
	72	const tree& m_tree;
	73	const int m_pos;
	74	const stochasticProcess& m_sp;
	75
	76	};
	77
	78	// WHEN YOU WANT TWO TREE TO HAVE THE SAME RATE AT A SPECIFIC POSITION.
	79	class Cevaluate_L_sum_given_r{
	80	public:
	81	explicit Cevaluate_L_sum_given_r(const stochasticProcess& sp,
	82	const sequenceContainer& sd1,
	83	const sequenceContainer& sd2,
	84	const tree &inLTree1,
	85	const tree &inLTree2,
	86	const int pos)
	87	:_sp(sp), _sd1(sd1), _sd2(sd2), _tree1(inLTree1),_tree2(inLTree2), _pos(pos){};
	88
	89	private:
	90	const stochasticProcess _sp;
	91	const sequenceContainer _sd1;
	92	const sequenceContainer _sd2;
	93	const tree& _tree1;
	94	const tree& _tree2;
	95	const int _pos;
	96	public:
	97	MDOUBLE operator() (const MDOUBLE r) {
	98	MDOUBLE tmp1= convert(getLofPos(_pos,_tree1,_sd1,_sp,r));
	99	MDOUBLE tmp2= convert(getLofPos(_pos,_tree2,_sd2,_sp,r));
	100	MDOUBLE tmp= tmp1*tmp2;
	101	return -tmp;
	102	}
	103	};
	104
	105	#endif

+47

-0

libs/phylogeny/checkcovFanctorsWithFactors.h less more

	0	// $Id: checkcovFanctorsWithFactors.h 962 2006-11-07 15:13:34Z privmane $
	1
	2	#ifndef ____CHECKCOV__FANCTORS_WITH_FACTORS
	3	#define ____CHECKCOV__FANCTORS_WITH_FACTORS
	4	#include "definitions.h"
	5	#include "tree.h"
	6	#include "likelihoodComputation.h"
	7	#include "likelihoodComputationFactors.h" //<-new.
	8	using namespace likelihoodComputation;
	9	#include "sequenceContainer.h"
	10	#include "stochasticProcess.h"
	11
	12	//#define VERBOS
	13	#ifdef VERBOS
	14	#include <iostream>
	15	using namespace std;
	16	#endif
	17
	18	// USING FACTORS: THE IDEA HERE IS THAT WHEN WE HAVE TOO MANY SEQUENCES,
	19	// WE MUST TAKE SPECIAL CARE TO USE "FACTORS" AT INTERNAL NODES, TO AVOID UNDERFLOW.
	20	// HERE WE ALSO RETURN LOG LIKELIHOOD OF A POSITION AND NOT THE LIKELIHOOD ITSELF.
	21	class Cevaluate_LOG_L_given_r{
	22	public:
	23	explicit Cevaluate_LOG_L_given_r( const sequenceContainer& sd,
	24	const tree& t1,
	25	const stochasticProcess& sp,
	26	const int pos)
	27	:_sd(sd),_t1(t1),_pos(pos), _sp(sp){}
	28	private:
	29	const sequenceContainer& _sd;
	30	const tree& _t1;
	31	const int _pos;
	32	const stochasticProcess& _sp;
	33	public:
	34	MDOUBLE operator() (const MDOUBLE r) {
	35
	36	MDOUBLE tmp1= getLOG_LofPos(_pos,_t1,_sd,_sp,r);
	37	#ifdef VERBOS
	38	LOG(5,<<" r = "<<r<<" l = "<<tmp1<<endl);
	39	#endif
	40	return -tmp1;
	41	}
	42	};
	43
	44	#endif
	45
	46

+158

-0

libs/phylogeny/clustalFormat.cpp less more

	0	// $Id: clustalFormat.cpp 962 2006-11-07 15:13:34Z privmane $
	1
	2	#include "clustalFormat.h"
	3	#include "codon.h"
	4	#include "someUtil.h"
	5	#include "errorMsg.h"
	6	#include <map>
	7
	8	sequenceContainer clustalFormat::read(istream &infile, const alphabet* alph) {
	9	sequenceContainer mySeqData = readUnAligned(infile, alph);
	10	mySeqData.makeSureAllSeqAreSameLengthAndGetLen();
	11	return mySeqData;
	12	}
	13
	14	sequenceContainer clustalFormat::readUnAligned(istream &infile, const alphabet* alph) {
	15	sequenceContainer mySequenceData;
	16
	17	vector<string> seqFileData;
	18	map<string ,string> stringsToAdd; //map that holding for each name last
	19	//one or two nucleotides (when reading codon
	20	//alphabet) of the line in order to add it
	21	//to the next line.
	22	putFileIntoVectorStringArray(infile,seqFileData);
	23	if (seqFileData.empty()){
	24	errorMsg::reportError("unable to open file, or file is empty in clustal format");
	25	}
	26
	27
	28	vector<string>::const_iterator it1= seqFileData.begin();
	29
	30	// make sure that the first 7 chars in the first line is clustal
	31	if (it1->size()<7) errorMsg::reportError("first word in clusltal sequence file format must be clustal",1);
	32	if ( (( (it1)[0] != 'C') && ((it1)[0] != 'c'))
	33	\|\| (((it1)[1] != 'L') && ((it1)[1] != 'l'))
	34	\|\| (((it1)[2] != 'U') && ((it1)[2] != 'u'))
	35	\|\| (((it1)[3] != 'S') && ((it1)[3] != 's'))
	36	\|\| (((it1)[4] != 'T') && ((it1)[4] != 't'))
	37	\|\| (((it1)[5] != 'A') && ((it1)[5] != 'a'))
	38	\|\| (((it1)[6] != 'L') && ((it1)[6] != 'l')) ) {
	39	errorMsg::reportError("first word in clusltal sequence file format must be clustal",1);
	40	}
	41	it1++;
	42
	43	int localid=0;
	44	while (it1!= seqFileData.end()) {
	45	if (it1->empty()) {++it1;continue; }// empty line continue
	46	if ((it1->size() > 1) && ((*it1)[0]==' ')) {++it1;continue; }// remark line
	47	string remark;
	48	string name;
	49
	50	// getFromLineAnameAndAsequence;
	51	string name1;
	52	string stringSeq1;
	53	string::const_iterator it2 = (it1)->begin();
	54	for (; it2 != (it1)->end();++it2) {
	55	if ((*it2)==' ') break;
	56	else name1+=(*it2);
	57	}
	58	if (stringsToAdd.find(name1)!=stringsToAdd.end()) //not new sequence
	59	stringSeq1 = stringsToAdd[name1]; //init stringSeq1 with the nucleotide
	60	//from the previous line
	61	for (; it2 != (it1)->end();++it2) {
	62	if ((*it2)==' ') continue;
	63	else stringSeq1+=(*it2);
	64	}
	65
	66	//when alphabet is codon stringSeq1 must be product of three.
	67	// 1. save 1 or 2 last nucleotide in stringToAdd
	68	// 2. substr the last or two last nucleotide for the next line.
	69	// 3. keep stringToAdd in map (according the name).
	70	string stringToAdd="";
	71	// codon codonAlph;
	72	if (alph->size()>=60){ // codon?
	73	if ((stringSeq1.size()%3)==1){ //add the last nucleotide to the next line
	74	stringToAdd+=stringSeq1[stringSeq1.size()-1];
	75	stringSeq1 = stringSeq1.substr(0,stringSeq1.size()-1);
	76	}
	77	if ((stringSeq1.size()%3)==2){ //add the 2 last nucleotide to the next line
	78	stringToAdd+=stringSeq1[stringSeq1.size()-2];
	79	stringToAdd+=stringSeq1[stringSeq1.size()-1];
	80	stringSeq1 = stringSeq1.substr(0,stringSeq1.size()-2);
	81	}
	82
	83	}
	84	stringsToAdd[name1] = stringToAdd; //update the map with the new stringToAdd
	85	int id = mySequenceData.getId(name1,false);
	86	if (id==-1) { // new sequence.
	87	name = name1;
	88	mySequenceData.add(sequence(stringSeq1,name,remark,localid,alph));
	89	localid++;
	90	} else {// the sequence is already there...
	91	sequence tmp(stringSeq1,name,remark,id,alph);
	92	mySequenceData[id].operator += (tmp);
	93	}
	94
	95	it1++;
	96	}
	97
	98	return mySequenceData;
	99	}
	100
	101	void clustalFormat::write(ostream &out, const sequenceContainer& sd) {
	102	// setting some parameters
	103	const int numOfPositionInLine = 60;
	104	int maxLengthOfSeqName =0;
	105	for (sequenceContainer::constTaxaIterator p=sd.constTaxaBegin(); p != sd.constTaxaEnd(); ++p ) {
	106	int nameLen = (*p).name().size();
	107	if (nameLen>maxLengthOfSeqName) maxLengthOfSeqName=nameLen;
	108	}
	109	if (maxLengthOfSeqName<15) maxLengthOfSeqName=16;
	110	else maxLengthOfSeqName=maxLengthOfSeqName+4; // all this maxLengthOfSeqName is the
	111
	112	out<<"CLUSTAL V"<<endl;
	113	// num. of space after the name.
	114	int currentPosition = 0;
	115	int charLen = sd.seqLen();
	116	//in case of codon alphabet the character length is : 3*(sequence_length)
	117	// codon codonAlph;
	118	if (sd.alphabetSize()>=60) charLen*=3;
	119	out<<endl<<endl;
	120	while (currentPosition < charLen ) {
	121	out.flush();
	122	//for (vector<const sequenceContainer::sequenceDatum*>::const_iterator it5= vec.begin(); it5!=vec.end(); ++ it5) {
	123	for (sequenceContainer::constTaxaIterator it5=sd.constTaxaBegin();it5!=sd.constTaxaEnd();++it5) {
	124	for (int iName = 0 ;iName<maxLengthOfSeqName; ++iName) {
	125	if (iName<(*it5).name().size()) {
	126	out<<(*it5).name()[iName];
	127	out.flush();
	128	}
	129	else out<<" ";
	130	}
	131	out.flush();
	132	out<<" ";
	133
	134	if (charLen<numOfPositionInLine)
	135	out<<it5->toString()<<endl;
	136	else {
	137	for (int k=currentPosition; k < currentPosition+numOfPositionInLine; ++k) {
	138	if (k>=charLen)
	139	break;
	140	out<<it5->toString()[k];
	141	//in case of codon alphabet each position is three characters
	142
	143	if (sd.alphabetSize()>=60){
	144	out<<it5->toString()[++k];
	145	out<<it5->toString()[++k];
	146	}
	147	}
	148	out<<endl;
	149	}
	150	}
	151	currentPosition +=numOfPositionInLine;
	152	out<<endl<<endl;
	153	}
	154
	155	return;
	156	}
	157

+47

-0

libs/phylogeny/clustalFormat.h less more

	0	// $Id: clustalFormat.h 962 2006-11-07 15:13:34Z privmane $
	1
	2	#ifndef ___CLUSTAL_FORMAT
	3	#define ___CLUSTAL_FORMAT
	4
	5	#include "sequenceContainer.h"
	6
	7	class clustalFormat{
	8	public:
	9	static sequenceContainer read(istream &infile, const alphabet* alph);
	10	static void write(ostream &out, const sequenceContainer& sd);
	11	//readUnAligned: the input sequences do not need to be aligned (not all sequences are the same length).
	12	static sequenceContainer readUnAligned(istream &infile, const alphabet* alph);
	13	};
	14
	15	#endif
	16
	17	/* EXAMPLE OF THE FORMAT:
	18	CLUSTAL V
	19
	20
	21	Langur KIFERCELARTLKKLGLDGYKGVSLANWVCLAKWESGYNTEATNYNPGDESTDYGIFQIN
	22	Baboon KIFERCELARTLKRLGLDGYRGISLANWVCLAKWESDYNTQATNYNPGDQSTDYGIFQIN
	23	Human KVFERCELARTLKRLGMDGYRGISLANWMCLAKWESGYNTRATNYNAGDRSTDYGIFQIN
	24	Rat KTYERCEFARTLKRNGMSGYYGVSLADWVCLAQHESNYNTQARNYDPGDQSTDYGIFQIN
	25	Cow KVFERCELARTLKKLGLDGYKGVSLANWLCLTKWESSYNTKATNYNPSSESTDYGIFQIN
	26	Horse KVFSKCELAHKLKAQEMDGFGGYSLANWVCMAEYESNFNTRAFNGKNANGSSDYGLFQLN
	27
	28
	29	Langur SRYWCNNGKPGAVDACHISCSALLQNNIADAVACAKRVVSDQGIRAWVAWRNHCQNKDVS
	30	Baboon SHYWCNDGKPGAVNACHISCNALLQDNITDAVACAKRVVSDQGIRAWVAWRNHCQNRDVS
	31	Human SRYWCNDGKPGAVNACHLSCSALLQDNIADAVACAKRVVRDQGIRAWVAWRNRCQNRDVR
	32	Rat SRYWCNDGKPRAKNACGIPCSALLQDDITQAIQCAKRVVRDQGIRAWVAWQRHCKNRDLS
	33	Cow SKWWCNDGKPNAVDGCHVSCSELMENDIAKAVACAKKIVSEQGITAWVAWKSHCRDHDVS
	34	Horse NKWWCKDNKRSSSNACNIMCSKLLDENIDDDISCAKRVVRDKGMSAWKAWVKHCKDKDLS
	35
	36
	37	Langur QYVKGCGV
	38	Baboon QYVQGCGV
	39	Human QYVQGCGV
	40	Rat GYIRNCGV
	41	Cow SYVEGCTL
	42	Horse EYLASCNL
	43
	44
	45	*/
	46

+83

-0

libs/phylogeny/cmdline.ggo less more

	0	# $Id: cmdline.ggo 962 2006-11-07 15:13:34Z privmane $
	1
	2	purpose "structural EM based Phylogeny"
	3	package "semphy"
	4	version "1.0.a3"
	5
	6	# test default values
	7
	8	#files
	9	section "Basic Options"
	10	option "sequence" s "Sequence file name" string typestr="FILENAME" default="-" no
	11	option "format" f "Sequence format: [phylip], clustal, molphy, mase, fasta" string default="phylip" no
	12	option "tree" t "Tree file name" string typestr="FILENAME" no
	13	option "constraint" c "Constraint Tree file name" string typestr="FILENAME" no
	14	option "outputfile" o "Output tree file" string typestr="FILENAME" default="-" no
	15	# model options:
	16	section "Model Options"
	17	option "alphabet" a "Alphabet Size" int typestr="4\|20"default="20" no
	18	option "ratio" z "Transition/Transversion ratio" float default="2" no
	19	option "ACGprob" p "User input nucleotide frequencies. String separated list for A,C,G" string typestr="A,C,G" default="0.25,0.25,0.25" no
	20
	21	option "gamma" G "Use Gamma RVAS (4 bins) and set alpha" float default="0.3" no
	22	option "optimizeGamma" O "Optimize Gamma and use it" flag off
	23
	24
	25	defgroup "Model" groupdesc="Model type"
	26
	27	groupoption "day" - "Use 'day' model" group="Model"
	28	groupoption "jtt" - "Use 'jtt' model (default)" group="Model"
	29	groupoption "rev" - "Use 'rev' model" group="Model"
	30	groupoption "wag" - "Use 'wag' model" group="Model"
	31	groupoption "cprev" - "Use 'cprev' model" group="Model"
	32	groupoption "nucjc" - "Use nucleic acid JC model" group="Model"
	33	groupoption "aaJC" - "Use amino acid JC model" group="Model"
	34	groupoption "k2p" - "Use 'k2p' model" group="Model"
	35	groupoption "hky" - "Use 'k2p' model" group="Model"
	36
	37	option "modelfile" - "Use user input file as model" string typestr="NAME" no
	38
	39
	40	section "Log Options"
	41
	42	option "verbose" v "Log report level (verbose)" int default="1" no
	43	option "Logfile" l "Log output file name" string typestr="FILENAME" default="-" no
	44
	45
	46	section "Algorithm Options"
	47
	48	# algorithm options
	49	defgroup "Run Options" groupdesc="Which algorithm to run"
	50
	51	groupoption "SEMPHY" S "Do SEMPHY step (default)" group="Run Options"
	52	groupoption "bbl" n "Only optimize branch length" group="Run Options"
	53	groupoption "likelihood" L "Compute likelihood for fixed tree" group="Run Options"
	54	groupoption "NJ" J "compute NJ tree only" group="Run Options"
	55	option "rate" R "optimize rate of gene" flag off
	56
	57
	58	section "Other Algorithm Options"
	59	option "max-semphy-iter" M "Max number of SEM iterations" int default="100" no
	60	option "max-bbl-iter" b "Max number of BBL iterations" int default="1000" no
	61	option "min-improv" d "Minimum improvement" float default="0.001" no
	62	option "gaps" g "Remove positions with gaps" flag off
	63	option "dont-use-NJ" N "Do not Use NJ to break stars in treeRearrange" flag on
	64	option "exact" e "Compute exact counts" flag off
	65	option "maxDistance" x "'infinity' distance for sequence pairs" float default="2.0" no
	66
	67	option "seed" r "Seed random number generator" long no
	68
	69
	70	#option "paramFile" f "Parameter file name" string no
	71	#option "cin" I "Get input sequence file from cin" flag off
	72
	73	# annealing:
	74	#option "anneal" A "Do anneal step" flag off
	75	#option "ratchet" R "Do Ratchet step" flag off
	76	#option "start-temp" H "Starting temp" float no
	77	#option "cooling-factor" c "Variance decay factor for anneal noise" float default="1.1" no
	78	#option "final-temp" C "Final temperature of anneal noise" float default="0.1" no
	79	#option "adversarial" - "Use Adversarial Re-weighting" flag off
	80	#option "learning-rate" L "learning rate for Adversary" float default="1.0" no
	81	#option "Orig-dumping" D "Dumping to the original weights" float default="0.5" no
	82	#option "prev-dumping" X "Dumping to the previous weights" float default="0.5" no

+2

-0

libs/phylogeny/cmdline2EvolObjs.cpp less more

	0	// $Id: cmdline2EvolObjs.cpp 962 2006-11-07 15:13:34Z privmane $
	1	#include "cmdline2EvolObjs.h"

+579

-0

libs/phylogeny/cmdline2EvolObjs.h less more

	0	// $Id: cmdline2EvolObjs.h 8038 2010-06-03 20:31:23Z itaymay $
	1
	2	#ifndef ___CREATESPFROMARGSINFO_H
	3	#define ___CREATESPFROMARGSINFO_H
	4
	5	#include <cstdlib>
	6	#include "amino.h"
	7	#include "nucleotide.h"
	8	#include "codon.h"
	9	#include "sequenceContainer.h"
	10	#include "tree.h"
	11	#include "stochasticProcess.h"
	12	#include "replacementModel.h"
	13	#include "uniDistribution.h"
	14	#include "trivialAccelerator.h"
	15	#include "alphaTrivialAccelerator.h"
	16	#include "chebyshevAccelerator.h"
	17	#include "talRandom.h"
	18	#include "nucJC.h"
	19	#include "aaJC.h"
	20	#include "hky.h"
	21	#include "tamura92.h"
	22	#include "gtrModel.h"
	23	#include "logFile.h"
	24	#include "readDatMatrix.h"
	25	#include "gammaDistribution.h"
	26	#include "recognizeFormat.h"
	27	#include "replacementModelSSRV.h"
	28	#include "stochasticProcessSSRV.h"
	29	#include "someUtil.h"
	30	#include <stdio.h>
	31
	32	#define DEFAULT_VALUE_FOR_ALPAH 1.0
	33
	34	template <class args_infoT>
	35	class cmdline2EvolObjs {
	36	private:
	37	args_infoT _args_info;
	38	public:
	39	const args_infoT& getArgsInfo(void) {return(_args_info);}
	40	// constructors
	41	cmdline2EvolObjs(args_infoT &args_info) : _args_info(args_info) {
	42	checkParameterConsistancy();
	43	}
	44	cmdline2EvolObjs(args_infoT &args_info, bool DontChack) : _args_info(args_info) {
	45	// if (!DontChack) checkParameterConsistancy();
	46	}
	47	explicit cmdline2EvolObjs(void){}; // do nothing
	48	void installArgsInfo(args_infoT &args_info){
	49	_args_info = args_info;
	50	checkParameterConsistancy();
	51	}
	52	private:
	53	void checkParameterConsistancy() {
	54	if (!_args_info.homogeneous_flag) { // using Gamma ASRV
	55	if (!_args_info.alpha_given && !_args_info.optimizeAlpha_flag)
	56	errorMsg::reportError("Must use either 'alpha' or 'optimizeAlpha' when using Gamma ASRV");
	57	} else { // using homogeneous rates
	58	if (_args_info.categories_given \|\|_args_info.alpha_given \|\| _args_info.optimizeAlpha_given)
	59	errorMsg::reportError("Can't use 'categories' or 'alpha' or 'optimizeAlpha' with homogeneous rates model");
	60	// more tests may come here
	61	}
	62
	63	// check compatibility of alphabet and model
	64	if (_args_info.alphabet_arg == 4
	65	&& !(_args_info.nucjc_given \|\| _args_info.k2p_given \|\| _args_info.hky_given \|\| _args_info.tamura92_given \|\| _args_info.gtr_given))
	66	errorMsg::reportError("Model type is not suitable for nucleotide alphabet");
	67	if (_args_info.alphabet_arg == 20
	68	&& (_args_info.nucjc_given \|\| _args_info.k2p_given \|\| _args_info.hky_given \|\| _args_info.tamura92_given \|\| _args_info.gtr_given))
	69	errorMsg::reportError("Model type is not suitable for amino-acid alphabet");
	70
	71	if (_args_info.nu_given) {
	72	_args_info.ssrv_flag = true;
	73	}
	74	}
	75
	76	public:
	77	void initializeRandomSeed() {
	78	if (_args_info.seed_given) {
	79	talRandom::setSeed(_args_info.seed_arg);
	80	}
	81	}
	82	void initializeLogFile() {
	83	myLog::setLog(_args_info.Logfile_arg, _args_info.verbose_arg);
	84	}
	85
	86	// NOTE: Unlike other cmdline2*** classes, here a pointer to an allocated obj
	87	// is returned and the user is responsible for doing delete. This is because
	88	// alphabet is an abstract class, so we can't return it by value
	89	alphabet* cmdline2Alphabet() {
	90	alphabet* alphPtr = NULL;
	91	switch (_args_info.alphabet_arg)
	92	{ // allwayes defined, with default
	93	case 4:
	94	alphPtr = new nucleotide;
	95	break;
	96	case 20:
	97	alphPtr = new amino;
	98	break;
	99	case 64: case 61: case 60: case 62:
	100	alphPtr = new codon;
	101	break;
	102	default: errorMsg::reportError("alphabet size not supported");
	103	}
	104
	105	// Handle mulAlphabet needed in case we use an SSRV model
	106	if (_args_info.ssrv_flag) {
	107	alphabet* mulAlphPtr = new mulAlphabet(alphPtr, _args_info.categories_arg);
	108	delete alphPtr;
	109	alphPtr = mulAlphPtr;
	110	}
	111
	112	return alphPtr;
	113	}
	114
	115	sequenceContainer cmdline2SequenceContainer(const alphabet * const alphPtr) {
	116	ifstream ins;
	117	istream* inPtr = &cin;
	118	string sequenceFileName(_args_info.sequence_arg);
	119	if (sequenceFileName != "" && sequenceFileName != "-") {
	120	ins.open(sequenceFileName.c_str());
	121	if (! ins.is_open())
	122	errorMsg::reportError(string("Can not open sequence file ")+sequenceFileName);
	123	inPtr = &ins;
	124	}
	125	istream& in = *inPtr;
	126
	127	sequenceContainer sc;
	128	if (!_args_info.ssrv_flag) {
	129	sc = recognizeFormat::read(in, alphPtr);
	130	} else {
	131	sequenceContainer scBase(recognizeFormat::read(in, (static_cast<const mulAlphabet*>(alphPtr))->getBaseAlphabet()));
	132	sc = sequenceContainer(scBase, alphPtr);
	133	}
	134	return sc;
	135	}
	136
	137	void takeCareOfGaps (sequenceContainer &sc) {
	138	if (_args_info.gaps_flag) {
	139	sc.removeGapPositions();
	140	} else {
	141	sc.changeGaps2MissingData();
	142	}
	143	}
	144
	145	// NOTE: Unlike other cmdline2*** classes, here a pointer to an allocated obj
	146	// is returned and the user is responsible for deleting it. This is because
	147	// we need to return a NULL pointer if we are not given a tree
	148	tree *cmdline2Tree() {
	149	tree *treePtr = NULL;
	150	if (_args_info.tree_given) { // did we get a tree
	151	string treeFileName(_args_info.tree_arg);
	152	treePtr = new tree(treeFileName);
	153	}
	154	return treePtr;
	155	}
	156
	157	// NOTE: Unlike other cmdline2*** classes, here a pointer to an allocated obj
	158	// is returned and the user is responsible for deleting it. This is because
	159	// we need to return a NULL pointer if we are not given a tree
	160	tree *cmdline2ConstraintTree() {
	161	tree *constraintTreePtr = NULL;
	162	if (_args_info.constraint_given) { // did we get a tree
	163	string constraintTreeFileName(_args_info.constraint_arg);
	164	constraintTreePtr = new tree(constraintTreeFileName);
	165	}
	166	return constraintTreePtr;
	167	}
	168
	169	replacementModel *cmdline2ReplacementModel() {
	170	replacementModel *probModPtr=NULL;
	171	MDOUBLE ratio =_args_info.ratio_arg;
	172	MDOUBLE Ap(0.25), Cp(0.25), Gp(0.25), Tp(0.25);
	173	sscanf(_args_info.ACGprob_arg,"%lf,%lf,%lf", &Ap, &Cp, &Gp);
	174	Tp=1.0-(Ap+Cp+Gp);
	175
	176	if (_args_info.day_given) {
	177	LOG(5,<<"Using Dayhoff replacement matrix"<<endl);
	178	probModPtr=new pupAll(datMatrixHolder::dayhoff);
	179	} else if (_args_info.rev_given) {
	180	LOG(5,<<"Using rev replacement matrix"<<endl);
	181	probModPtr=new pupAll(datMatrixHolder::mtREV24);
	182	} else if (_args_info.wag_given) {
	183	LOG(5,<<"Using wag replacement matrix"<<endl);
	184	probModPtr=new pupAll(datMatrixHolder::wag);
	185	} else if (_args_info.cprev_given) {
	186	LOG(5,<<"Using cprev replacement matrix"<<endl);
	187	probModPtr=new pupAll(datMatrixHolder::cpREV45);
	188	} else if (_args_info.nucjc_given) {
	189	LOG(5,<<"Using JC for nucleotide"<<endl);
	190	probModPtr=new nucJC;
	191	} else if (_args_info.aaJC_given) {
	192	LOG(5,<<"Using JC for amino acids"<<endl);
	193	probModPtr=new aaJC;
	194	} else if ((_args_info.hky_given) \|\| (_args_info.k2p_given)) {
	195	LOG(5,<<"Using hky replacement matrix"<<endl);
	196	probModPtr=new hky(Ap,Cp,Gp,Tp,ratio);
	197	} else if (_args_info.tamura92_given) {
	198	LOG(5,<<"Using the Tamura 92 replacement matrix"<<endl);
	199	MDOUBLE theta = Cp+Gp;
	200	probModPtr=new tamura92(theta, ratio);
	201	} else if (_args_info.gtr_given) {
	202	LOG(5,<<"Using the GTR replacement matrix"<<endl);
	203	//Vdouble freqs = evaluateCharacterFreq(_sc);
	204	Vdouble freqs;
	205	freqs.push_back(0.25);
	206	freqs.push_back(0.25);
	207	freqs.push_back(0.25);
	208	freqs.push_back(0.25);
	209	probModPtr=new gtrModel(freqs);
	210	} else if ((_args_info.alphabet_arg == 20) &&
	211	(_args_info.modelfile_given)) { // try to read the name as a file name
	212	LOG(5,<<"Using user supplied replacement matrix from the file "<<_args_info.modelfile_arg<<endl);
	213	probModPtr=new pupAll(_args_info.modelfile_arg);
	214	} else { /* default = if (strcmp(_args_info.model_arg,"jtt")==0) */
	215	probModPtr=new pupAll(datMatrixHolder::jones);
	216	}
	217
	218	return probModPtr;
	219	}
	220
	221	replacementModel *cmdline2ReplacementModelAAOnly() {
	222	replacementModel *probModPtr=NULL;
	223
	224	if (_args_info.day_given) {
	225	LOG(5,<<"Using Dayhoff replacement matrix"<<endl);
	226	probModPtr=new pupAll(datMatrixHolder::dayhoff);
	227	} else if (_args_info.rev_given) {
	228	LOG(5,<<"Using rev replacement matrix"<<endl);
	229	probModPtr=new pupAll(datMatrixHolder::mtREV24);
	230	} else if (_args_info.wag_given) {
	231	LOG(5,<<"Using wag replacement matrix"<<endl);
	232	probModPtr=new pupAll(datMatrixHolder::wag);
	233	} else if (_args_info.cprev_given) {
	234	LOG(5,<<"Using cprev replacement matrix"<<endl);
	235	probModPtr=new pupAll(datMatrixHolder::cpREV45);
	236	} else if (_args_info.aaJC_given) {
	237	LOG(5,<<"Using JC for amino acids"<<endl);
	238	probModPtr=new aaJC;
	239	} else if (_args_info.modelfile_given) { // try to read the name as a file name
	240	LOG(5,<<"Using user supplied replacement matrix from the file "<<_args_info.modelfile_arg<<endl);
	241	probModPtr=new pupAll(_args_info.modelfile_arg);
	242	} else { /* default = if (strcmp(_args_info.model_arg,"jtt")==0) */
	243	probModPtr=new pupAll(datMatrixHolder::jones);
	244	}
	245
	246	return probModPtr;
	247	}
	248
	249	bool useGamma()
	250	{
	251	return (!_args_info.homogeneous_flag);
	252	}
	253
	254	// this function is meant for cases where a "mature" stochastic Process
	255	// can be produced. If there is a chance that the user may ask for
	256	// alpha optimisation use the
	257	// "cmdline2StochasticProcessThatRequiresAlphaOptimization" version
	258	// instead
	259	inline stochasticProcess *cmdline2StochasticProcess() {
	260	distribution *distP = NULL;
	261	if (useGamma()) {
	262	if (_args_info.alpha_given)
	263	distP = new gammaDistribution(_args_info.alpha_arg,_args_info.categories_arg);
	264	else
	265	errorMsg::reportError("Can not create stochastic process with ASRV if no alpha is given, when working without alpha optimization");
	266	LOG(5,<<"Using Gamma ASRV with "<<_args_info.categories_arg<<" bins"<<endl);
	267	} else {
	268	distP = new uniDistribution;
	269	LOG(5,<<"Using uniform rates"<<endl);
	270	}
	271	stochasticProcess spPtr = cmdline2StochasticProcessInternal(distP);
	272	if (distP) delete distP;
	273	return(spPtr);
	274	}
	275
	276	// Assuming that the user asked to optimize Alpha (by bestAlphaAndBBL)
	277	inline stochasticProcess *cmdline2StochasticProcessThatRequiresAlphaOptimization () {
	278	distribution *distP = NULL;
	279	if (!_args_info.optimizeAlpha_given)
	280	errorMsg::reportError("Can't use function cmdline2StochasticProcessThatRequiresAlphaOptimization if the optimizeAlpha flag was not turned on - please inform the programmer of this error.");
	281	// else
	282	if (_args_info.alpha_given)
	283	distP = new gammaDistribution(_args_info.alpha_arg,_args_info.categories_arg);
	284	else
	285	distP = new gammaDistribution(DEFAULT_VALUE_FOR_ALPAH,_args_info.categories_arg);
	286	LOG(5,<<"Using Gamma ASRV with "<<_args_info.categories_arg<<" bins"<<endl);
	287	stochasticProcess spPtr = cmdline2StochasticProcessInternal(distP);
	288	if (distP) delete distP;
	289	return(spPtr);
	290	}
	291
	292	inline stochasticProcess *cmdline2HomogenuisStochasticProcess() {
	293	uniDistribution dist;
	294	LOG(5,<<"Creating homogeneous rate based stochastic Process "<<endl);
	295	return (cmdline2StochasticProcessInternal(dist));
	296	}
	297
	298	inline stochasticProcess cmdline2HomogenuisStochasticProcessAAOnly() {
	299	uniDistribution dist;
	300	LOG(5,<<"Creating homogeneous rate based stochastic Process "<<endl);
	301	return (cmdline2StochasticProcessInternalAAOnly(dist));
	302	}
	303
	304	inline stochasticProcess *cmdline2StochasticProcessSafe()
	305	{
	306	if (_args_info.homogeneous_flag) {
	307	return cmdline2StochasticProcess();
	308	} else { // we use Gamma
	309	if (_args_info.optimizeAlpha_flag) {
	310	return cmdline2StochasticProcessThatRequiresAlphaOptimization();
	311	} else if (_args_info.alpha_given) {
	312	return cmdline2StochasticProcess();
	313	} else {
	314	errorMsg::reportError("Gamma ASRV requiers either --alpha or --optimizeAlpha or both.",1);
	315	}
	316	}
	317	exit(1); // should never be reached
	318	}
	319
	320	private:
	321	stochasticProcess *cmdline2StochasticProcessInternal(distribution& dist) {
	322	replacementModel *probModPtr=NULL;
	323	pijAccelerator *pijAcc=NULL;
	324	MDOUBLE ratio =_args_info.ratio_arg;
	325	MDOUBLE Ap(0.25), Cp(0.25), Gp(0.25), Tp(0.25);
	326	sscanf(_args_info.ACGprob_arg,"%lf,%lf,%lf", &Ap, &Cp, &Gp);
	327	Tp=1.0-(Ap+Cp+Gp);
	328
	329	if (_args_info.day_given) {
	330	LOG(5,<<"Using Dayhoff replacement matrix"<<endl);
	331	probModPtr=new pupAll(datMatrixHolder::dayhoff);
	332	pijAcc = new chebyshevAccelerator(probModPtr);
	333	} else if (_args_info.rev_given) {
	334	LOG(5,<<"Using rev replacement matrix"<<endl);
	335	probModPtr=new pupAll(datMatrixHolder::mtREV24);
	336	pijAcc = new chebyshevAccelerator(probModPtr);
	337	} else if (_args_info.wag_given) {
	338	LOG(5,<<"Using wag replacement matrix"<<endl);
	339	probModPtr=new pupAll(datMatrixHolder::wag);
	340	pijAcc = new chebyshevAccelerator(probModPtr);
	341	} else if (_args_info.cprev_given) {
	342	LOG(5,<<"Using cprev replacement matrix"<<endl);
	343	probModPtr=new pupAll(datMatrixHolder::cpREV45);
	344	pijAcc = new chebyshevAccelerator(probModPtr);
	345	} else if (_args_info.nucjc_given) {
	346	LOG(5,<<"Using JC for nucleotide"<<endl);
	347	probModPtr=new nucJC;
	348	pijAcc = new trivialAccelerator(probModPtr);
	349	} else if (_args_info.aaJC_given) {
	350	LOG(5,<<"Using JC for amino acids"<<endl);
	351	probModPtr=new aaJC;
	352	pijAcc = new trivialAccelerator(probModPtr);
	353	} else if ((_args_info.hky_given) \|\| (_args_info.k2p_given)) {
	354	LOG(5,<<"Using hky replacement matrix"<<endl);
	355	probModPtr=new hky(Ap,Cp,Gp,Tp,ratio);
	356	pijAcc = new trivialAccelerator(probModPtr);
	357	} else if (_args_info.tamura92_given) {
	358	LOG(5,<<"Using the Tamura 92 replacement matrix"<<endl);
	359	MDOUBLE theta = Cp+Gp;
	360	probModPtr=new tamura92(theta, ratio);
	361	pijAcc = new trivialAccelerator(probModPtr);
	362	} else if (_args_info.gtr_given) {
	363	LOG(5,<<"Using the GTR replacement matrix"<<endl);
	364	//Vdouble freqs = evaluateCharacterFreq(_sc);
	365	Vdouble freqs;
	366	freqs.push_back(0.25);
	367	freqs.push_back(0.25);
	368	freqs.push_back(0.25);
	369	freqs.push_back(0.25);
	370	probModPtr=new gtrModel(freqs);
	371	pijAcc = new trivialAccelerator(probModPtr);
	372	} else if ((_args_info.alphabet_arg == 20) &&
	373	(_args_info.modelfile_given)) { // try to read the name as a file name
	374	LOG(5,<<"Using user supplied replacement matrix from the file "<<_args_info.modelfile_arg<<endl);
	375	probModPtr=new pupAll(_args_info.modelfile_arg);
	376	pijAcc = new chebyshevAccelerator(probModPtr);
	377	} else { /* default = if (strcmp(_args_info.model_arg,"jtt")==0) */
	378	probModPtr=new pupAll(datMatrixHolder::jones);
	379	pijAcc = new chebyshevAccelerator(probModPtr);
	380	}
	381
	382	stochasticProcess *spPtr = NULL;
	383	if (!_args_info.ssrv_flag) {
	384	spPtr = new stochasticProcess(&dist, pijAcc);
	385
	386	} else {
	387	// Using a Site-Specific Rate Variation model
	388	replacementModelSSRV probModSsrv(&dist,probModPtr,_args_info.nu_arg);
	389	if (pijAcc) delete pijAcc;
	390	pijAcc = new trivialAccelerator(&probModSsrv);
	391	spPtr = new stochasticProcessSSRV(pijAcc);
	392	LOG(5,<<"cmdline2StochasticProcessInternal: Created stochasticProcessSSRV"<<endl);
	393	}
	394
	395	// if rate is given in input, set it.
	396	if (_args_info.inputRate_given)
	397	spPtr->setGlobalRate(_args_info.inputRate_arg);
	398
	399	if (probModPtr) delete probModPtr;
	400	if (pijAcc) delete pijAcc;
	401	return spPtr;
	402	}
	403
	404	stochasticProcess cmdline2StochasticProcessInternalAAOnly(distribution& dist) {
	405	replacementModel *probModPtr=NULL;
	406	pijAccelerator *pijAcc=NULL;
	407
	408	if (_args_info.day_given) {
	409	LOG(5,<<"Using Dayhoff replacement matrix"<<endl);
	410	probModPtr=new pupAll(datMatrixHolder::dayhoff);
	411	pijAcc = new chebyshevAccelerator(probModPtr);
	412	} else if (_args_info.rev_given) {
	413	LOG(5,<<"Using rev replacement matrix"<<endl);
	414	probModPtr=new pupAll(datMatrixHolder::mtREV24);
	415	pijAcc = new chebyshevAccelerator(probModPtr);
	416	} else if (_args_info.wag_given) {
	417	LOG(5,<<"Using wag replacement matrix"<<endl);
	418	probModPtr=new pupAll(datMatrixHolder::wag);
	419	pijAcc = new chebyshevAccelerator(probModPtr);
	420	} else if (_args_info.cprev_given) {
	421	LOG(5,<<"Using cprev replacement matrix"<<endl);
	422	probModPtr=new pupAll(datMatrixHolder::cpREV45);
	423	pijAcc = new chebyshevAccelerator(probModPtr);
	424	} else if (_args_info.aaJC_given) {
	425	LOG(5,<<"Using JC for amino acids"<<endl);
	426	probModPtr=new aaJC;
	427	pijAcc = new trivialAccelerator(probModPtr);
	428	} else if (_args_info.modelfile_given) { // try to read the name as a file name
	429	LOG(5,<<"Using user supplied replacement matrix from the file "<<_args_info.modelfile_arg<<endl);
	430	probModPtr=new pupAll(_args_info.modelfile_arg);
	431	pijAcc = new chebyshevAccelerator(probModPtr);
	432	} else { /* default = if (strcmp(_args_info.model_arg,"jtt")==0) */
	433	probModPtr=new pupAll(datMatrixHolder::jones);
	434	pijAcc = new chebyshevAccelerator(probModPtr);
	435	}
	436	stochasticProcess sp(&dist, pijAcc);
	437
	438	// if rate is given in input, set it.
	439	// if (_args_info.inputRate_given)
	440	// sp.setGlobalRate(_args_info.inputRate_arg);
	441
	442	if (probModPtr) delete probModPtr;
	443	if (pijAcc) delete pijAcc;
	444	return sp;
	445	}
	446
	447	public:
	448	stochasticProcess cmdline2ExactGammaStochasticProcess() {
	449	uniDistribution dist;
	450	LOG(5,<<"Creating exact Gamma based stochastic Process "<<endl);
	451	if(!_args_info.alpha_given)
	452	errorMsg::reportError("Using exact Gamma requires alpha to be set");
	453	pupAll *probModPtr=NULL;
	454	// pijAccelerator *pijAcc=NULL;
	455	alphaTrivialAccelerator *pijAcc=NULL;
	456
	457	if (_args_info.day_given) {
	458	LOG(5,<<"Using Dayhoff replacement matrix"<<endl);
	459	probModPtr=new pupAll(datMatrixHolder::dayhoff);
	460	pijAcc = new alphaTrivialAccelerator(probModPtr,_args_info.alpha_arg);
	461	} else if (_args_info.rev_given) {
	462	LOG(5,<<"Using rev replacement matrix"<<endl);
	463	probModPtr=new pupAll(datMatrixHolder::mtREV24);
	464	pijAcc = new alphaTrivialAccelerator(probModPtr,_args_info.alpha_arg);
	465	} else if (_args_info.wag_given) {
	466	LOG(5,<<"Using wag replacement matrix"<<endl);
	467	probModPtr=new pupAll(datMatrixHolder::wag);
	468	pijAcc = new alphaTrivialAccelerator(probModPtr,_args_info.alpha_arg);
	469	} else if (_args_info.cprev_given) {
	470	LOG(5,<<"Using cprev replacement matrix"<<endl);
	471	probModPtr=new pupAll(datMatrixHolder::cpREV45);
	472	pijAcc = new alphaTrivialAccelerator(probModPtr,_args_info.alpha_arg);
	473	} else if ((_args_info.alphabet_arg == 20) &&
	474	(_args_info.modelfile_given)) { // try to read the name as a file name
	475	LOG(5,<<"Using user supplied replacement matrix from the file "<<_args_info.modelfile_arg<<endl);
	476	probModPtr=new pupAll(_args_info.modelfile_arg);
	477	pijAcc = new alphaTrivialAccelerator(probModPtr,_args_info.alpha_arg);
	478	} else if (_args_info.nucjc_given \|\|
	479	_args_info.aaJC_given \|\|
	480	_args_info.hky_given \|\|
	481	_args_info.k2p_given \|\|
	482	_args_info.tamura92_given \|\|
	483	_args_info.gtr_given) {
	484	errorMsg::reportError("Exact Gamma stochastic process only works with pupAll model");
	485	} else { /* default = if (strcmp(_args_info.model_arg,"jtt")==0) */
	486	probModPtr=new pupAll(datMatrixHolder::jones);
	487	pijAcc = new alphaTrivialAccelerator(probModPtr,_args_info.alpha_arg);
	488	}
	489	stochasticProcess sp(&dist, pijAcc);
	490
	491	// if rate is given in input, set it.
	492	if (_args_info.inputRate_given)
	493	sp.setGlobalRate(_args_info.inputRate_arg);
	494
	495	if (probModPtr) delete probModPtr;
	496	if (pijAcc) delete pijAcc;
	497	return sp;
	498	}
	499
	500	public:
	501	// NOTE: the user must check:
	502	// if the returned stream is an ofstream object (an actual file) it should be deleted
	503	// if the returned stream is an ostream object (cout) do nothing
	504	ostream *cmdline2OutputStream() {
	505	ostream *outPtr;
	506	string outFileName(_args_info.outputfile_arg);
	507	if (outFileName == "") outFileName="-";
	508	if (outFileName == "-") {
	509	outPtr = &cout;
	510	} else {
	511	outPtr = new ofstream(outFileName.c_str());
	512	if (!outPtr->good()) errorMsg::reportError(string("Can't open for writing the file ")+outFileName);
	513	}
	514	return outPtr;
	515	}
	516
	517	// NOTE: the user must check:
	518	// if the returned stream is an ofstream object (an actual file) it should be deleted
	519	// if the returned stream is an ostream object (cout) do nothing
	520	ostream *cmdline2TreeOutputStream() {
	521	ostream *outPtr;
	522	string outFileName(_args_info.treeoutputfile_arg);
	523	if (outFileName == "") outFileName="-";
	524	if (outFileName == "-") {
	525	outPtr = &cout;
	526	} else {
	527	outPtr = new ofstream(outFileName.c_str());
	528	if (!outPtr->good()) errorMsg::reportError(string("Can't open for writing the file ")+outFileName);
	529	}
	530	return outPtr;
	531	}
	532
	533	void consistencyCheck (tree treePtr, tree constraintTreePtr) {
	534	if (treePtr!=NULL) {
	535	if (constraintTreePtr !=NULL) {
	536	/* constraints c1(*constraintTreePtr);
	537	c1.setTree(*treePtr);
	538	if (!c1.fitsConstraints()){
	539	LOG(1,<<"Input tree does not fit constraints!"<<endl);
	540	LOGDO(1,c1.outputMissingClads(myLog::LogFile()));
	541	errorMsg::reportError("Please enter a starting tree that fits the constraints");
	542	}
	543	*/ }
	544	}
	545	}
	546
	547	public:
	548	// Read from file the posterior distribution of rates for each sequence site
	549	VVdoubleRep cmdline2PosteriorRates() {
	550	if (!_args_info.posteriorRates_given)
	551	errorMsg::reportError("cmdline2EvolObjs::cmdline2PosteriorRates: This method shouldn't be used if --posteriorRates was not given");
	552	ifstream in(_args_info.posteriorRates_arg);
	553	if (!in.is_open())
	554	errorMsg::reportError(string("Can not open sequence file ")+string(_args_info.posteriorRates_arg));
	555
	556	string line, number, rest; // For splitting the line into separate numbers
	557	VdoubleRep posterior(_args_info.categories_arg); // Current line
	558	VVdoubleRep posteriorRates; // Accumulate all lines
	559	getline(in, line);
	560
	561	// Each loop reads one line of numbers
	562	while (in) {
	563	// split line into numbers
	564	for(int cat=0; cat<_args_info.categories_arg; ++cat) {
	565	splitString2(line, " ", number, rest);
	566	if (number.size() == 0)
	567	errorMsg::reportError(string("cmdline2EvolObjs::cmdline2PosteriorRates: Bad line with too few numbers in file ")
	568	+_args_info.posteriorRates_arg+": "+line);
	569	posterior[cat] = atof(number.c_str());
	570	}
	571	posteriorRates.push_back(posterior);
	572	getline(in, line);
	573	}
	574	return posteriorRates;
	575	}
	576	};
	577
	578	#endif

+560

-0

libs/phylogeny/codon.cpp less more

	0	// $Id: codon.cpp 5981 2009-03-17 14:39:39Z rubi $
	1
	2	#include "codon.h"
	3	#include "nucleotide.h"
	4	#include "amino.h"
	5	#include "logFile.h"
	6	#include "definitions.h"
	7	#include "someUtil.h"
	8	#include "matrixUtils.h"
	9	#include "sequenceContainer.h"
	10	#include <sstream>
	11	#include <cctype>
	12	#define INITIATION_CODON "i"
	13
	// Definitions of codonUtility's static codon-pair lookup tables.
	// codonUtility::initSubMatrices() sizes each of them to
	// cod.size() x cod.size() and fills one entry per pair of codons.
	14	vector<vector<codonUtility::diffType> > codonUtility::_trtvDiff;
	15	vector<vector<codonUtility::replacementType> > codonUtility::_synNonsynDiff;
	16	vector<vector<codonUtility::nucDiffPlaceType> > codonUtility::_nucDiffPlace;
	17	vector<vector<codonUtility::nucsDiffType> > codonUtility::_nucsDiff;
	18
	19
	20	codon::codon(){
	21	geneticCodeString gcs=geneticCodeHolder::nuclearStandard;
	22	init(gcs);
	23	}
	24
	25	codon::codon(const geneticCodeString& matrixFileString){
	26	init(matrixFileString);
	27	}
	28
	29	void codon::init(const geneticCodeString& matrixFileString)
	30	{
	31	readMatrixFromFile(matrixFileString.Val);
	32	}
	33
	34	void codon::readMatrixFromFile(const string& matrixFileName){ //default value: "nuclearCode.txt"
	35	// cout<<"in codon constructor"<<endl;
	36	stringstream in(matrixFileName.c_str());
	37	if (!in) {
	38	errorMsg::reportError("in codon::readMatrixFromFile: unable to open matrix data file");
	39	}
	40
	41	int aa = -1; //initialized as -1 so in first iteration will change to 0
	42	int noOfCodons = 0;
	43	string strAmino;
	44	bool isInitCodon = false;
	45	while (!in.eof()) { //20 amino acids and stop
	46	string val;
	47	in>>val;
	48	if (val.size()==1) { //amino acid
	49	if(val == INITIATION_CODON)
	50	isInitCodon = true;
	51	else{
	52	aa++;
	53	strAmino=val;
	54	if (strAmino=="*") { _alphabetSize=noOfCodons;}
	55	isInitCodon = false;
	56	}
	57	}
	58
	59	else if (val.size()==3 && val[0]!='#'){ //codon, # symbolizes a comment
	60	if(isInitCodon){
	61	map <string,int>::const_iterator iniItr =_codon2Int.find(val);
	62	if(iniItr == _codon2Int.end())
	63	errorMsg::reportError("Initiation codon with undefined index at codon::readMatrixFromFile");
	64	else
	65	_initiationIndex2codon[iniItr->second] = val;
	66	}
	67	else{
	68	_geneticCode[val]=strAmino;
	69	_codon2Int[val]=noOfCodons;
	70	noOfCodons++;
	71	}
	72	}
	73	else {
	74
	75	if (noOfCodons!=64){
	76	string err="in codon::readMatrixFromFile: total number of codons = "+int2string(noOfCodons);
	77	errorMsg::reportError(err);
	78	}
	79	return;
	80	}
	81	}
	82	}
	83	codon& codon::operator=(const codon& other) {
	84	_geneticCode = other._geneticCode; //key - codon, value - amino acid
	85	_codon2Int = other._codon2Int;//key string of codon int= integer value of codon
	86	_alphabetSize = other._alphabetSize;
	87	_initiationIndex2codon = other._initiationIndex2codon;
	88	return *this;
	89	}
	90	// codon::codon(const codon& other):
	91	// _geneticCode(other._geneticCode), //key - codon, value - amino acid
	92	// _codon2Int(other._codon2Int),//key string of codon int= integer value of codon
	93	// _alphabetSize(other._alphabetSize){}
	94
	95
	96	//returns -99 if it does not succeed.
	97	int codon::fromChar(const string& s, const int pos) const {
	98	if (s.size() <= pos+2) {
	99	//errorMsg::reportError("Trying to read a codon pass the end of the string. The number of nucleotide may not be divisible by three");
	100	string textToPrint("Trying to read a codon pass the end of the string. The number of nucleotide may not be divisible by three");
	101	LOG(1,<<textToPrint<<endl);
	102	return -99;
	103	}
	104
	105	nucleotide nuc;
	106	int p1,p2,p3;
	107	p1 = nuc.fromChar(s[pos]);
	108	p2 = nuc.fromChar(s[pos+1]);
	109	p3 = nuc.fromChar(s[pos+2]);
	110
	111
	112	if ((p1 <0) \|\| (p2 <0) \|\| (p3 <0))
	113	return gap();
	114	else if ((p1 ==15) \|\| (p2 ==15) \|\| (p3 ==15)) return unknown(); // unknown.
	115	else if ((p1 >4) \|\| (p2 >4) \|\| (p3 >4)) return unknown(); //unknown.
	116	string strCodon="";
	117	//change U --> T
	118	if (p1==4) strCodon+="T";
	119	else strCodon+=toupper(s[pos]);
	120	if (p2==4) strCodon+="T";
	121	else strCodon+=toupper(s[pos+1]);
	122	if (p3==4) strCodon+="T";
	123	else strCodon+=toupper(s[pos+2]);
	124	//const string strCodon = s.substr(pos,3);
	125	map <string,int> tmpMap=_codon2Int;
	126	map <string,int>::iterator it1;
	127	it1=tmpMap.find(strCodon);
	128	if (it1==tmpMap.end()){
	129
	130	string err="error in codon::fromChar cannot find codon "+strCodon;
	131	errorMsg::reportError(err);
	132	}
	133	return tmpMap[strCodon];
	134	}
	135
	136	vector<int> codon::fromString(const string &str) const {
	137	vector<int> vec;
	138	if (str.size()%3!=0) {
	139	errorMsg::reportError("error in function codon::fromString. String length should be a multiplication of 3");
	140	}
	141	for (int i=0;i<str.size();i+=3)
	142	vec.push_back(fromChar(str,i));
	143	return vec;
	144	}
	145
	146	string codon::fromInt(const int in_id) const{
	147	if (in_id == unknown())
	148	return "XXX";
	149	if (in_id == gap())
	150	return "---";
	151	map <string, int> tmpMap = _codon2Int;
	152	map <string, int>::iterator it=tmpMap.begin();
	153	while (it!=tmpMap.end()){
	154	if ((*it).second==in_id){
	155	return (*it).first;
	156	}
	157	it++;
	158	}
	159	string err="error in function codon::fromInt: no codon found for the integer";
	160	errorMsg::reportError(err);
	161	return (string("we should never get here - the reportError above will exit"));
	162	}
	163
	164	codonUtility::replacementType codonUtility::codonReplacement(const int c1, const int c2, const codon &cod){
	165	if (c1 == c2) return codonUtility::sameCodon;
	166	else if (codonUtility::aaOf(c1,cod) == codonUtility::aaOf(c2,cod)) return codonUtility::synonymous;
	167	return codonUtility::non_synonymous;
	168	}
	169
	170	int codonUtility::aaOf(const int c1, const codon &cod){
	171	amino a;
	172	if (c1==cod.gap())
	173	return a.gap();
	174	if (c1==cod.unknown())
	175	return a.unknown();
	176	string strCodon=cod.fromInt(c1);
	177	map <string,string> geneticCode=cod.geneticCode();
	178	map <string,string>::iterator pos;
	179	if ((pos=geneticCode.find(strCodon)) == geneticCode.end()){
	180	string err="error in codonUtility::aaOf: cannot find codon "+strCodon;
	181	errorMsg::reportError(err);
	182	}
	183	if (pos->second.size() > 1){
	184	errorMsg::reportError("error in codonUtility::aaOf: amino acid 1 letter code > 1");
	185	}
	186	return a.fromChar(*pos->second.c_str());
	187	}
	188
	189
	190	codonUtility::diffType codonUtility::codonDiff(const int c1, const int c2, const codon &cod){
	191	if (c1==c2) return codonUtility::equal;
	192	nucleotide n;
	193	string s1 = cod.fromInt(c1);
	194	string s2 = cod.fromInt(c2);
	195
	196	int pos1 = n.fromChar(s1[0])+n.fromChar(s2[0]);
	197	int pos2 = n.fromChar(s1[1])+n.fromChar(s2[1]);
	198	int pos3 = n.fromChar(s1[2])+n.fromChar(s2[2]);
	199
	200	if (s1[0]!=s2[0] && s1[1]!=s2[1] && s1[2]!=s2[2])
	201	return codonUtility::threesub;
	202
	203	if (s1[0]==s2[0] && s1[1]==s2[1] && s1[2]!=s2[2]) {
	204	if (pos3%2==0) return codonUtility::tr;
	205	else return codonUtility::tv;
	206	}
	207	if (s1[1]==s2[1] && s1[2]==s2[2] && s1[0]!=s2[0]) {
	208	if (pos1%2==0) return codonUtility::tr;
	209	else return codonUtility::tv;
	210	}
	211	if (s1[0]==s2[0] && s1[2]==s2[2] && s1[1]!=s2[1]) {
	212	if (pos2%2==0) return codonUtility::tr;
	213	else return codonUtility::tv;
	214	}
	215
	216	if (s1[0]==s2[0] && pos2%2==0 && pos3%2==0)
	217	return codonUtility::twoTrs;
	218	if (s1[1]==s2[1] && pos1%2==0 && pos3%2==0)
	219	return codonUtility::twoTrs;
	220	if (s1[2]==s2[2] && pos1%2==0 && pos2%2==0)
	221	return codonUtility::twoTrs;
	222
	223	if (s1[0]==s2[0] && pos2%2!=0 && pos3%2!=0)
	224	return codonUtility::twoTvs;
	225	if (s1[1]==s2[1] && pos1%2!=0 && pos3%2!=0)
	226	return codonUtility::twoTvs;
	227	if (s1[2]==s2[2] && pos1%2!=0 && pos2%2!=0)
	228	return codonUtility::twoTvs;
	229
	230	return codonUtility::trtv;
	231	}
	232
	233
	234	//return the place (0, 1, or 2) that the two codons are different
	235	//and the identity of the different nucleotide in the target codon.
	236	//For example, nucDiffPlace(ATG, ACG) returns C2
	237	codonUtility::nucDiffPlaceType codonUtility::nucDiffPlace(const int fromCodon, const int targetCodon, const codon &cod){
	238	if (fromCodon == targetCodon)
	239	return codonUtility::EQUAL;
	240
	241	codonUtility::nucDiffPlaceType res = A1;
	242	nucleotide nuc;
	243	string s1 = cod.fromInt(fromCodon);
	244	string s2 = cod.fromInt(targetCodon);
	245
	246	int diffNum = 0;
	247	if (s1[0] != s2[0]){
	248	++diffNum;
	249	switch (s2[0])
	250	{
	251	case 'A': res = A1;
	252	break;
	253	case 'C': res = C1;
	254	break;
	255	case 'G': res = G1;
	256	break;
	257	case 'T': res = T1;
	258	break;
	259	default:
	260	errorMsg::reportError("error in codonUtility::nucDiffPlace.");
	261	break;
	262	}
	263	}
	264	if (s1[1] != s2[1]){
	265	++diffNum;
	266	switch (s2[1])
	267	{
	268	case 'A': res = A2;
	269	break;
	270	case 'C': res = C2;
	271	break;
	272	case 'G': res = G2;
	273	break;
	274	case 'T': res = T2;
	275	break;
	276	default:
	277	errorMsg::reportError("error in codonUtility::nucDiffPlace.");
	278	break;
	279	}
	280	}
	281	if (s1[2] != s2[2]){
	282	++diffNum;
	283	switch (s2[2])
	284	{
	285	case 'A': res = A3;
	286	break;
	287	case 'C': res = C3;
	288	break;
	289	case 'G': res = G3;
	290	break;
	291	case 'T': res = T3;
	292	break;
	293	default:
	294	errorMsg::reportError("error in codonUtility::nucDiffPlace.");
	295	break;
	296	}
	297	}
	298	if (diffNum == 0)
	299	errorMsg::reportError("error in codonUtility::nucDiffPlace. Can't find different nucleotide");
	300	if (diffNum > 1)
	301	res = MUL_SUB;
	302	return res;
	303	}
	304
	305	//return the different nucleotides between the from and target codons.
	306	//For example, nucsDiff(ATG, ACG) returns CT
	307	codonUtility::nucsDiffType codonUtility::nucsDiff(const int fromCodon, const int targetCodon, const codon &cod){
	308	if (fromCodon == targetCodon)
	309	return codonUtility::SAME;
	310
	311	codonUtility::nucsDiffType res = AC;
	312	nucleotide nuc;
	313	string s1 = cod.fromInt(fromCodon);
	314	string s2 = cod.fromInt(targetCodon);
	315
	316	int diffNum = 0;
	317	int from = 0;
	318	int to = 0;
	319	if (s1[0] != s2[0])
	320	{
	321	++diffNum;
	322	from = s1[0];
	323	to = s2[0];
	324	}
	325	if (s1[1] != s2[1])
	326	{
	327	++diffNum;
	328	from = s1[1];
	329	to = s2[1];
	330	}
	331	if (s1[2] != s2[2])
	332	{
	333	++diffNum;
	334	from = s1[2];
	335	to = s2[2];
	336	}
	337	switch(from)
	338	{
	339	case 'A':
	340	switch(to)
	341	{
	342	case 'G':res = AG;break;
	343	case 'T':res = AT;break;
	344	case 'C':res = AC;break;
	345	default:
	346	errorMsg::reportError("error in codonUtility::nucsDiff.");
	347	break;
	348	}
	349	break;
	350	case 'G':
	351	switch(to)
	352	{
	353	case 'A':res = AG;break;
	354	case 'T':res = GT;break;
	355	case 'C':res = CG;break;
	356	default:
	357	errorMsg::reportError("error in codonUtility::nucsDiff.");
	358	break;
	359	}
	360	break;
	361	case 'C':
	362	switch(to)
	363	{
	364	case 'G':res = CG;break;
	365	case 'T':res = CT;break;
	366	case 'A':res = AC;break;
	367	default:
	368	errorMsg::reportError("error in codonUtility::nucsDiff.");
	369	break;
	370	}
	371	break;
	372	case 'T':
	373	switch(to)
	374	{
	375	case 'G':res = GT;break;
	376	case 'A':res = AT;break;
	377	case 'C':res = CT;break;
	378	default:
	379	errorMsg::reportError("error in codonUtility::nucsDiff.");
	380	break;
	381	}
	382	break;
	383	default:
	384	errorMsg::reportError("error in codonUtility::nucsDiff.");
	385	break;
	386	}
	387
	388	if (diffNum == 0)
	389	errorMsg::reportError("error in codonUtility::nucsDiff. Can't find different nucleotide");
	390	if (diffNum > 1)
	391	res = DIFF;
	392	return res;
	393	}
	394
	395
	396
	397	void codonUtility::initSubMatrices(const codon& cod){
	398
	399	if ((_trtvDiff.size() == cod.size()) && (_synNonsynDiff.size() == cod.size()) && (_nucDiffPlace.size() == cod.size()) && (_nucsDiff.size() == cod.size()))
	400	return;
	401
	402	_trtvDiff.resize(cod.size());
	403	_synNonsynDiff.resize(cod.size());
	404	_nucDiffPlace.resize(cod.size());
	405	_nucsDiff.resize(cod.size());
	406	for (int i = 0; i < _trtvDiff.size(); ++i)
	407	{
	408	_trtvDiff[i].resize(cod.size());
	409	_synNonsynDiff[i].resize(cod.size());
	410	_nucDiffPlace[i].resize(cod.size());
	411	_nucsDiff[i].resize(cod.size());
	412
	413	}
	414	//resizeMatrix<diffType>(_trtvDiff, cod.size(), cod.size());
	415	//resizeMatrix<replacementType>(_synNonsynDiff, cod.size(), cod.size());
	416	//resizeMatrix<nucDiffPlaceType>(_nucDiffPlace, cod.size(), cod.size());
	417	for (int i = 0; i < cod.size(); ++i){
	418	for (int j =0; j <= i; ++j){
	419	_trtvDiff[i][j] = _trtvDiff[j][i] = codonDiff(i, j, cod);
	420	_synNonsynDiff[i][j] = _synNonsynDiff[j][i] = codonReplacement(i, j, cod);
	421	_nucDiffPlace[i][j] = nucDiffPlace(i, j, cod);
	422	_nucDiffPlace[j][i] = nucDiffPlace(j, i, cod);
	423	_nucsDiff[i][j] = nucsDiff(i,j,cod);
	424	_nucsDiff[j][i] = nucsDiff(j,i,cod);
	425	}
	426	}
	427	}
	428
	429	//returns the number (codonCounter) and frequency (codonUsage) of each codon in the sequence container
	430	void codonUtility::getCodonUsage(const sequenceContainer& sc, Vint& codonCounter, Vdouble& codonUsage)
	431	{
	432	if (sc.getAlphabet()->size() != 61)
	433	errorMsg::reportError("cannot calculate codon usage when alphabet is not codon");
	434	codonCounter.resize(61, 0);
	435	codonUsage.resize(61, 0.0);
	436	codon alph;
	437	int sum = 0;
	438	for (int s = 0; s < sc.numberOfSeqs();++s) {
	439	int id = sc.placeToId(s);
	440	for (int pos = 0; pos < sc.seqLen(); ++pos)
	441	{
	442	int cod = sc[id][pos];
	443	if (alph.isSpecific(cod))
	444	{
	445	++sum;
	446	++codonCounter[cod];
	447	}
	448	}
	449	}
	450
	451	for (int c = 0; c < codonCounter.size(); ++c)
	452	codonUsage[c] = static_cast<MDOUBLE>(codonCounter[c]) / sum;
	453	}
	454
	455
	456	//in codonUsageFile: only 3-letter-codon and frequency separated by "\t"
	457	void codonUtility::readCodonUsage(const string& codonUsageFileName, Vdouble& codonUsage,const codon &alph)
	458	{
	459	codonUsage.resize(alph.size(), 0.0);
	460	ifstream inFile(codonUsageFileName.c_str());
	461	vector<string> inFileData;
	462	putFileIntoVectorStringArray(inFile, inFileData);
	463	inFile.close();
	464	if (inFileData.empty()){
	465	errorMsg::reportError("unable to open file, or file is empty in codonUtility::readCodonUsage");
	466	}
	467
	468	vector<string>::const_iterator it = inFileData.begin();
	469	for (; it!= inFileData.end(); ++it)
	470	{
	471	if (it->empty()) //empty line
	472	continue;
	473	int endCodon = it->find_first_of("\t", 0);
	474	int startFreq = it->find_first_not_of("\t ", endCodon);
	475	if (startFreq>0)
	476	{
	477	string codonStr = it->substr(0, endCodon);
	478	string freqStr = it->substr(startFreq);
	479	MDOUBLE freq = string2double(freqStr);
	480	if(freq == 0.0) freq = EPSILON;
	481	codonUsage[alph.fromChar(codonStr, 0)] = freq;
	482	}
	483	}
	484	}
	485
	486	//calculates the CAI for the whole MSA and for each position.
	487	//The calculation is based on a pre-calculated codonUsage vector.
	488	//The calculation is based on Sharp & Li (1987) NAR, 15:1281-1295
	489	MDOUBLE codonUtility::calcCodonAdaptationIndex(const sequenceContainer& sc, const Vdouble& codonUsage, Vdouble& cai4site)
	490	{
	491	//the returned value: calculated as the average CAI for the MSA, rather than the geometrical mean as in Sharp & Li
	492	MDOUBLE wholeAlignmentCai = 0.0;
	493	codon alph;
	494	amino am;
	495	//1. calculate Wk = the frequency of codon k relative to the frequency of the optimal codon for that amino acid.
	496	Vdouble Wk(codonUsage.size(), 0.0);
	497	int aaId;
	498	for (aaId = 0; aaId < am.size(); ++aaId)
	499	{
	500	Vint codonsOfAa = aminoUtility::codonOf(aaId, alph);
	501	//finding the most frequent codon for this aa
	502	MDOUBLE mostFrequent = 0.0;
	503	Vint::const_iterator iter;
	504	for (iter = codonsOfAa.begin(); iter != codonsOfAa.end(); ++iter)
	505	{
	506	if (codonUsage[*iter] > mostFrequent)
	507	mostFrequent = codonUsage[*iter];
	508	}
	509
	510	//calculating Wk
	511	for (iter = codonsOfAa.begin(); iter != codonsOfAa.end(); ++iter)
	512	Wk[iter] = codonUsage[iter] / mostFrequent;
	513	}
	514
	515	//2. calculate CAI
	516	cai4site.resize(sc.seqLen(), 0.0);
	517	int pos;
	518	for (pos = 0; pos < sc.seqLen(); ++pos)
	519	{
	520	MDOUBLE cai = 0.0;
	521	int informativeCodons = 0;
	522	for (int s = 0; s < sc.numberOfSeqs();++s)
	523	{
	524	int id = sc.placeToId(s);
	525	int cod = sc[id][pos];
	526	if(!alph.isSpecific(cod))
	527	continue;
	528	cai += Wk[cod];
	529	++informativeCodons;
	530	}
	531
	532	cai /= static_cast<MDOUBLE>(informativeCodons);
	533	cai4site[pos] = cai;
	534	wholeAlignmentCai += cai;
	535	}
	536	return wholeAlignmentCai;
	537	}
	538
	539
	540
	541	bool codon::isStopCodon(const int in_id) const
	542	{
	543	if (in_id == unknown()) return false;
	544	if (in_id == gap()) return false;
	545	if ((in_id >= 0 ) && (in_id < _alphabetSize)) return false;
	546	return true;
	547	}
	548
	549	bool codon::isInitiationCodon(const int in_id) const
	550	{
	551	bool result = true;
	552	map <int,string>::const_iterator itr = _initiationIndex2codon.find(in_id);
	553	if(itr == _initiationIndex2codon.end()){
	554	result = false;
	555	}
	556	return result;
	557	}
	558
	559

+107

-0

libs/phylogeny/codon.h less more

	0	// $Id: codon.h 5975 2009-03-17 08:00:37Z rubi $
	1	#ifndef ____CODON
	2	#define ____CODON
	3
	4	#include <cassert>
	5	#include "definitions.h"
	6	#include "errorMsg.h"
	7	#include "someUtil.h"
	8	#include "alphabet.h"
	9	#include "geneticCodeHolder.h"
	10	#include <map>
	11	class codon;
	12
	13	class sequenceContainer;
	14	class codonUtility {
	15	public:
	16	enum diffType {equal =0, tr, tv, twoTrs, twoTvs ,trtv, threesub};
	17	static diffType codonDiff(const int c1, const int c2, const codon &cod);
	18	static diffType codonDiff(const int c1, const int c2) {return _trtvDiff[c1][c2];}
	19
	20	enum replacementType {sameCodon=0, synonymous, non_synonymous};
	21	static replacementType codonReplacement(const int c1, const int c2, const codon &cod);
	22	static replacementType codonReplacement(const int c1, const int c2) {return _synNonsynDiff[c1][c2];}
	23
	24	enum nucDiffPlaceType {A1=0, A2, A3,C1, C2, C3, G1,G2,G3,T1,T2,T3, EQUAL, MUL_SUB};
	25	static nucDiffPlaceType nucDiffPlace(const int fromCodon, const int targetCodon, const codon &cod);
	26	static nucDiffPlaceType nucDiffPlace(const int fromCodon, const int targetCodon) {return _nucDiffPlace[fromCodon][targetCodon];}
	27
	28	enum nucsDiffType {AC=0, AG, AT, CG, CT, GT, SAME, DIFF}; //The difference between two codons: For exampe nucsDiff(ACT, ACG) returns GT. DIFF = more than one change.
	29	static nucsDiffType nucsDiff(const int fromCodon, const int targetCodon, const codon &cod);
	30	static nucsDiffType nucsDiff(const int fromCodon, const int targetCodon) {return _nucsDiff[fromCodon][targetCodon];}
	31
	32	static int aaOf(const int c1, const codon &cod);
	33	static void initSubMatrices(const codon& cod);
	34
	35	//returns the number (codonCounter) and frequency (codonUsage) of each codon in the sequnece container
	36	static void getCodonUsage(const sequenceContainer& sc, Vint& codonCounter, Vdouble& codonUsage);
	37	static void readCodonUsage(const string& codonUsageFileName, Vdouble& codonUsage,const codon &inCodonAlpa);
	38	//calculates the CAI for the whole MSA and for each position.
	39	//The calculation is based on a pre-calculated codonUsage vector.
	40	static MDOUBLE calcCodonAdaptationIndex(const sequenceContainer& sc, const Vdouble& codonUsage, Vdouble& cai4site);
	41
	42	private:
	43	static vector<vector<diffType> > _trtvDiff;
	44	static vector<vector<replacementType> > _synNonsynDiff;
	45	static vector<vector<nucDiffPlaceType> > _nucDiffPlace;
	46	static vector<vector<nucsDiffType> > _nucsDiff;
	47	};
	48
	49
	50	class codon : public alphabet {
	51	public:
	52	explicit codon(); //default constructor: reads "nuclearCode.txt"
	53	explicit codon(const geneticCodeString& matrixFileString);
	54	virtual ~codon() {}
	55	// explicit codon( codon& other);
	56	codon& operator=(const codon& other);
	57	virtual alphabet* clone() const { return new codon(*this); }
	58	void readMatrixFromFile(const string& matrixFileName);
	59	const map <string,string> & geneticCode()const {return _geneticCode;}
	60	int unknown() const {return 64;}
	61	int gap() const {return -1;}
	62	int size() const {return _alphabetSize;} // 3 stop codon excluded
	63	int stringSize() const {return 3;} // 3 letter code.
	64	vector<int> fromString(const string& str) const;
	65	bool isStopCodon(const int in_id) const;
	66	bool isStopCodon(const string& str) const {return isStopCodon(fromChar(str));};
	67	bool isInitiationCodon(const int in_id) const;
	68	bool isInitiationCodon(const string& str) const {return isInitiationCodon(fromChar(str));};
	69	int fromChar(const string& s, const int pos=0) const;
	70	string fromInt(const int in_id) const;
	71	// "specific" here is not unknown, nor ambiguity, nor gap (for example, for nucleotides it will true for A,C,G, or T).
	72	bool isSpecific(const int id) const {return (id>=0 && id < size());}
	73
	74
	75
	76	int relations(const int charInSeq, const int charToCheck) const{
	77	if (charInSeq == -1) {
	78	errorMsg::reportError("gaps in the sequences. Either change gaps to ? or remove gap positions");
	79	}
	80	else if (charInSeq == unknown()) return 1;
	81	else if (charInSeq == charToCheck) return 1;
	82	if (charInSeq >= _alphabetSize)
	83	{
	84	string err= "";
	85	err+="charInSeq = ";
	86	err += int2string(charInSeq);
	87	err+= " _alphabetSize = ";
	88	err+=int2string(_alphabetSize);
	89	errorMsg::reportError(err);
	90	}
	91	assert(charInSeq < _alphabetSize);
	92	return 0;
	93	}
	94	private:
	95	void init(const geneticCodeString& matrixFileString);
	96	private:
	97	map <string,string> _geneticCode; //key - codon, value - amino acid
	98	map <string,int> _codon2Int;//key string of codon int= integer value of codon
	99	map <int,string> _initiationIndex2codon;//key: integer value of codon; value: string of initiation codon. the keys is an integer so that the value of the init codon can be found
	100	int _alphabetSize;
	101	};
	102
	103
	104
	105
	106	#endif

+6

-0

libs/phylogeny/codonJC.cpp less more

	0	// $Id: codonJC.cpp 962 2006-11-07 15:13:34Z privmane $
	1
	2	#include "codonJC.h"
	3
	4
	5

+47

-0

libs/phylogeny/codonJC.h less more

	0	// $Id: codonJC.h 962 2006-11-07 15:13:34Z privmane $
	1
	2	#ifndef ___CODON_JC
	3	#define ___CODON_JC
	4
	5	#include "replacementModel.h"
	6	#include <cmath>
	7	using namespace std;
	8
	9	namespace codonDef {
	10	const MDOUBLE Alp = 61.0;
	11	const MDOUBLE odAl = 1.0/Alp; // one divided by alphabet
	12	const MDOUBLE om_odAl = 1.0-odAl; // one minus odAl;
	13	const MDOUBLE alDiv_omalp = Alp/(Alp-1.0);
	14	const MDOUBLE m_alDiv_omalp = -alDiv_omalp;
	15	}
	16
	17	class codonJC : public replacementModel {
	18	public:
	19
	20	virtual replacementModel* clone() const { return new codonJC(*this); }// see note down:
	21	const int alphabetSize() const {return 61;}
	22
	23	explicit codonJC(){};
	24	const MDOUBLE Pij_t(const int i,const int j, const MDOUBLE d) const {
	25	return ((i==j) ? codonDef::odAl+codonDef::om_odAlexp(codonDef::m_alDiv_omalpd): codonDef::odAl-codonDef::odAlexp(codonDef::m_alDiv_omalpd));
	26	}
	27
	28	const MDOUBLE dPij_dt(const int i,const int j, const MDOUBLE d) const{
	29	return ((i==j) ? -exp(codonDef::m_alDiv_omalpd): exp(codonDef::m_alDiv_omalpd)/(codonDef::Alp-1));
	30	}
	31	const MDOUBLE freq(const int i) const {return codonDef::odAl;};
	32
	33	const MDOUBLE d2Pij_dt2(const int i,const int j, const MDOUBLE d) const{
	34	return ((i==j) ? codonDef::alDiv_omalpexp(codonDef::m_alDiv_omalpd): codonDef::m_alDiv_omalpexp(codonDef::m_alDiv_omalpd));
	35	}
	36
	37	};
	38
	39	#endif
	40
	41	// note: according to the new C++ rules, the clone function should be like this:
	42	// virtual aaJC* clone() const { return new aaJC(*this); }
	43	// however, not all compiler support it yet. look at More Effective C++ page 126.
	44
	45
	46

+290

-0

libs/phylogeny/codonUtils.cpp less more

	0	#include "codonUtils.h"
	1	#include "numRec.h"
	2	#include <algorithm>
	3
	4
	5
	6
	7
	8	//check that the input sequences are divisable by 3
	9	void checkInputSeqLength(string codonFile){
	10	nucleotide alph;
	11	ifstream in(codonFile.c_str());
	12	sequenceContainer inputSc = recognizeFormat::readUnAligned(in, &alph);
	13	in.close();
	14	int i;
	15	for (i = 0; i < inputSc.numberOfSeqs(); ++i){
	16	int seqLen = inputSc[i].seqLen();
	17	if ((seqLen % 3) != 0){
	18	string textToPrint = "USER ERROR: unable to read sequence: " + inputSc[i].name() + "\nSequence length is not divisable by three";
	19	errorMsg::reportError(textToPrint);
	20	}
	21	}
	22	}
	23
	24	//this function convert codon sequences to amino sequences.
	25	sequenceContainer convertCodonToAmino(sequenceContainer &codonSc,codon *codonAlph){
	26	amino aaAlph;
	27	sequenceContainer aaSc;
	28	for (int i = 0; i < codonSc.numberOfSeqs(); ++i){
	29	sequence codonSeq = codonSc[i];
	30	sequence aaSeq("", codonSeq.name(), codonSeq .remark(), codonSeq.id(), &aaAlph);
	31	for (int pos = 0; pos < codonSeq .seqLen(); ++pos)
	32	aaSeq.push_back(codonUtility::aaOf(codonSeq[pos],*codonAlph));
	33	aaSc.add(aaSeq);
	34	}
	35	if (codonSc.numberOfSeqs() != aaSc.numberOfSeqs())
	36	errorMsg::reportError("RevTrans: number of codon and Amino sequences is not the same");
	37
	38	return aaSc;
	39	}
	40
	41	// returns 1/sumPijQij
	42	MDOUBLE getMatricesNormalizationFactor(vector<stochasticProcess> & spVec,const distribution * forceDistr){
	43	MDOUBLE sumPijQij=0.0;
	44	int categor;
	45	for ( categor=0; categor<forceDistr->categories();categor++)
	46	sumPijQij+=forceDistr->ratesProb(categor)static_cast<wYangModel>(spVec[categor].getPijAccelerator()->getReplacementModel())->sumPijQij();
	47	if (sumPijQij ==0){
	48	errorMsg::reportError("Error in getMatricesNormalizationFactor - sumPijQij=0");
	49	}
	50	return sumPijQij;
	51	}
	52
	53	// normalize the Q matrix so average rate of substitution = 1
	54	void normalizeMatrices(vector<stochasticProcess> & spVec,const distribution * forceDistr){
	55	MDOUBLE sumPijQij=0.0;
	56	int categor;
	57	for ( categor=0; categor<forceDistr->categories();categor++)
	58	sumPijQij+=forceDistr->ratesProb(categor)static_cast<wYangModel>(spVec[categor].getPijAccelerator()->getReplacementModel())->sumPijQij();
	59	if (sumPijQij ==0){
	60	errorMsg::reportError("Error in normalizeMatrices - sumPijQij=0");
	61	}
	62	for (categor=0; categor<forceDistr->categories();categor++)
	63	static_cast<wYangModel*>(spVec[categor].getPijAccelerator()->getReplacementModel())->norm(1/sumPijQij);
	64
	65	}
	66
	67	Vdouble freqCodonF3x4(const sequenceContainer &nucSc, codon * coAlph){
	68	VVdouble nucFeqPos(3);
	69	int pos= 0;
	70	int nPos = 0;
	71	for (nPos=0;nPos<3;nPos++)
	72	nucFeqPos[nPos].resize(nucSc.alphabetSize(),0.0);
	73
	74	sequenceContainer::constTaxaIterator tIt;
	75	sequenceContainer::constTaxaIterator tItEnd;
	76	tIt.begin(nucSc);
	77	tItEnd.end(nucSc);
	78	while (tIt!= tItEnd) {
	79	pos = 0;
	80	sequence::constIterator sIt;
	81	sequence::constIterator sItEnd;
	82	sIt.begin(*tIt);
	83	sItEnd.end(*tIt);
	84	while (sIt != sItEnd) {
	85	if ((sIt >= 0) && (sIt <nucFeqPos[pos%3].size())) ++nucFeqPos[pos%3][(*sIt)];
	86	if (*sIt == 4) ++nucFeqPos[pos%3][3]; //for T (4) to U (3)
	87	++sIt;
	88	++pos;
	89	}
	90	++tIt;
	91	}
	92	for (nPos=0;nPos<3;nPos++)
	93	changeCountsToFreqs(nucFeqPos[nPos]);
	94
	95
	96	Vdouble freqCodon(coAlph->size(),0.0);
	97
	98	nucleotide n;
	99	for (int c = 0; c<freqCodon.size();c++){
	100
	101	string s = coAlph->fromInt(c);
	102	int nuc0 = n.fromChar(s[0]);
	103	int nuc1 = n.fromChar(s[1]);
	104	int nuc2 = n.fromChar(s[2]);
	105	freqCodon[c] = nucFeqPos[0][nuc0]nucFeqPos[1][nuc1]nucFeqPos[2][nuc2];
	106	}
	107
	108	MDOUBLE sum=0;
	109	for (int i=0;i<coAlph->size();i++){
	110	sum+=freqCodon[i];
	111	}
	112	MDOUBLE stopFreq = 1.0 - sum;
	113	MDOUBLE ep = stopFreq/coAlph->size();
	114	for (int i=0;i<coAlph->size();i++){
	115	freqCodon[i]+=ep;
	116	}
	117
	118	return freqCodon;
	119
	120
	121	}
	122
	123
	124	/***********************************************
	125	The following functions are useful for the selecton server, for creating a
	126	Rasmol script and for setting the color value of each site
	127	***********************************************/
	128
	129
	130	// Positive significant in color dark yellow, non-sig. positive selection - light yellow.
	131	// Purifying selection in shades of bordeaux
	132	vector<vector<int> > create7ColorValues(){
	133	vector<vector<int> > colorsValue;
	134	colorsValue.resize(7);
	135	for (int i=0;i<7;i++)
	136	colorsValue[i].resize(3);
	137	// RGB values of the differnt color bins
	138	colorsValue[0][0] = 255; //yellow positive significant
	139	colorsValue[0][1] = 220 ;
	140	colorsValue[0][2] = 0;
	141
	142	colorsValue[1][0] =255 ; //light yellow - not significant positive selection
	143	colorsValue[1][1] = 255;
	144	colorsValue[1][2] = 120;
	145
	146	//three categories of not significant negative selection according to bordeaux shades (colors like conseq/consurf)
	147
	148	colorsValue[2][0] = 255; //white
	149	colorsValue[2][1] = 255;
	150	colorsValue[2][2] = 255;
	151
	152	colorsValue[3][0] = 252;
	153	colorsValue[3][1] = 237;
	154	colorsValue[3][2] = 244;
	155
	156	colorsValue[4][0] = 250;
	157	colorsValue[4][1] = 201;
	158	colorsValue[4][2] = 222;
	159
	160	colorsValue[5][0] = 240;
	161	colorsValue[5][1] = 125;
	162	colorsValue[5][2] = 171;
	163
	164	//significant negative selection
	165	colorsValue[6][0] = 130;
	166	colorsValue[6][1] = 67;
	167	colorsValue[6][2] = 96;
	168
	169	return colorsValue;
	170	}
	171
	172	//this functions creates a rasmol script (assumes positions are the same between the alignment and the PDB)
	173	void outToRasmolFile(string fileName,vector<int>& color4Site){
	174	ofstream out(fileName.c_str());
	175	vector<vector<int> > colorsValue = create7ColorValues();
	176	int numberOfColor = colorsValue.size();
	177	vector<vector<int> > colors; //for each color (1-9/3) holds vector of sites.
	178	colors.resize(numberOfColor+1);
	179	int i;
	180	for (i=0;i<color4Site.size();i++){
	181	int color=color4Site[i];
	182	if (color>numberOfColor){
	183	errorMsg::reportError("Error in outToColorFile - unknown color");
	184	}
	185	colors[color].push_back(i+1); //add site (position in the vector +1)
	186	}
	187	out<<"select all"<<endl;
	188	out<<"color [200,200,200]"<<endl<<endl;
	189
	190	for (int c=1;c<numberOfColor+1;c++){
	191	out<<"select ";
	192	for (i=0;i<colors[c].size();i++){
	193	if (i==0)
	194	out<<colors[c][i];
	195	else if ((i+1)%6==0)
	196	out<<endl<<"select selected or "<<colors[c][i];
	197
	198	else out<<" , "<<colors[c][i];
	199	}
	200	out<<endl<<"select selected and :a"<<endl;
	201	out<<"color [" <<colorsValue[c-1][0]<<","<<colorsValue[c-1][1]<<","<<colorsValue[c-1][2]<<"]"<<endl;
	202	out<<"spacefill"<<endl<<endl;
	203	}
	204
	205	out.close();
	206	}
	207
	208
	209	// a file with color-coding from Ka/Ks values to color-bins
	210	void kaks2Color(const Vdouble & kaksVec, const Vdouble &lowerBoundV,
	211	const sequence & refSeq, string fileName,codon *co) {
	212	vector<int> colors;
	213	int numOfSitesinAln = kaksVec.size();
	214	Vdouble negativesKaksVec,negativesSite;
	215	negativesKaksVec.clear();
	216	negativesSite.clear();
	217	int i,gapsInRefSeq=0;
	218
	219	for (i=0;i<numOfSitesinAln;i++){
	220	if (codonUtility::aaOf(refSeq[i],*co) == -1) gapsInRefSeq++;
	221	}
	222
	223	// first dealing with positive selection
	224	colors.resize(numOfSitesinAln-gapsInRefSeq);
	225	int gap=0;
	226	for (i=0;i<numOfSitesinAln;i++){
	227	if (codonUtility::aaOf(refSeq[i],*co) == -1){
	228	gap++;
	229	continue;
	230	}
	231	if (lowerBoundV[i]>1) // color 1 (positive selection) : if confidence interval lower bound > 1
	232	colors[i-gap]=1;
	233	else if (kaksVec[i]>1) // color 2(positive selection) : "non-significant"
	234	colors[i-gap]=2;
	235	else {
	236	negativesKaksVec.push_back(kaksVec[i]); //add the value of kaks < 1
	237	negativesSite.push_back(i-gap); //add the number of site of the kaks
	238	}
	239
	240	}
	241
	242	// now dealing with purifying selection
	243	Vdouble orderVec = negativesKaksVec;
	244	if (orderVec.size()>0) // this is since once the whole protein was positive selection... (anomaly)
	245	sort(orderVec.begin(), orderVec.end()); //sort the kaks values to be divided to 5 groups
	246	MDOUBLE percentileNum = 5.0;
	247	int percentileNumInt = 5;
	248	Vdouble maxScoreForPercentile(percentileNumInt);
	249	if (orderVec.size()>0) {
	250	maxScoreForPercentile[0] = orderVec[0];
	251	for (int c = 1; c < percentileNumInt; ++c){
	252	int place = (int)((c / percentileNum) * negativesKaksVec.size());
	253	MDOUBLE maxScore = orderVec[place];
	254	maxScoreForPercentile[c] = maxScore;
	255	}
	256	}
	257
	258	//loop over all the Ka/Ks < 1
	259	for (int j=0; j < negativesKaksVec.size(); ++j){
	260	MDOUBLE r = negativesKaksVec[j]; //the kaks of the site.
	261	int s = (int)negativesSite[j]; //the site.
	262	if (r > maxScoreForPercentile[4])
	263	colors[s] = 3;
	264	else if (r > maxScoreForPercentile[3])
	265	colors[s] = 4;
	266	else if (r> maxScoreForPercentile[2])
	267	colors[s] = 5;
	268	else if (r > maxScoreForPercentile[1])
	269	colors[s] = 6;
	270	else if (r >= maxScoreForPercentile[0])
	271	colors[s] = 7;
	272	}
	273	//print to file
	274	ofstream out(fileName.c_str());
	275	gap=0;
	276	amino aminoAcid;
	277	LOG(5,<<"Printing selection color bins to file"<<endl);
	278	for (i=0;i<refSeq.seqLen();i++){
	279	int aa = codonUtility::aaOf(refSeq[i], *co);
	280	if (aa==-1){
	281	gap++;
	282	continue;
	283	}
	284	string aaStr = aminoAcid.fromInt(aa);
	285	out<<i+1-gap <<"\t"<<aaStr<<"\t"<<colors[i-gap];
	286	out<<endl;
	287	}
	288	out.close();
	289	}

+37

-0

libs/phylogeny/codonUtils.h less more

	0	#ifndef CODON_UTILS_H
	1	#define CODON_UTILS_H
	2
	3	#include <iostream>
	4	#include "nucleotide.h"
	5	#include "codon.h"
	6	#include "amino.h"
	7	#include "logFile.h"
	8	#include "fastaFormat.h"
	9	#include "clustalFormat.h"
	10	#include "recognizeFormat.h"
	11	#include "someUtil.h"
	12	#include "definitions.h"
	13	#include "sequenceContainer.h"
	14	#include "stochasticProcess.h"
	15	#include "wYangModel.h"
	16	#include "evaluateCharacterFreq.h"
	17	#include "geneticCodeHolder.h"
	18	#include "codon.h"
	19	using namespace std;
	20
	21
	22
	23	void checkInputSeqLength(string codonFile);
	24	sequenceContainer convertCodonToAmino(sequenceContainer &codonSc,codon *codonAlph);
	25	vector<vector<int> > create7ColorValues();
	26	void outToRasmolFile(string fileName,vector<int>& color4Site);
	27
	28	MDOUBLE getMatricesNormalizationFactor(vector<stochasticProcess> & spVec,const distribution * forceDistr);
	29	void normalizeMatrices(vector<stochasticProcess> & spVec,const distribution * forceDistr);
	30
	31	Vdouble freqCodonF3x4(const sequenceContainer &nucSc,codon *coAlph);
	32
	33	void kaks2Color(const Vdouble & kaksVec,const Vdouble &lowerBoundV,
	34	const sequence & refSeq, string fileName,codon *co);
	35
	36	#endif

+206

-0

libs/phylogeny/computeCounts.cpp less more

	0	// $Id: computeCounts.cpp 9899 2011-10-11 19:56:48Z rubi $
	1
	2	#include "computeCounts.h"
	3	void computeCounts::computeCountsNodeFatherNodeSonHomPos(const sequenceContainer& sc,
	4	const computePijHom& pi,
	5	const stochasticProcess& sp,
	6	const suffStatGlobalHomPos& cup,
	7	const suffStatGlobalHomPos& cdown,
	8	const MDOUBLE weight,
	9	const doubleRep posProb,
	10	const tree::nodeP nodeSon,
	11	countTableComponentHom& _ctc,
	12	const MDOUBLE rateCategorProb
	13	)
	14	{
	15	assert(posProb>0.0);
	16	if (weight == 0) return;
	17	int alph1,alph2;
	18	for (alph1 =0; alph1< pi.alphabetSize(); ++alph1) {
	19	for (alph2 =0; alph2< pi.alphabetSize(); ++alph2) {
	20
	21	doubleRep tmp = cup.get(nodeSon->id(),alph1) *
	22	cdown.get(nodeSon->id(),alph2) *
	23	pi.getPij(nodeSon->id(),alph1,alph2)*
	24	sp.freq(alph1)
	25	* rateCategorProb
	26	/
	27	posProb;
	28	_ctc.addToCounts(alph1,alph2,convert(tmp)*weight);
	29	}
	30	}
	31	}
	32
	33	//old
	34	void computeCounts::computeCountsNodeFatherNodeSonHomPosProportionalEB(const sequenceContainer& sc,
	35	const computePijHom& pi,
	36	const stochasticProcess& sp,
	37	const suffStatGlobalHomPos& cup,
	38	const suffStatGlobalHomPos& cdown,
	39	const MDOUBLE weight,
	40	const doubleRep posProb,
	41	const tree::nodeP nodeSon,
	42	countTableComponentHom& _ctc,
	43	const MDOUBLE globalLocalRateCategorProb)
	44	{
	45	assert(posProb>0.0);
	46	if (weight == 0) return;
	47	int alph1,alph2;
	48	for (alph1 =0; alph1< pi.alphabetSize(); ++alph1) {
	49	for (alph2 =0; alph2< pi.alphabetSize(); ++alph2) {
	50	//here we multiply: P(a,b\|globalRate_x,localRate_y,D)*P(globalRate_x,localRate_y\|D)
	51	//which is: (updown)[(P(D\|globalRate_x,localRate_y)*GlobalLocalRateCategoriesProb)/posProb]
	52	doubleRep tmp = (cup.get(nodeSon->id(),alph1) *
	53	cdown.get(nodeSon->id(),alph2)) *
	54	(pi.getPij(nodeSon->id(),alph1,alph2) *
	55	sp.freq(alph1) *
	56	globalLocalRateCategorProb
	57	/
	58	posProb);
	59	_ctc.addToCounts(alph1,alph2,convert(tmp)*weight);
	60	}
	61	}
	62	}
	63
	64	//new
	65	void computeCounts::computeCountsNodeFatherNodeSonHomPosProportionalEB(const sequenceContainer& sc,
	66	const computePijHom& pi,
	67	const stochasticProcess& sp,
	68	const suffStatGlobalHomPos& cup,
	69	const suffStatGlobalHomPos& cdown,
	70	const MDOUBLE weight,
	71	const VdoubleRep posProbVec,
	72	const tree::nodeP nodeSon,
	73	countTableComponentHom& _ctc)
	74	{
	75	if (weight == 0) return;
	76	int alph1,alph2;
	77	doubleRep posProb(0.0);
	78	for(int globalRateCat = 0;globalRateCat < posProbVec.size();++globalRateCat){
	79	posProb += posProbVec[globalRateCat];
	80	}
	81	for (alph1 =0; alph1< pi.alphabetSize(); ++alph1) {
	82	for (alph2 =0; alph2< pi.alphabetSize(); ++alph2) {
	83	//here we multiply: P(a,b\|globalRate_x,localRate_y,D)*P(globalRate_x,localRate_y\|D)
	84	//which is: (updown)[(P(D\|globalRate_x,localRate_y)*GlobalLocalRateCategoriesProb)/posProb]
	85	doubleRep tmp = (cup.get(nodeSon->id(),alph1) *
	86	cdown.get(nodeSon->id(),alph2)) *
	87	(pi.getPij(nodeSon->id(),alph1,alph2) *
	88	sp.freq(alph1)
	89	/
	90	posProb);
	91	_ctc.addToCounts(alph1,alph2,convert(tmp)*weight);
	92	}
	93	}
	94	}
	95
	96	void computeCounts::computeCountsNodeFatherNodeSonHomPos(const sequenceContainer& sc,
	97	const computePijHom& pi,
	98	const stochasticProcess& sp,
	99	const suffStatGlobalHomPos& cup,
	100	const suffStatGlobalHomPos& cdown, //_cdown[categor][letterAtRoot]
	101	const MDOUBLE weight,
	102	const doubleRep posProb,
	103	const tree::nodeP nodeSon,
	104	countTableComponentHom& _ctc, //_computeCountsV[mynode->id()][letterAtRoot][categor]
	105	const MDOUBLE rateCategorProb,
	106	const int letterInRoot
	107	)
	108	{
	109	assert(posProb>0.0);
	110	if (weight == 0) return;
	111	int alph1,alph2;
	112	for (alph1 =0; alph1< pi.alphabetSize(); ++alph1) {
	113	for (alph2 =0; alph2< pi.alphabetSize(); ++alph2) {
	114	doubleRep tmp = cup.get(nodeSon->id(),alph1) *
	115	cdown.get(nodeSon->id(),alph2) * // down was already given with specific root
	116	pi.getPij(nodeSon->id(),alph1,alph2)*
	117	sp.freq(alph1) // fixed root? or already computed byt the downAlg?
	118	* rateCategorProb
	119	//* sp.freq(letterInRoot) // to account for the additional letterAtRoot loop - move it to after getCounts
	120	/posProb;
	121	_ctc.addToCounts(alph1,alph2,convert(tmp)*weight);
	122	}
	123	}
	124	}
	125
	126
	127
	128	void computeCounts::fillCountTableComponentGam(countTableComponentGam& ctcGam,
	129	const stochasticProcess& sp,
	130	const sequenceContainer& sc,
	131	const computePijGam& pij0,
	132	const suffStatGlobalGam& cup,
	133	const suffStatGlobalGam& cdown,
	134	const Vdouble * weights,
	135	tree::nodeP nodeSon,
	136	const VdoubleRep& posProbVec) {
	137	ctcGam.countTableComponentAllocatePlace(sp.alphabetSize(),sp.categories());
	138	for (int rateCat =0; rateCat< sp.categories(); ++ rateCat) {
	139	fillCountTableComponentGamSpecRateCategor(rateCat,ctcGam[rateCat],sp,
	140	sc,pij0[rateCat],
	141	cup,cdown,weights,posProbVec,nodeSon);
	142	}
	143	}
	144
	145	void computeCounts::fillCountTableComponentGamSpecRateCategor(const int rateCategor,
	146	countTableComponentHom& ctcHom,
	147	const stochasticProcess& sp,
	148	const sequenceContainer& sc,
	149	const computePijHom& pi,
	150	const suffStatGlobalGam& cup,
	151	const suffStatGlobalGam& cdown,
	152	const Vdouble * weights,
	153	const VdoubleRep& posProbVec, //prob of the position with gamma
	154	tree::nodeP nodeSon) {
	155	computeCounts cc;
	156	for (int pos = 0; pos < sc.seqLen(); ++pos) {
	157	MDOUBLE weig = (weights ? (*weights)[pos] : 1.0);
	158	cc.computeCountsNodeFatherNodeSonHomPos(sc,pi,sp,cup[pos][rateCategor],
	159	cdown[pos][rateCategor],
	160	weig,posProbVec[pos],nodeSon,
	161	ctcHom,sp.ratesProb(rateCategor));
	162	}
	163	}
	164	/*
	165	void computeCounts::computeCountsNodeXNodeYHomPos(
	166	const tree::nodeP nodeX,
	167	const tree::nodeP nodeY) {
	168
	169	const tree::nodeP nodeFather = nodeSon->father();
	170	_ctc.zero();
	171	if (_weight!=NULL) { // this is one of the MAIN LOOPS. no "if"s deep inside it!
	172	for (int pos=0; pos< _pi.seqLen(); ++pos) {
	173	if ((*_weight)[pos] == 0) continue;
	174	for (int alph1 =0; alph1< _pi.alphabetSize(); ++alph1) {
	175	for (int alph2 =0; alph2< _pi.alphabetSize(); ++alph2) {
	176	for (int rate =0; rate< _pi.categories(); ++rate) {
	177	MDOUBLE tmp = _cup.get(nodeSon->id(),pos,rate,alph1) *
	178	_cdown.get(nodeSon->id(),pos,rate,alph2) *
	179	_pi.pij(pos)->getPij(nodeSon->id(),alph1,alph2,rate)*
	180	_pi.stocProcessFromPos(pos)->freq(alph1)/
	181	_cprobAtEachPos.getProb(pos);
	182	_ctc.addToCounts(alph1,alph2,rate,tmp*(*_weight)[pos]);
	183	}
	184	}
	185	}
	186	}
	187	}
	188	else {
	189	for (int pos=0; pos< _pi.seqLen(); ++pos) {
	190	for (int alph1 =0; alph1< _pi.alphabetSize(); ++alph1) {
	191	for (int alph2 =0; alph2< _pi.alphabetSize(); ++alph2) {
	192	for (int rate =0; rate< _pi.categories(); ++rate) {
	193	MDOUBLE tmp = _cup.get(nodeSon->id(),pos,rate,alph1) *
	194	_cdown.get(nodeSon->id(),pos,rate,alph2) *
	195	_pi.pij(pos)->getPij(nodeSon->id(),alph1,alph2,rate)*
	196	_pi.stocProcessFromPos(pos)->freq(alph1)/
	197	_cprobAtEachPos.getProb(pos);
	198	_ctc.addToCounts(alph1,alph2,rate,tmp);
	199	}
	200	}
	201	}
	202	}
	203	}
	204	*/
	205

+92

-0

libs/phylogeny/computeCounts.h less more

	0	// $Id: computeCounts.h 9903 2011-10-11 20:16:28Z rubi $
	1
	2	// version 1.00
	3	// last modified 3 Nov 2002
	4
	5	#ifndef ___COMPUTE_COUNTS
	6	#define ___COMPUTE_COUNTS
	7
	8	#include "definitions.h"
	9	#include "countTableComponent.h"
	10	#include "sequenceContainer.h"
	11	#include "computePijComponent.h"
	12	#include "suffStatComponent.h"
	13
	14	// things included for the function "fillCountTableComponentGam"
	15	#include "sequenceContainer.h"
	16
	17	class computeCounts {
	18	public:
	19	explicit computeCounts() {};
	20	void computeCountsNodeFatherNodeSonHomPos(const sequenceContainer& sc,
	21	const computePijHom& pi,
	22	const stochasticProcess& sp,
	23	const suffStatGlobalHomPos& cup,
	24	const suffStatGlobalHomPos& cdown,
	25	const MDOUBLE weight,
	26	const doubleRep posProb,
	27	const tree::nodeP nodeSon,
	28	countTableComponentHom& _ctc,
	29	const MDOUBLE rateCategorProb = 1.0); //CODE_RED
	30
	31	//Proportional rate implementation - old
	32	void computeCountsNodeFatherNodeSonHomPosProportionalEB(const sequenceContainer& sc,
	33	const computePijHom& pi,
	34	const stochasticProcess& sp,
	35	const suffStatGlobalHomPos& cup,
	36	const suffStatGlobalHomPos& cdown,
	37	const MDOUBLE weight,
	38	const doubleRep posProb,
	39	const tree::nodeP nodeSon,
	40	countTableComponentHom& _ctc,
	41	const MDOUBLE globalLocalRateCategorProb = 1.0); //CODE_RED
	42	//Proportional rate implementation - new
	43	void computeCountsNodeFatherNodeSonHomPosProportionalEB(const sequenceContainer& sc,
	44	const computePijHom& pi,
	45	const stochasticProcess& sp,
	46	const suffStatGlobalHomPos& cup,
	47	const suffStatGlobalHomPos& cdown,
	48	const MDOUBLE weight,
	49	const VdoubleRep posProbVec,
	50	const tree::nodeP nodeSon,
	51	countTableComponentHom& _ctc);
	52
	53
	54	void computeCountsNodeFatherNodeSonHomPos(const sequenceContainer& sc,
	55	const computePijHom& pi,
	56	const stochasticProcess& sp,
	57	const suffStatGlobalHomPos& cup,
	58	const suffStatGlobalHomPos& cdown,
	59	const MDOUBLE weight,
	60	const doubleRep posProb,
	61	const tree::nodeP nodeSon,
	62	countTableComponentHom& _ctc,
	63	const MDOUBLE rateCategorProb,
	64	const int letterInRoot);
	65
	66
	67
	68	void fillCountTableComponentGam(countTableComponentGam& ctcGam,
	69	const stochasticProcess& sp,
	70	const sequenceContainer& sc,
	71	const computePijGam& pij0,
	72	const suffStatGlobalGam& cup,
	73	const suffStatGlobalGam& cdown,
	74	const Vdouble * weights,
	75	tree::nodeP nodeSon,
	76	const VdoubleRep& posProbVec);
	77
	78	void fillCountTableComponentGamSpecRateCategor(const int rateCategor,
	79	countTableComponentHom& ctcHom,
	80	const stochasticProcess& sp,
	81	const sequenceContainer& sc,
	82	const computePijHom& pi,
	83	const suffStatGlobalGam& cup,
	84	const suffStatGlobalGam& cdown,
	85	const Vdouble * weights,
	86	const VdoubleRep& posProbVec, //prob of the position with gamma
	87	tree::nodeP nodeSon);
	88	};
	89
	90
	91	#endif

+221

-0

libs/phylogeny/computeDownAlg.cpp less more

	0	// $Id: computeDownAlg.cpp 4585 2008-08-05 15:02:58Z cohenofi $
	1
	2	#include "definitions.h"
	3	#include "computeDownAlg.h"
	4	#include "treeIt.h"
	5
	6
	7	void computeDownAlg::fillComputeDown(const tree& et,
	8	const sequenceContainer& sc,
	9	const int pos,
	10	const computePijHom& pi,
	11	suffStatGlobalHomPos& ssc,
	12	const suffStatGlobalHomPos& cup){
	13	ssc.allocatePlace(et.getNodesNum(), pi.alphabetSize());
	14	treeIterTopDownConst tIt(et);
	15	for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
	16	int letter,letterInFather,bro,letterInSon;
	17	if (mynode->father()==NULL) {// if root
	18	for(letter=0; letter<pi.alphabetSize();letter++) {
	19	ssc.set(mynode->id(),letter,1.0);
	20	}
	21	mynode = tIt.next(); //continue
	22	}
	23	tree::nodeP fatherNode=mynode->father();
	24	const int n_bro=fatherNode->getNumberOfSons();
	25	for(letter=0; letter<pi.alphabetSize();letter++) {//alpha
	26	doubleRep totalProb=1.0;
	27	doubleRep fatherTerm=0;
	28	if (fatherNode->father()!=NULL) {
	29	for(letterInFather=0; letterInFather<pi.alphabetSize();letterInFather++)
	30	fatherTerm += pi.getPij(fatherNode->id(),letter,letterInFather)*
	31	ssc.get(fatherNode->id(),letterInFather);
	32	}
	33	else {
	34	fatherTerm=1.0;
	35	}
	36	doubleRep brotherTerm=1.0;
	37	for(bro = 0; bro < n_bro; bro++) {
	38	tree::nodeP brother = fatherNode->getSon(bro);
	39	if (brother != mynode) {
	40	doubleRep tmp_bro=0.0;
	41	for(letterInSon=0; letterInSon<pi.alphabetSize();letterInSon++) {
	42	tmp_bro+=pi.getPij(fatherNode->getSon(bro)->id(),letter,letterInSon)*
	43	cup.get(brother->id(),letterInSon);
	44	}
	45	brotherTerm *=tmp_bro;
	46	}
	47	}
	48	totalProb = fatherTerm * brotherTerm;
	49	ssc.set(mynode->id(),letter,totalProb);
	50	}
	51	}
	52	}
	53
	54
	55	//use Pij(t) from the stochastic process instead of precomputed probabilities (via the computePijHom class)
	56	void computeDownAlg::fillComputeDown(const tree& et,
	57	const sequenceContainer& sc,
	58	const int pos,
	59	const stochasticProcess& sp,
	60	suffStatGlobalHomPos& ssc,
	61	const suffStatGlobalHomPos& cup){
	62	ssc.allocatePlace(et.getNodesNum(), sp.alphabetSize());
	63	treeIterTopDownConst tIt(et);
	64	for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
	65	int letter, letterInFather, bro, letterInSon;
	66	if (mynode->isRoot()) {// if root: set all values to 1.0
	67	for(letter = 0; letter < sp.alphabetSize(); letter++) {
	68	ssc.set(mynode->id(), letter, 1.0);
	69	}
	70	mynode = tIt.next(); //continue
	71	}
	72	tree::nodeP fatherNode = mynode->father();
	73	const int n_bro = fatherNode->getNumberOfSons();
	74	for(letter = 0; letter < sp.alphabetSize(); letter++) {
	75	doubleRep totalProb=1.0;
	76	doubleRep fatherTerm=0;
	77	if (fatherNode->isRoot())
	78	{
	79	fatherTerm = 1.0;
	80	}
	81	else
	82	{
	83	for(letterInFather = 0; letterInFather < sp.alphabetSize(); letterInFather++)
	84	{
	85	MDOUBLE dist = fatherNode->dis2father() * sp.getGlobalRate();
	86	fatherTerm += sp.Pij_t(letter, letterInFather, dist)
	87	* ssc.get(fatherNode->id(), letterInFather);
	88	}
	89	}
	90	doubleRep brotherTerm = 1.0;
	91	for(bro = 0; bro < n_bro; bro++) {
	92	tree::nodeP brother = fatherNode->getSon(bro);
	93	if (brother != mynode) {
	94	doubleRep tmp_bro=0.0;
	95	for(letterInSon = 0; letterInSon < sp.alphabetSize(); letterInSon++)
	96	{
	97	MDOUBLE dist = brother->dis2father() * sp.getGlobalRate();
	98	tmp_bro += sp.Pij_t(letter, letterInSon, dist)
	99	* cup.get(brother->id(), letterInSon);
	100	}
	101	brotherTerm *= tmp_bro;
	102	}
	103	}
	104	totalProb = fatherTerm * brotherTerm;
	105	ssc.set(mynode->id(), letter, totalProb);
	106	}
	107	}
	108	}
	109
	110
	111	//compute probabilities with a site-specific rate
	112	void computeDownAlg::fillComputeDownSpecificRate(const tree& et,
	113	const sequenceContainer& sc,
	114	const int pos,
	115	const stochasticProcess& sp,
	116	suffStatGlobalHomPos& ssc,
	117	const suffStatGlobalHomPos& cup,
	118	const MDOUBLE gRate){
	119	ssc.allocatePlace(et.getNodesNum(), sp.alphabetSize());
	120	treeIterTopDownConst tIt(et);
	121	for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
	122	int letter, letterInFather, bro, letterInSon;
	123	if (mynode->isRoot()) {// if root: set all values to 1.0
	124	for(letter = 0; letter < sp.alphabetSize(); letter++) {
	125	ssc.set(mynode->id(), letter, 1.0);
	126	}
	127	mynode = tIt.next(); //continue
	128	}
	129	tree::nodeP fatherNode = mynode->father();
	130	const int n_bro = fatherNode->getNumberOfSons();
	131	for(letter = 0; letter < sp.alphabetSize(); letter++) {
	132	doubleRep totalProb=1.0;
	133	doubleRep fatherTerm=0;
	134	if (fatherNode->isRoot())
	135	{
	136	fatherTerm = 1.0;
	137	}
	138	else
	139	{
	140	for(letterInFather = 0; letterInFather < sp.alphabetSize(); letterInFather++)
	141	{
	142	MDOUBLE dist = fatherNode->dis2father() * gRate * sp.getGlobalRate();
	143	fatherTerm += sp.Pij_t(letter, letterInFather, dist)
	144	* ssc.get(fatherNode->id(), letterInFather);
	145	}
	146	}
	147	doubleRep brotherTerm = 1.0;
	148	for(bro = 0; bro < n_bro; bro++) {
	149	tree::nodeP brother = fatherNode->getSon(bro);
	150	if (brother != mynode) {
	151	doubleRep tmp_bro=0.0;
	152	for(letterInSon = 0; letterInSon < sp.alphabetSize(); letterInSon++)
	153	{
	154	MDOUBLE dist = brother->dis2father() * gRate * sp.getGlobalRate();
	155	tmp_bro += sp.Pij_t(letter, letterInSon, dist)
	156	* cup.get(brother->id(), letterInSon);
	157	}
	158	brotherTerm *= tmp_bro;
	159	}
	160	}
	161	totalProb = fatherTerm * brotherTerm;
	162	ssc.set(mynode->id(), letter, totalProb);
	163	}
	164	}
	165	}
	166
	167	// The filled sscGivenRoot is using the "Gam" class (over all rate categories) for placing letter@root hidden state
	168	void computeDownAlg::fillComputeDownNonReversible(const tree& et,
	169	const sequenceContainer& sc,
	170	const int pos,
	171	const computePijHom& pi,
	172	suffStatGlobalGamPos& sscGivenRoot,
	173	const suffStatGlobalHomPos& cup)
	174	{
	175	sscGivenRoot.allocatePlace(pi.alphabetSize(),et.getNodesNum(), pi.alphabetSize());
	176	treeIterTopDownConst tIt(et);
	177	for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
	178	int letter,letterInFather,bro,letterInSon;
	179	if (mynode->father()==NULL) {//root
	180	for (int letterAtRoot=0; letterAtRoot<pi.alphabetSize();letterAtRoot++){
	181	for(letter=0; letter<pi.alphabetSize();letter++) {
	182	MDOUBLE ind = (letterAtRoot==letter?1.0:0.0);
	183	sscGivenRoot.set(letterAtRoot,mynode->id(),letter,ind);
	184	}
	185	}
	186	mynode = tIt.next(); //continue
	187	}
	188	tree::nodeP fatherNode=mynode->father();
	189	const int n_bro=fatherNode->getNumberOfSons();
	190	for(int letterAtRoot=0; letterAtRoot<pi.alphabetSize();letterAtRoot++) {//root state
	191	for(letter=0; letter<pi.alphabetSize();letter++) {//letter for current down calc (at father of node)
	192	doubleRep totalProb=1.0;
	193	doubleRep fatherTerm=0;
	194	//down of father
	195	if (fatherNode->father()!=NULL) { // not son of root
	196	for(letterInFather=0; letterInFather<pi.alphabetSize();letterInFather++)//father of father
	197	fatherTerm += pi.getPij(fatherNode->id(),letterInFather,letter)*
	198	sscGivenRoot.get(letterAtRoot,fatherNode->id(),letterInFather);
	199	}
	200	else {//son of root
	201	fatherTerm=(letterAtRoot==letter?1.0:0.0);
	202	}
	203	doubleRep brotherTerm=1.0;
	204	for(bro = 0; bro < n_bro; bro++) {
	205	tree::nodeP brother = fatherNode->getSon(bro);
	206	if (brother != mynode) {
	207	doubleRep tmp_bro=0.0;
	208	for(letterInSon=0; letterInSon<pi.alphabetSize();letterInSon++) {
	209	tmp_bro+=pi.getPij(fatherNode->getSon(bro)->id(),letter,letterInSon)*
	210	cup.get(brother->id(),letterInSon);
	211	}
	212	brotherTerm *=tmp_bro;
	213	}
	214	}
	215	totalProb = fatherTerm * brotherTerm;
	216	sscGivenRoot.set(letterAtRoot,mynode->id(),letter,totalProb);
	217	}
	218	}
	219	}
	220	}⏎

+49

-0

libs/phylogeny/computeDownAlg.h less more

	0	// $Id: computeDownAlg.h 3107 2007-12-27 12:38:05Z adist $
	1
	2	#ifndef ___COMPUTE_DOWN_ALG
	3	#define ___COMPUTE_DOWN_ALG
	4
	5	#include "definitions.h"
	6	#include "tree.h"
	7	#include "suffStatComponent.h"
	8	#include "sequenceContainer.h"
	9	#include "computePijComponent.h"
	10
	11
	12	class computeDownAlg {
	13	public:
	14	void fillComputeDown(const tree& et,
	15	const sequenceContainer& sc,
	16	const int pos,
	17	const computePijHom& pi,
	18	suffStatGlobalHomPos& ssc,
	19	const suffStatGlobalHomPos& cup);
	20
	21	void fillComputeDown(const tree& et,
	22	const sequenceContainer& sc,
	23	const int pos,
	24	const stochasticProcess& sp,
	25	suffStatGlobalHomPos& ssc,
	26	const suffStatGlobalHomPos& cup);
	27
	28	void fillComputeDownSpecificRate(const tree& et,
	29	const sequenceContainer& sc,
	30	const int pos,
	31	const stochasticProcess& sp,
	32	suffStatGlobalHomPos& ssc,
	33	const suffStatGlobalHomPos& cup,
	34	const MDOUBLE gRate);
	35
	36	/** compute the down computation for a non-reversible model:
	37	each down computation is conditioned on the state at the root.
	38	This means that the vector field is of one additional dimension (the alphabet at the root)
	39	and hence the use of the suffStatGlobalGamPos (=vector<suffStatGlobalHomPos>)
	40	**/
	41	void fillComputeDownNonReversible(const tree& et,
	42	const sequenceContainer& sc,
	43	const int pos,
	44	const computePijHom& pi,
	45	suffStatGlobalGamPos& sscGivenRoot,
	46	const suffStatGlobalHomPos& cup);
	47	};
	48	#endif

+353

-0

libs/phylogeny/computeJumps.cpp less more

	0	#include "computeJumps.h"
	1	#include "talRandom.h"
	2	#include "someUtil.h"
	3	#include "matrixUtils.h"
	4	#include <algorithm>
	5
	6
	7	computeJumps::computeJumps(const MDOUBLE Lambda1, const MDOUBLE Lambda2 , const MDOUBLE r, const int maxNumOfChangesPerBranchSum)
	8	: _Lambda1(Lambda1), _Lambda2(Lambda2),_maxNumOfChangesPerBranchSum(maxNumOfChangesPerBranchSum)
	9	{
	10	if(_Lambda1==_Lambda2)
	11	_Lambda1+=EPSILON; // Patch: fix a BUG, if gain==loss the probability of transition from 0 to 1 given states start==End==1, is NA, thus add epsilon
	12
	13	_gFuncStart0 = gFunc(_Lambda1, _Lambda2, r);
	14	_gFuncStart0MinusR = gFunc(_Lambda1, _Lambda2, -r);
	15	_gFuncStart1 = gFunc(_Lambda2, _Lambda1, r);
	16	_gFuncStart1MinusR = gFunc(_Lambda2, _Lambda1, -r);
	17	}
	18	computeJumps::~computeJumps()
	19	{
	20	}
	21
	22
	23	/********************************************************************************************
	24	getExpectation
	25	*********************************************************************************************/
	26	MDOUBLE computeJumps::getExpectation(const MDOUBLE BranchLength, int terminalStart, int terminalEnd, int fromId, int toId)
	27	{
	28	if(BranchLength>=0){
	29	if(fromId==0 && toId==1){ // Gain
	30	if(terminalStart==0 && terminalEnd==1)
	31	return gainExpGiven01(BranchLength);
	32	if(terminalStart==0 && terminalEnd==0)
	33	return gainExpGiven00(BranchLength);
	34	if(terminalStart==1 && terminalEnd==1)
	35	return gainExpGiven11(BranchLength);
	36	else //(terminalStart==1 && terminalEnd==0)
	37	return gainExpGiven10(BranchLength);
	38	}
	39	if(fromId==1 && toId==0){ // Loss
	40	if(terminalStart==0 && terminalEnd==1)
	41	return lossExpGiven01(BranchLength);
	42	if(terminalStart==0 && terminalEnd==0)
	43	return lossExpGiven00(BranchLength);
	44	if(terminalStart==1 && terminalEnd==1)
	45	return lossExpGiven11(BranchLength);
	46	else //(terminalStart==1 && terminalEnd==0)
	47	return lossExpGiven10(BranchLength);
	48	}
	49	else
	50	return 0;
	51	}
	52	else
	53	return 0;
	54
	55	}
	56	/********************************************************************************************
	57	*********************************************************************************************/
	58	MDOUBLE computeJumps::getTotalExpectation(const MDOUBLE BranchLength, int terminalStart, int terminalEnd)
	59	{
	60	if(BranchLength>=0){
	61	if(terminalStart==0 && terminalEnd==1)
	62	return m01(BranchLength);
	63	if(terminalStart==0 && terminalEnd==0)
	64	return m00(BranchLength);
	65	if(terminalStart==1 && terminalEnd==1)
	66	return m11(BranchLength);
	67	else //(terminalStart==1 && terminalEnd==0)
	68	return m10(BranchLength);
	69	}
	70	else
	71	return 0;
	72
	73	}
	74
	75
	76	/********************************************************************************************
	77	gainExpGivenXY lossExpGivenXY
	78	// Note: divide by Pij, since the computation is gainExp and End=0 given start=0
	79	*********************************************************************************************/
	80	MDOUBLE computeJumps::gainExpGiven01(MDOUBLE BranchLength){
	81	return 0.5*(m01(BranchLength) +Pij_t(0,1,BranchLength))/Pij_t(0,1,BranchLength);
	82	}
	83	MDOUBLE computeJumps::gainExpGiven00(MDOUBLE BranchLength){
	84	return 0.5*(m00(BranchLength)/Pij_t(0,0,BranchLength));
	85	}
	86	MDOUBLE computeJumps::gainExpGiven11(MDOUBLE BranchLength){
	87	return 0.5*(m11(BranchLength)/Pij_t(1,1,BranchLength) ); //???
	88	}
	89	MDOUBLE computeJumps::gainExpGiven10(MDOUBLE BranchLength){
	90	return m10(BranchLength)/Pij_t(1,0,BranchLength) - lossExpGiven10(BranchLength); //???
	91	}
	92	//////////////////////////////////////////////////////////////////////////
	93	MDOUBLE computeJumps::lossExpGiven01(MDOUBLE BranchLength){
	94	return m01(BranchLength)/Pij_t(0,1,BranchLength) - gainExpGiven01(BranchLength); //???
	95	}
	96	MDOUBLE computeJumps::lossExpGiven00(MDOUBLE BranchLength){
	97	return m00(BranchLength)/Pij_t(0,0,BranchLength) - gainExpGiven00(BranchLength); //???
	98	}
	99	MDOUBLE computeJumps::lossExpGiven11(MDOUBLE BranchLength){
	100	return m11(BranchLength)/Pij_t(1,1,BranchLength) - gainExpGiven11(BranchLength); //???
	101	}
	102	MDOUBLE computeJumps::lossExpGiven10(MDOUBLE BranchLength){
	103	return 0.5*(m10(BranchLength) + Pij_t(1,0,BranchLength) )/Pij_t(1,0,BranchLength); //???
	104	//return m10(BranchLength)/Pij_t(1,0,BranchLength) - gainExpGiven10(BranchLength); //???
	105	}
	106
	107
	108
	109	/********************************************************************************************
	110	getProbability
	111	*********************************************************************************************/
	112	MDOUBLE computeJumps::getProb(const MDOUBLE BranchLength, int terminalStart, int terminalEnd, int fromId, int toId)
	113	{
	114	if(BranchLength>=0){
	115	if(fromId==0 && toId==1){ // Gain
	116	if(terminalStart==0 && terminalEnd==1)
	117	return gainProbGiven01(BranchLength);
	118	if(terminalStart==0 && terminalEnd==0)
	119	return gainProbGiven00(BranchLength);
	120	if(terminalStart==1 && terminalEnd==1)
	121	return gainProbGiven11(BranchLength);
	122	else //(terminalStart==1 && terminalEnd==0)
	123	return gainProbGiven10(BranchLength); // if g=l, return -NaN
	124	}
	125	if(fromId==1 && toId==0){ // Loss
	126	if(terminalStart==0 && terminalEnd==1)
	127	return lossProbGiven01(BranchLength); // if g=l, return -NaN
	128	if(terminalStart==0 && terminalEnd==0)
	129	return lossProbGiven00(BranchLength);
	130	if(terminalStart==1 && terminalEnd==1)
	131	return lossProbGiven11(BranchLength);
	132	else //(terminalStart==1 && terminalEnd==0)
	133	return lossProbGiven10(BranchLength);
	134	}
	135	else
	136	return 0;
	137	}
	138	else
	139	return 0;
	140
	141	}
	142	//////////////////////////////////////////////////////////////////////////
	143	MDOUBLE computeJumps::gainProbGiven01(MDOUBLE BranchLength){
	144	MDOUBLE probSum = 1.0;
	145	return probSum;
	146	}
	147	MDOUBLE computeJumps::gainProbGiven00(MDOUBLE BranchLength){
	148	MDOUBLE probSum = 0.0;
	149	//A Sum(2,4,6,...) changes
	150	//for(int k = 1; k<=_maxNumOfChangesPerBranchSum; ++k){
	151	// probSum += _gFuncStart0.qFunc_2k(BranchLength,k);
	152	//}
	153	//B 1 - Sum(uneven changes) - zeroEvenChanges
	154	probSum = 1 - 0.5*(_gFuncStart0.gFunc_(BranchLength) - _gFuncStart0MinusR.gFunc_(BranchLength)) - _gFuncStart0.qFunc_2k(BranchLength,0);
	155	return probSum/Pij_t(0,0,BranchLength);
	156	}
	157	MDOUBLE computeJumps::gainProbGiven11(MDOUBLE BranchLength){
	158	MDOUBLE probSum = 0.0;
	159	//A Sum(2,4,6,...) changes
	160	//for(int k = 1; k<=_maxNumOfChangesPerBranchSum; ++k){
	161	// probSum += _gFuncStart1.qFunc_2k(BranchLength,k); //? _gFuncStart1 or _gFuncStart0
	162	//}
	163	//B 1 - Sum(uneven changes) - zeroEvenChanges
	164	probSum = 1 - 0.5*(_gFuncStart1.gFunc_(BranchLength) - _gFuncStart1MinusR.gFunc_(BranchLength)) - _gFuncStart1.qFunc_2k(BranchLength,0);
	165	return probSum/Pij_t(1,1,BranchLength);
	166	}
	167	MDOUBLE computeJumps::gainProbGiven10(MDOUBLE BranchLength){
	168	MDOUBLE probSum = 0.0;
	169	//A Sum(3,5,7,...) changes
	170	//for(int k = 2; k<=_maxNumOfChangesPerBranchSum; ++k){
	171	// probSum += _gFuncStart1.qFunc_2k_1(BranchLength,k);
	172	//}
	173	//B 1 - Sum(even changes) - oneUnEvenChanges
	174	probSum = 1 - 0.5*(_gFuncStart1.gFunc_(BranchLength) + _gFuncStart1MinusR.gFunc_(BranchLength)) - _gFuncStart1.qFunc_2k_1(BranchLength,1);
	175	return probSum/Pij_t(1,0,BranchLength);
	176	}
	177
	178	//////////////////////////////////////////////////////////////////////////
	179	MDOUBLE computeJumps::lossProbGiven01(MDOUBLE BranchLength){
	180	MDOUBLE probSum = 0.0;
	181	//A Sum(3,5,7,...) changes
	182	//for(int k = 2; k<=_maxNumOfChangesPerBranchSum; ++k){
	183	// probSum += _gFuncStart0.qFunc_2k_1(BranchLength,k);
	184	//}
	185	//B 1 - Sum(even changes) - oneUnEvenChanges
	186	probSum = 1 - 0.5*(_gFuncStart0.gFunc_(BranchLength) + _gFuncStart0MinusR.gFunc_(BranchLength)) - _gFuncStart0.qFunc_2k_1(BranchLength,1);
	187	return probSum/Pij_t(0,1,BranchLength);
	188	}
	189	MDOUBLE computeJumps::lossProbGiven00(MDOUBLE BranchLength){
	190	MDOUBLE probSum = 0.0;
	191	//A Sum(2,4,6,...) changes
	192	//for(int k = 1; k<=_maxNumOfChangesPerBranchSum; ++k){
	193	// probSum += _gFuncStart0.qFunc_2k(BranchLength,k);
	194	//}
	195	//B 1 - Sum(uneven changes) - zeroEvenChanges
	196	probSum = 1 - 0.5*(_gFuncStart0.gFunc_(BranchLength) - _gFuncStart0MinusR.gFunc_(BranchLength)) - _gFuncStart0.qFunc_2k(BranchLength,0);
	197	return probSum/Pij_t(0,0,BranchLength);
	198	}
	199
	200	MDOUBLE computeJumps::lossProbGiven11(MDOUBLE BranchLength){
	201	MDOUBLE probSum = 0.0;
	202	//A Sum(2,4,6,...) changes
	203	//for(int k = 1; k<=_maxNumOfChangesPerBranchSum; ++k){
	204	// probSum += _gFuncStart1.qFunc_2k(BranchLength,k); //? _gFuncStart1 or _gFuncStart0
	205	//}
	206	//B 1 - Sum(uneven changes) - zeroEvenChanges
	207	probSum = 1 - 0.5*(_gFuncStart1.gFunc_(BranchLength) - _gFuncStart1MinusR.gFunc_(BranchLength)) - _gFuncStart1.qFunc_2k(BranchLength,0);
	208	return probSum/Pij_t(1,1,BranchLength);
	209	}
	210	MDOUBLE computeJumps::lossProbGiven10(MDOUBLE BranchLength){
	211	MDOUBLE probSum = 1.0;
	212	return probSum;
	213	}
	214
	215
	216	/********************************************************************************************
	217	// mij(t) = E(N, end=j | start=i)
	218	*********************************************************************************************/
	219	MDOUBLE computeJumps::m01(MDOUBLE BranchLength){
	220	return 0.5 *( _gFuncStart0.gFunc_dr(BranchLength) - _gFuncStart0MinusR.gFunc_dr(BranchLength));
	221	}
	222	MDOUBLE computeJumps::m00(MDOUBLE BranchLength){
	223	return 0.5 *( _gFuncStart0.gFunc_dr(BranchLength) + _gFuncStart0MinusR.gFunc_dr(BranchLength));
	224	}
	225	MDOUBLE computeJumps::m11(MDOUBLE BranchLength){
	226	return 0.5 *( _gFuncStart1.gFunc_dr(BranchLength) + _gFuncStart1MinusR.gFunc_dr(BranchLength));
	227	}
	228	MDOUBLE computeJumps::m10(MDOUBLE BranchLength){
	229	return 0.5 *( _gFuncStart1.gFunc_dr(BranchLength) - _gFuncStart1MinusR.gFunc_dr(BranchLength));
	230	}
	231
	232	/********************************************************************************************
	233	gFunc_dr
	234	*********************************************************************************************/
	235	MDOUBLE computeJumps::gFunc_dr(MDOUBLE BranchLength, int startState){
	236	// test:
	237	if(startState == 0){
	238	return _gFuncStart0.g1Func_dr(BranchLength) + _gFuncStart0.g2Func_dr(BranchLength);
	239	}
	240	if(startState == 1)
	241	return _gFuncStart1.g1Func_dr(BranchLength) + _gFuncStart1.g2Func_dr(BranchLength);
	242	else
	243	return 0;
	244	}
	245
	246
	247
	248
	249
	250
	251
	252	/********************************************************************************************
	253	gFunc
	254	*********************************************************************************************/
	255	computeJumps::gFunc::gFunc(const MDOUBLE Lambda1, const MDOUBLE Lambda2 , const MDOUBLE r)
	256	: _Lambda1(Lambda1), _Lambda2(Lambda2), _r(r)
	257	{
	258	_delta = sqrt((_Lambda1+_Lambda2)(_Lambda1+_Lambda2) + 4(_r_r - 1)_Lambda1*_Lambda2);
	259	_delta_dr = (4_r_Lambda1*_Lambda2)/_delta;
	260
	261	_Alpha1 = 0.5*(-_Lambda1-_Lambda2 +_delta);
	262	_Alpha2 = 0.5*(-_Lambda1-_Lambda2 -_delta);
	263
	264	_Alpha1_dr = 0.5*_delta_dr;
	265	_Alpha2_dr = -0.5*_delta_dr;
	266
	267	_Alpha1_2 = _delta; //= _Alpha1 - _Alpha2;
	268	_Alpha1_2_dr = _delta_dr; //= _Alpha1_dr - _Alpha2_dr;
	269
	270	_g1Part = ( (_r-1)*_Lambda1 - _Alpha2)/_Alpha1_2;
	271	_g2Part = (-(_r-1)*_Lambda1 + _Alpha1)/_Alpha1_2;
	272
	273	_g1Part_dr = ( _Alpha1_2( _Lambda1-_Alpha2_dr) - ( (_r-1)_Lambda1 - _Alpha2)_Alpha1_2_dr )/(_Alpha1_2_Alpha1_2);
	274	_g2Part_dr = ( _Alpha1_2(-_Lambda1+_Alpha1_dr) - (-(_r-1)_Lambda1 + _Alpha1)_Alpha1_2_dr )/(_Alpha1_2_Alpha1_2);
	275
	276	}
	277	//////////////////////////////////////////////////////////////////////////
	278	MDOUBLE computeJumps::gFunc::gFunc_dr(MDOUBLE BranchLength){
	279	return sign(_r)*(g1Func_dr(BranchLength) + g2Func_dr(BranchLength));
	280	}
	281	MDOUBLE computeJumps::gFunc::g1Func_dr(MDOUBLE BranchLength){
	282	return _g1Part_drg1Exp(BranchLength) + _g1Partg1Exp(BranchLength)BranchLength_Alpha1_dr;
	283	}
	284	MDOUBLE computeJumps::gFunc::g2Func_dr(MDOUBLE BranchLength){
	285	return _g2Part_drg2Exp(BranchLength) + _g2Partg2Exp(BranchLength)BranchLength_Alpha2_dr;
	286	}
	287
	288	//////////////////////////////////////////////////////////////////////////
	289	MDOUBLE computeJumps::gFunc::g1Exp(MDOUBLE BranchLength){
	290	return exp(_Alpha1*BranchLength);
	291	}
	292	MDOUBLE computeJumps::gFunc::g2Exp(MDOUBLE BranchLength){
	293	return exp(_Alpha2*BranchLength);
	294	}
	295
	296	MDOUBLE computeJumps::gFunc::gFunc_(MDOUBLE BranchLength){
	297	return _g1Partg1Exp(BranchLength) + _g2Partg2Exp(BranchLength);
	298	};
	299
	300	MDOUBLE computeJumps::gFunc::_A_(int k, int i){return BinomialCoeff((k+i-1),i) * pow(-1.0,i)pow(_Lambda1,k)pow(_Lambda2,(k-1)) / pow((_Lambda2-_Lambda1),(k+i)) ; }
	301	MDOUBLE computeJumps::gFunc::_B_(int k, int i){return BinomialCoeff((k+i-1),i) * pow(-1.0,i)pow(_Lambda1,k)pow(_Lambda2,(k-1)) / pow((_Lambda1-_Lambda2),(k+i)) ; }
	302	MDOUBLE computeJumps::gFunc::_C_(int k, int i){return BinomialCoeff((k+i-1),i) * pow(-1.0,i)pow(_Lambda1,k)pow(_Lambda2,(k)) / pow((_Lambda2-_Lambda1),(k+i)) ; }
	303	MDOUBLE computeJumps::gFunc::_D_(int k, int i){return BinomialCoeff((k+i),i) * pow(-1.0,i)pow(_Lambda1,k)pow(_Lambda2,(k)) / pow((_Lambda1-_Lambda2),(k+i+1)); }
	304
	305	// prob for (2k-1) transitions (gains and losses), given start=0
	306	MDOUBLE computeJumps::gFunc::qFunc_2k_1 (MDOUBLE BranchLength, int k){
	307	MDOUBLE qSUM = 0.0;
	308	for(int i=1; i<=k; ++i){
	309	qSUM += _A_(k,(k-i))* pow(BranchLength,(i-1))/factorial(i-1) * exp(-_Lambda1*BranchLength)
	310	+ _B_(k,(k-i))* pow(BranchLength,(i-1))/factorial(i-1) * exp(-_Lambda2*BranchLength);
	311	}
	312	return qSUM;
	313	}
	314	// prob for (2k) transitions (gains and losses), given start=0
	315	MDOUBLE computeJumps::gFunc::qFunc_2k (MDOUBLE BranchLength, int k){
	316	MDOUBLE qSUM = 0.0;
	317	for(int i=1; i<=(k+1); ++i){
	318	qSUM += _C_(k,(k-i+1))* pow(BranchLength,(i-1))/factorial(i-1)exp(-_Lambda1BranchLength);
	319	}
	320	for(int i=1; i<=k; ++i){
	321	qSUM += _D_(k,(k-i))* pow(BranchLength,(i-1))/factorial(i-1)exp(-_Lambda2BranchLength);
	322	}
	323	return qSUM;
	324	}
	325
	326
	327
	328
	329
	330
	331	/********************************************************************************************
	332	Pij_t - Based on Analytic solution
	333	*********************************************************************************************/
	334	MDOUBLE computeJumps::Pij_t(const int i,const int j, const MDOUBLE d) {
	335	MDOUBLE gain = _Lambda1;
	336	MDOUBLE loss = _Lambda2;
	337	MDOUBLE eigenvalue = -(gain + loss);
	338
	339
	340	VVdouble Pt;
	341	int AlphaSize = 2;
	342	resizeMatrix(Pt,AlphaSize,AlphaSize);
	343	int caseNum = i + j*2;
	344	switch (caseNum) {
	345	case 0 : Pt[0][0] = loss/(-eigenvalue) + exp(eigenvalued)(1 - loss/(-eigenvalue)); break;
	346	case 1 : Pt[1][0] = loss/(-eigenvalue) - exp(eigenvalued)(1 - gain/(-eigenvalue)); break;
	347	case 2 : Pt[0][1] = gain/(-eigenvalue) - exp(eigenvalued)(1 - loss/(-eigenvalue)); break;
	348	case 3 : Pt[1][1] = gain/(-eigenvalue) + exp(eigenvalued)(1 - gain/(-eigenvalue)); break;
	349	}
	350	MDOUBLE val = (Pt[i][j]);
	351	return val;
	352	}

+127

-0

libs/phylogeny/computeJumps.h less more

	0	#ifndef ___COMPUTE_JUMPS__
	1	#define ___COMPUTE_JUMPS__
	2
	3	#include "definitions.h"
	4	#include "tree.h"
	5	#include "stochasticProcess.h"
	6	#include "alphabet.h"
	7	#include "someUtil.h"
	8	#include <math.h>
	9
	10	#include <map>
	11	#include <vector>
	12	using namespace std;
	13
	14	/******************************************************************
	15	This class computes jumps (events) using the Suchard equations along branches of differing
	16	lengths (according to a given tree), with the aim of giving the expected number of jumps
	17	from state a to state b, given that the terminal states at the two ends of the branch are
	18	x and y.
	19	*******************************************************************/
	20
	21	class computeJumps {
	22	public:
	23	computeJumps(const MDOUBLE Lambda1, const MDOUBLE Lambda2, const MDOUBLE r=1, const int maxNumOfChangesPerBranchSum=5);
	24	virtual ~computeJumps();
	25
	26	/******************************************************************
	27	Foreach computeJumps, for gFunc objects are needed:
	28	inner class gFunc, if startState=0, Lambda1=gain, Lambda2= loss
	29	if startState=1, Lambda1=loss, Lambda2= gain.
	30	For both with use +r and -r versions
	31	*******************************************************************/
	32	class gFunc {
	33	public:
	34	gFunc(const MDOUBLE Lambda1, const MDOUBLE Lambda2 , const MDOUBLE r);
	35	gFunc(){};
	36	~gFunc(){};
	37
	38	MDOUBLE gFunc_dr(MDOUBLE BranchLength);
	39	MDOUBLE g1Func_dr(MDOUBLE BranchLength);
	40	MDOUBLE g2Func_dr(MDOUBLE BranchLength);
	41	MDOUBLE g1Exp(MDOUBLE BranchLength);
	42	MDOUBLE g2Exp(MDOUBLE BranchLength);
	43	MDOUBLE gFunc_(MDOUBLE BranchLength);
	44
	45	//////////////////////////////////////////////////////////////////////////
	46	MDOUBLE _A_(int k, int i);
	47	MDOUBLE _B_(int k, int i);
	48	MDOUBLE _C_(int k, int i);
	49	MDOUBLE _D_(int k, int i);
	50	// prob for (2k-1) transitions (gains and losses), given start=0
	51	MDOUBLE qFunc_2k_1 (MDOUBLE BranchLength, int k=1);
	52	// prob for (2k) transitions (gains and losses), given start=0
	53	MDOUBLE qFunc_2k (MDOUBLE BranchLength, int k=0);
	54
	55	private:
	56	MDOUBLE _r;
	57	MDOUBLE _Lambda1;
	58	MDOUBLE _Lambda2;
	59
	60	MDOUBLE _Alpha1;
	61	MDOUBLE _Alpha2;
	62	MDOUBLE _Alpha1_dr;
	63	MDOUBLE _Alpha2_dr;
	64
	65	MDOUBLE _Alpha1_2;
	66	MDOUBLE _Alpha1_2_dr;
	67
	68	MDOUBLE _delta;
	69	MDOUBLE _delta_dr;
	70
	71	MDOUBLE _g1Part;
	72	MDOUBLE _g2Part;
	73	MDOUBLE _g1Part_dr;
	74	MDOUBLE _g2Part_dr;
	75
	76	};
	77	//////////////////////////////////////////////////////////////////////////
	78
	79	MDOUBLE getExpectation(const MDOUBLE BranchLength, int terminalStart, int terminalEnd, int fromId, int toId);
	80	MDOUBLE getTotalExpectation(const MDOUBLE BranchLength, int terminalStart, int terminalEnd);
	81
	82	MDOUBLE gainExp(MDOUBLE BranchLength,MDOUBLE prob01,MDOUBLE prob11);
	83
	84	MDOUBLE gainExpGiven01(MDOUBLE BranchLength);
	85	MDOUBLE gainExpGiven00(MDOUBLE BranchLength);
	86	MDOUBLE gainExpGiven11(MDOUBLE BranchLength);
	87	MDOUBLE gainExpGiven10(MDOUBLE BranchLength);
	88
	89	MDOUBLE lossExpGiven01(MDOUBLE BranchLength);
	90	MDOUBLE lossExpGiven00(MDOUBLE BranchLength);
	91	MDOUBLE lossExpGiven11(MDOUBLE BranchLength);
	92	MDOUBLE lossExpGiven10(MDOUBLE BranchLength);
	93
	94	MDOUBLE getProb(const MDOUBLE BranchLength, int terminalStart, int terminalEnd, int fromId, int toId);
	95	MDOUBLE gainProbGiven01(MDOUBLE BranchLength);
	96	MDOUBLE gainProbGiven00(MDOUBLE BranchLength);
	97	MDOUBLE gainProbGiven11(MDOUBLE BranchLength);
	98	MDOUBLE gainProbGiven10(MDOUBLE BranchLength);
	99
	100	MDOUBLE lossProbGiven01(MDOUBLE BranchLength);
	101	MDOUBLE lossProbGiven00(MDOUBLE BranchLength);
	102	MDOUBLE lossProbGiven11(MDOUBLE BranchLength);
	103	MDOUBLE lossProbGiven10(MDOUBLE BranchLength);
	104
	105
	106	MDOUBLE gFunc_dr(MDOUBLE BranchLength, int startState);
	107
	108	private:
	109	MDOUBLE m01(MDOUBLE BranchLength);
	110	MDOUBLE m00(MDOUBLE BranchLength);
	111	MDOUBLE m11(MDOUBLE BranchLength);
	112	MDOUBLE m10(MDOUBLE BranchLength);
	113
	114	MDOUBLE Pij_t(const int i,const int j, const MDOUBLE d);
	115
	116	MDOUBLE _Lambda1;
	117	MDOUBLE _Lambda2;
	118	int _maxNumOfChangesPerBranchSum;
	119
	120	gFunc _gFuncStart0;
	121	gFunc _gFuncStart0MinusR;
	122	gFunc _gFuncStart1;
	123	gFunc _gFuncStart1MinusR;
	124	};
	125
	126	#endif

+100

-0

libs/phylogeny/computeMarginalAlg.cpp less more

	0	// $Id: computeMarginalAlg.cpp 1735 2007-02-26 13:46:37Z itaymay $
	1
	2	#include "definitions.h"
	3	#include "treeIt.h"
	4	#include "computeMarginalAlg.h"
	5	#include <iostream>
	6	#include <cassert>
	7	using namespace std;
	8
	9
	10	void computeMarginalAlg::fillComputeMarginal(const tree& et,
	11	const sequenceContainer& sc,
	12	const stochasticProcess& sp,
	13	const int pos,
	14	const computePijHom& pi,
	15	suffStatGlobalHomPos& ssc,
	16	const suffStatGlobalHomPos& cup,
	17	const suffStatGlobalHomPos& cdown,
	18	doubleRep & posProb){
	19
	20	// filling the exact probs.
	21	tree::nodeP mynode = NULL;
	22	ssc.allocatePlace(et.getNodesNum(),pi.alphabetSize());
	23	treeIterTopDownConst tIt(et);
	24	for (mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
	25	assert (mynode != NULL);
	26	int letter;
	27	if (mynode->isLeaf()) {
	28	for(letter=0; letter<pi.alphabetSize();letter++) {
	29	doubleRep val=convert(cup.get(mynode->id(),letter))?1.0:0.0;
	30	ssc.set(mynode->id(),letter,val);
	31	}
	32	continue;
	33	}
	34	doubleRep sumProb =0;
	35	for(letter=0; letter<pi.alphabetSize();letter++) {
	36	doubleRep prob=0.0;
	37	if (mynode->father()==NULL) prob=1.0; // special case of the root.
	38	else {
	39	for(int letter_in_f=0; letter_in_f<pi.alphabetSize();letter_in_f++) {
	40	prob +=cdown.get(mynode->id(),letter_in_f)*
	41	pi.getPij(mynode->id(),letter,letter_in_f);
	42	}
	43	}
	44
	45	prob = probsp.freq(letter)
	46	cup.get(mynode->id(),letter);
	47	ssc.set(mynode->id(),letter,prob);
	48	sumProb += prob;
	49	}
	50	for(letter=0; letter<pi.alphabetSize();letter++) {
	51	doubleRep getV = ssc.get(mynode->id(),letter);
	52	ssc.set(mynode->id(),letter,getV/sumProb);
	53	}
	54
	55
	56
	57	// CHECKING:
	58	/* LOG(5,<<" checking marginal of node: "<<mynode->name()<<endl);
	59	MDOUBLE SSum =0;
	60	for (int u=0; u < pi.alphabetSize(); ++u) {
	61	LOG(5,<<ssc.get(mynode->id(),u)<<" ");
	62	SSum +=ssc.get(mynode->id(),u);
	63	}
	64	LOG(5,<<"\nsum of marginals = "<<SSum<<endl);
	65	*/
	66	if (mynode->isRoot()) posProb = convert(sumProb);
	67	}
	68	}
	69
	70
	71
	72
	73	/*
	74	if (val>1) {
	75	LOG(5,<<"x val = " << val<<endl);
	76	LOG(5,<<" my node = " << mynode->name()<<endl);
	77	LOG(5,<<" let = " << let << endl);
	78	LOG(5,<<" up = " << cup.get(mynode->id(),let));
	79	LOG(5,<< "pos prob = " << posProb<<endl);
	80	LOG(5,<<" root of tree = " << et.getRoot()->name()<<endl);
	81	errorMsg::reportError(" error in compute marginal >1 ");
	82	}
	83	if (val>1) {
	84	LOG(5,<<" val = " << val<<endl);
	85	LOG(5,<<" pos = " << pos<<endl);
	86	LOG(5,<<" my node = " << mynode->name()<<endl);
	87	LOG(5,<<" let = " << let << endl);
	88	LOG(5,<<" up = " << cup.get(mynode->id(),let)<<endl);
	89	LOG(5,<<" down[sameLetter] = " << cdown.get(mynode->id(),let)<<endl);
	90	LOG(5,<<" pij[sameLetter] = " << pi.getPij(mynode->id(),let,let)<<endl);
	91	LOG(5,<< "pos prob = " << posProb<<endl);
	92	LOG(5,<<" root of tree = " << et.getRoot()->name()<<endl);
	93	LOG(5,<<"sp.freq(letter) = "<<sp.freq(let)<<endl);
	94	errorMsg::reportError(" error in compute marginal >1 ");
	95	}
	96
	97
	98	*/
	99

+29

-0

libs/phylogeny/computeMarginalAlg.h less more

	0	// $Id: computeMarginalAlg.h 962 2006-11-07 15:13:34Z privmane $
	1
	2	#ifndef ___COMPUTE_MARGINAL_ALG
	3	#define ___COMPUTE_MARGINAL_ALG
	4
	5	#include "definitions.h"
	6	#include "suffStatComponent.h"
	7	#include "sequenceContainer.h"
	8	#include "computePijComponent.h"
	9
	10	// This function gives, for each node (for DNA, for example),
	11	// P(A | DATA), P(C | DATA), ... etc.
	12	// This is the case in the homogeneous model only.
	13	// For the Gamma case, the marginal in a specific node is in fact
	14	// P(A | DATA, r), P(C | DATA, r), ... etc.
	15
	16	class computeMarginalAlg {
	17	public:
	18	void fillComputeMarginal(const tree& et,
	19	const sequenceContainer& sc,
	20	const stochasticProcess& sp,
	21	const int pos,
	22	const computePijHom& pi,
	23	suffStatGlobalHomPos& ssc,
	24	const suffStatGlobalHomPos& cup,
	25	const suffStatGlobalHomPos& cdown,
	26	doubleRep & posProb);
	27	};
	28	#endif

+110

-0

libs/phylogeny/computePijComponent.cpp less more

	0
	1	// $Id: computePijComponent.cpp 9253 2011-01-31 01:37:21Z rubi $
	2
	3	#include "definitions.h"
	4	#include "treeIt.h"
	5	#include "computePijComponent.h"
	6	#include "logFile.h"
	7
	8	void computePijHomSpec::fillPij(const MDOUBLE dis, const stochasticProcess& sp, int derivationOrder, bool isReversible)
	9	{
	10
	11	if (!(isReversible && sp.isReversible())) // if one is false
	12	isReversible = false;
	13	resize(sp.alphabetSize());
	14	int i,j;
	15	for (i=0; i<sp.alphabetSize(); i++) {
	16	switch (derivationOrder) {
	17	case 0:
	18	_V[i][i] = sp.Pij_t(i,i,dis);
	19	break;
	20	case 1:
	21	_V[i][i] = sp.dPij_dt(i,i,dis);
	22	break;
	23	case 2:
	24	_V[i][i] = sp.d2Pij_dt2(i,i,dis);
	25	break;
	26	default:
	27	errorMsg::reportError("error in function fillPij - derivationOrder must be 0, 1 or 2");
	28	}
	29
	30	for (j=i+1; j<sp.alphabetSize(); j++) {
	31	switch (derivationOrder) {
	32	case 0:
	33	_V[i][j] = sp.Pij_t(i,j,dis);
	34	if ((_V[i][j] == 0 )&& (dis !=0)){
	35
	36	_V[i][j] = EPSILON;
	37	}
	38
	39	break;
	40	case 1:
	41	_V[i][j] = sp.dPij_dt(i,j,dis);
	42	break;
	43	case 2:
	44	_V[i][j] = sp.d2Pij_dt2(i,j,dis);
	45	break;
	46	default:
	47	errorMsg::reportError("error in function fillPij - derivationOrder must be 0, 1 or 2");
	48	}
	49	if (sp.freq(j) == 0.0) {
	50	if (isReversible) {
	51	errorMsg::reportError("error in function fillPij");
	52	}
	53
	54	}
	55	// else {
	56	if (isReversible){
	57	_V[j][i] = _V[i][j]* sp.freq(i)/sp.freq(j);
	58	}
	59	else {
	60	switch (derivationOrder) {
	61	case 0:
	62	_V[j][i] = sp.Pij_t(j,i,dis);
	63	if ((_V[j][i] == 0 )&& (dis !=0))
	64	_V[j][i] = EPSILON;
	65	break;
	66	case 1:
	67	_V[j][i] = sp.dPij_dt(j,i,dis);
	68	break;
	69	case 2:
	70	_V[j][i] = sp.d2Pij_dt2(j,i,dis);
	71	break;
	72	default:
	73	errorMsg::reportError("error in function fillPij - derivationOrder must be 0, 1 or 2");
	74	}
	75	}
	76	// }
	77	}
	78	}
	79	}
	80
	81
	82	void computePijHom::fillPij(const tree& et, const stochasticProcess& sp, int derivationOrder, bool isReversible) {
	83	_V.resize(et.getNodesNum());
	84	treeIterTopDownConst tIt(et);
	85	tree::nodeP myNode = tIt.first();
	86	{// skipping the root, but allocating place for the root pij even if they are not use
	87	// to maintain that all arrays have the same size.
	88	_V[myNode->id()].resize(sp.alphabetSize());
	89	}
	90	LOGDO(50,et.output(myLog::LogFile(),tree::ANCESTOR));
	91	LOGDO(50,et.output(myLog::LogFile(),tree::PHYLIP));
	92	for (; myNode != tIt.end(); myNode = tIt.next()) {
	93	if (!(myNode->isRoot()))
	94	_V[myNode->id()].fillPij(myNode->dis2father()*sp.getGlobalRate(),sp,derivationOrder,isReversible);
	95	// else
	96	// myLog::LogFile()<<"ROOT IS "<<myNode->name()<<endl;
	97	}
	98	}
	99
	100
	101	void computePijGam::fillPij(const tree& et, const stochasticProcess& sp, int derivationOrder, bool isReversible) {
	102	_V.resize(sp.categories());
	103	for (int i=0; i < _V.size(); ++i) {
	104	tree cp = et;
	105	cp.multipleAllBranchesByFactor(sp.rates(i)/sp.getGlobalRate());// the global rate is taken care of in the hom pij.
	106	_V[i].fillPij(cp,sp,derivationOrder,isReversible);
	107	}
	108	}
	109

+58

-0

libs/phylogeny/computePijComponent.h less more

	0	// $Id: computePijComponent.h 9253 2011-01-31 01:37:21Z rubi $
	1
	2	#ifndef ___COMPUTE_PIJ_COMPONENT
	3	#define ___COMPUTE_PIJ_COMPONENT
	4
	5	#include "definitions.h"
	6	#include "tree.h"
	7	#include "stochasticProcess.h"
	8	#include "multipleStochasticProcess.h"
	9	#include "gammaDistribution.h"
	10
	11
	12	class computePijHomSpec {//specific node, no rate variation
	13	public:
	14	virtual ~computePijHomSpec(){};
	15	void fillPij(const MDOUBLE dis, const stochasticProcess& sp, int derivationOrder = 0, bool isReversible =true);
	16	void resize(const int alphabetSize) {
	17	_V.resize(alphabetSize);
	18	for (int z=0;z<alphabetSize;++z) _V[z].resize(alphabetSize);
	19	}
	20
	21	int alphabetSize() const {return _V.size();}
	22	MDOUBLE getPij(const int let1,const int let2)const{
	23	return _V[let1][let2];
	24	}
	25	VVdouble _V; // let, let
	26	};
	27
	28	class computePijHom {//all nodes, no rate variation
	29	public:
	30	virtual ~computePijHom(){};
	31	void fillPij(const tree& et, const stochasticProcess& sp, int derivationOrder = 0, bool isReversible =true);
	32	int alphabetSize() const {return _V[0].alphabetSize();}
	33	int getNodesNum() const {return _V.size();}
	34	MDOUBLE getPij(const int nodeId,const int let1,const int let2)const{
	35	return _V[nodeId].getPij(let1,let2);
	36	}
	37	vector<computePijHomSpec> _V; // let, let
	38	};
	39
	40	class computePijGam {//
	41	public:
	42	virtual ~computePijGam(){};
	43	void fillPij(const tree& et, const stochasticProcess& sp, int derivationOrder = 0, bool isReversible =true);
	44	int categories() const {return _V.size();}
	45	int alphabetSize() const {return _V[0].alphabetSize();}
	46	int getNodesNum() const {return _V[0].getNodesNum();}
	47
	48	MDOUBLE getPij(const int rateCategor,const int nodeId,const int let1,const int let2)const{
	49	return _V[rateCategor].getPij(nodeId,let1,let2);
	50	}
	51	computePijHom& operator[] (int i) {return _V[i];}
	52	const computePijHom& operator[] (int i) const {return _V[i];}
	53	vector<computePijHom> _V; // each rate category
	54	};
	55
	56
	57	#endif

+202

-0

libs/phylogeny/computePosteriorExpectationOfSubstitutions.cpp less more

	0	#include "computePosteriorExpectationOfSubstitutions.h"
	1	#include "definitions.h"
	2	#include "computeDownAlg.h"
	3	#include "computeUpAlg.h"
	4	#include "matrixUtils.h"
	5	#include "treeIt.h"
	6	#include "likelihoodComputation.h"
	7
	8	using namespace std;
	9
	10	/********************************************************************************************
	11	computePosteriorExpectationOfSubstitutions
	12	*********************************************************************************************/
	13	computePosteriorExpectationOfSubstitutions::computePosteriorExpectationOfSubstitutions(const tree &tr, const sequenceContainer &sc, const stochasticProcess *sp):
	14	_tr(tr), _sc(sc){
	15	if(!sp){
	16	errorMsg::reportError("error in the constructor computePosteriorExpectationOfSubstitutions sp argument is NULL");
	17	}
	18	else{
	19	_sp = sp;
	20	}
	21	}
	22	/********************************************************************************************
	23	Expectation of number of substitutions from character u to v --- =
	24	sum over all substitutions x,y:
	25	Posterior(Node=x,Father=y\|D)*Exp(substitutions u to v\|Node=x,Father=y)
	26	The second term is given to the function as input (can be obtained via simulations)
	27	*********************************************************************************************/
	28	VVdouble computePosteriorExpectationOfSubstitutions::computeExpectationAcrossTree(
	29	simulateJumpsAbstract &sim, //input given from simulation studies
	30	const VVVdouble &posteriorProbs,
	31	VVVdouble &expForBranch)
	32	{
	33	//int numNodes = _tr.getNodesNum();
	34	int alphabetSize = _sp->alphabetSize();
	35	VVdouble res;
	36	resizeMatrix(res,alphabetSize,alphabetSize);
	37	treeIterTopDownConst tIt(_tr);
	38	for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
	39	for (int fromState=0;fromState<alphabetSize;++fromState)
	40	{
	41	for (int toState=0;toState<alphabetSize;++toState)
	42	{
	43	if (fromState==toState)
	44	continue;
	45	expForBranch[mynode->id()][fromState][toState] = computeExpectationOfChangePerBranch(sim,posteriorProbs,mynode,fromState,toState);
	46	res[fromState][toState] +=expForBranch[mynode->id()][fromState][toState];
	47
	48	}
	49	}
	50	}
	51	return res;
	52	}
	53	/********************************************************************************************
	54	Posterior probabilities computed across entire tree, for all substitutions from character u to v
	55	*********************************************************************************************/
	56	VVdouble computePosteriorExpectationOfSubstitutions::computePosteriorAcrossTree(
	57	simulateJumpsAbstract &sim, //input given from simulation studies
	58	const VVVdouble &posteriorProbsGivenTerminals,VVVdouble &probsForBranch)
	59	{
	60	//int numNodes = _tr.getNodesNum();
	61	int alphabetSize = _sp->alphabetSize();
	62	// N: resized before
	63	//probsForBranch.resize(numNodes);
	64	//for (int n=0;n<numNodes;++n)
	65	// resizeMatrix(probsForBranch[n],alphabetSize,alphabetSize);
	66
	67	VVdouble res;
	68	resizeMatrix(res,alphabetSize,alphabetSize);
	69	treeIterTopDownConst tIt(_tr);
	70	for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
	71	for (int fromState=0;fromState<alphabetSize;++fromState)
	72	{
	73	for (int toState=0;toState<alphabetSize;++toState)
	74	{
	75	if (fromState==toState)
	76	continue;
	77	probsForBranch[mynode->id()][fromState][toState]= computePosteriorOfChangePerBranch(sim,posteriorProbsGivenTerminals,mynode,fromState,toState);
	78	res[fromState][toState] +=probsForBranch[mynode->id()][fromState][toState];
	79
	80	}
	81	}
	82	}
	83	return res;
	84	}
	85	/********************************************************************************************
	86	*********************************************************************************************/
	87	MDOUBLE computePosteriorExpectationOfSubstitutions::computePosteriorOfChangePerBranch(simulateJumpsAbstract &sim, //input given from simulation studies
	88	const VVVdouble &posteriorProbs,
	89	tree::nodeP node,
	90	int fromState, int toState)
	91	{
	92	int alphabetSize = _sp->alphabetSize();
	93	MDOUBLE res = 0;
	94
	95	for (int x=0;x<alphabetSize;++x)
	96	{
	97	for (int y=0;y<alphabetSize;++y)
	98	{
	99	res+=sim.getProb(node->name(),x,y,fromState,toState)*posteriorProbs[node->id()][x][y];
	100	}
	101	}
	102	return res;
	103	}
	104
	105	/********************************************************************************************
	106	Posterior of observing a certain state substitution along a branch:
	107	P(Node=x,Father=y\|D) = P(D,Node=x,Father=y)/P(D)
	108	usage: posteriorPerNodePer2States[mynode->id()][fatherState][sonState]
	109	*********************************************************************************************/
	110	void computePosteriorExpectationOfSubstitutions::computePosteriorOfChangeGivenTerminals(VVVdouble &posteriorPerNodePer2States, int pos){
	111	int numNodes = _tr.getNodesNum();
	112	int alphabetSize = _sp->alphabetSize();
	113	posteriorPerNodePer2States.resize(numNodes);
	114	for (int n=0;n<posteriorPerNodePer2States.size();++n)
	115	resizeMatrix(posteriorPerNodePer2States[n],alphabetSize,alphabetSize);
	116	suffStatGlobalHomPos sscUp;
	117	suffStatGlobalHomPos sscDown; //for a reversible model
	118	sscUp.allocatePlace(numNodes,alphabetSize);
	119	computePijHom pi;
	120	pi.fillPij(_tr,*_sp);
	121
	122	computeUpAlg comp_Up;
	123	computeDownAlg comp_Down;
	124	comp_Up.fillComputeUp(_tr,_sc,pos,pi,sscUp);
	125	comp_Down.fillComputeDown(_tr,_sc,pos,pi,sscDown,sscUp);
	126	treeIterTopDownConst tIt(_tr);
	127	MDOUBLE ll = convert(likelihoodComputation::getLofPos(pos,_tr,_sc,pi,*_sp));
	128	for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
	129	for (int sonState = 0; sonState<alphabetSize; ++sonState){
	130	for (int fatherState = 0; fatherState<alphabetSize; ++fatherState){
	131	posteriorPerNodePer2States[mynode->id()][fatherState][sonState]= computePosterioGivenTerminalsPerBranch(mynode->id(),sonState,fatherState,sscUp,sscDown, pi,ll,mynode->name());
	132	}
	133	}
	134	}
	135	}
	136	/********************************************************************************************
	137	Posterior of observing a certain state substitution along a branch:
	138	P(Node=sonState,Father=fatherState\|D) = P(D,Node=sonState,Father=fatherState)/P(D)
	139	usage: posteriorPerNodePer2States[mynode->id()][fatherState][sonState]
	140	*********************************************************************************************/
	141	MDOUBLE computePosteriorExpectationOfSubstitutions::computePosterioGivenTerminalsPerBranch
	142	(int nodeId,int sonState, int fatherState,suffStatGlobalHomPos &sscUp,
	143	suffStatGlobalHomPos &sscDown,computePijHom &pi, MDOUBLE &LLData, const string nodeName)
	144	{
	145	MDOUBLE res, Down, Up, pij;
	146	Down = convert(sscDown.get(nodeId,fatherState));
	147	Up = convert(sscUp.get(nodeId,sonState));
	148	pij = pi.getPij(nodeId,fatherState,sonState);
	149	res=_sp->freq(fatherState)DownUp*pij;
	150	res/=LLData;
	151	// if(gainLossOptions::_printDEBUGinfo)
	152	// LOG(3,<<nodeName<<" son "<<sonState<<" Down "<<Down<<" father "<<fatherState<<" Up "<<Up<<" pij "<<pij<<" resDXY "<<resDXY<<" LLData "<<LLData<<" prob "<<res<<endl);
	153
	154	if (res > 1 + 1e-4){
	155	LOGnOUT(3,<<nodeId<<" son "<<sonState<<" Down "<<Down<<" father "<<fatherState<<" Up "<<Up<<" pij "<<pij<<" res "<<res<<" LLData "<<LLData<<endl);
	156	res = 1;
	157	}
	158	if (res<-1e-4){
	159	LOGnOUT(3,<<nodeId<<" son "<<sonState<<" Down "<<Down<<" father "<<fatherState<<" Up "<<Up<<" pij "<<pij<<" res "<<res<<" LLData "<<LLData<<endl);
	160	res = 0;
	161	}
	162	if ((res > 1 + 0.000001) \|\| (res<-0.000001)){
	163	string err = "Error in computePosteriorExpectationOfSubstitutions::computePosterioGivenTerminalsPerBranch, non probability value ";
	164	err+=double2string(res);
	165	err+=" at node ";
	166	err+=int2string(nodeId);
	167	err+= " sonState ";
	168	err+= int2string(sonState);
	169	err+= " fatherState ";
	170	err+= int2string(fatherState);
	171	errorMsg::reportError(err);
	172	}
	173	return res;
	174	}
	175	/********************************************************************************************
	176	*********************************************************************************************/
	177	MDOUBLE computePosteriorExpectationOfSubstitutions::computeExpectationOfChangePerBranch(
	178	simulateJumpsAbstract &sim, //input given from simulation studies
	179	const VVVdouble &posteriorProbsGivenTerminals,
	180	tree::nodeP node,int fromState, int toState)
	181	{
	182	int alphabetSize = _sp->alphabetSize();
	183
	184
	185	MDOUBLE nodeExpectation = 0;
	186	for (int x = 0; x<alphabetSize; ++x){
	187	for (int y = 0; y<alphabetSize; ++y){
	188	nodeExpectation+=(posteriorProbsGivenTerminals[node->id()][x][y]*
	189	sim.getExpectation(node->name(),x,y,fromState,toState));
	190	//DEBUG
	191	LOG(6,<<"node "<<node->id()<<endl);
	192	LOG(6,<<"from "<<fromState<<" to "<<toState<<" given "<<x<<" and "<<y
	193	<<" post= "<<posteriorProbsGivenTerminals[node->id()][x][y]<<" sim= "<< sim.getExpectation(node->name(),x,y,fromState,toState)<<endl);
	194	}
	195	}
	196	return nodeExpectation;
	197	}
	198
	199
	200
	201

+60

-0

libs/phylogeny/computePosteriorExpectationOfSubstitutions.h less more

	0
	1	#ifndef ___COMPUTE_POSTERIOR_EXPECTATION_OF_SUBSTITUTIONS
	2	#define ___COMPUTE_POSTERIOR_EXPECTATION_OF_SUBSTITUTIONS
	3
	4
	5	/*
	6	This is the base class; it implements the computePosteriorExpectationOfSubstitutions
	7	procedure for a reversible stochastic process. Its derived class, computePosteriorExpectationOfSubstitutions_nonReversibleSp,
	8	implements the same procedure for a non-reversible stochastic process. The two implementations
	9	differ in two functions: computePosteriorOfChangeGivenTerminals and computePosterioGivenTerminalsPerBranch
	10	*/
	11
	12	#include "definitions.h"
	13	#include "simulateJumps.h"
	14	#include "tree.h"
	15	#include "sequenceContainer.h"
	16	#include "stochasticProcess.h"
	17	#include "suffStatComponent.h"
	18	#include "computePijComponent.h"
	19	#include "simulateJumpsAbstract.h"
	20
	21	class computePosteriorExpectationOfSubstitutions {
	22
	23	public:
	24	explicit computePosteriorExpectationOfSubstitutions(const tree &tr, const sequenceContainer &sc, const stochasticProcess *sp);
	25	virtual ~computePosteriorExpectationOfSubstitutions(){};
	26
	27
	28	VVdouble computeExpectationAcrossTree(simulateJumpsAbstract &sim, //input given from simulation studies
	29	const VVVdouble &posteriorProbs, VVVdouble &expForBranch);
	30	VVdouble computePosteriorAcrossTree(simulateJumpsAbstract &sim, //input given from simulation studies
	31	const VVVdouble &posteriorProbsGivenTerminals,VVVdouble &probsForBranch);
	32
	33	virtual void computePosteriorOfChangeGivenTerminals(VVVdouble &posteriorPerNodePer2States, int pos);
	34
	35	private:
	36	MDOUBLE computePosteriorOfChangePerBranch(
	37	simulateJumpsAbstract &sim, //input given from simulation studies
	38	const VVVdouble &posteriorProbs,
	39	tree::nodeP node,
	40	int fromState, int toState);
	41
	42	MDOUBLE computeExpectationOfChangePerBranch(
	43	simulateJumpsAbstract &sim, //input given from simulation studies
	44	const VVVdouble &posteriorProbsGivenTerminals,
	45	tree::nodeP node,
	46	int fromState, int toState);
	47
	48	MDOUBLE computePosterioGivenTerminalsPerBranch (int nodeId,int sonState, int fatherState,suffStatGlobalHomPos &sscUp,
	49	suffStatGlobalHomPos &sscDown,computePijHom &pi, MDOUBLE &LLData, const string nodeName);
	50
	51
	52	protected:
	53	const tree &_tr;
	54	const sequenceContainer &_sc;
	55	const stochasticProcess *_sp;
	56	};
	57
	58
	59	#endif

+91

-0

libs/phylogeny/computePosteriorExpectationOfSubstitutions_nonReversibleSp.cpp less more

	0	#include "definitions.h"
	1	#include "computeDownAlg.h"
	2	#include "computeUpAlg.h"
	3	#include "matrixUtils.h"
	4	#include "treeIt.h"
	5	#include "likelihoodComputation.h"
	6	#include "computePosteriorExpectationOfSubstitutions_nonReversibleSp.h"
	7
	8	using namespace std;
	9
	10
	11
	12	/********************************************************************************************
	13	Posterior of observing a certain state substitution along a branch:
	14	P(Node=x,Father=y\|D) = P(D,Node=x,Father=y)/P(D)
	15	usage: posteriorPerNodePer2States[mynode->id()][fatherState][sonState]
	16	*********************************************************************************************/
	17	void computePosteriorExpectationOfSubstitutions_nonReversibleSp::computePosteriorOfChangeGivenTerminals(VVVdouble &posteriorPerNodePer2States, int pos){
	18	int numNodes = _tr.getNodesNum();
	19	int alphabetSize = _sp->alphabetSize();
	20	posteriorPerNodePer2States.resize(numNodes);
	21	for (int n=0;n<posteriorPerNodePer2States.size();++n)
	22	resizeMatrix(posteriorPerNodePer2States[n],alphabetSize,alphabetSize);
	23	suffStatGlobalHomPos sscUp;
	24	suffStatGlobalGamPos sscDownNonRev; // The "Gam" is used for the letter at father - sscGivenRoot
	25	sscUp.allocatePlace(numNodes,alphabetSize);
	26	computePijHom pi;
	27	pi.fillPij(_tr,*_sp);
	28
	29	computeUpAlg comp_Up;
	30	computeDownAlg comp_Down;
	31	comp_Up.fillComputeUp(_tr,_sc,pos,pi,sscUp);
	32	comp_Down.fillComputeDownNonReversible(_tr,_sc,pos,pi,sscDownNonRev,sscUp);
	33	treeIterTopDownConst tIt(_tr);
	34	MDOUBLE ll = convert(likelihoodComputation::getLofPos(pos,_tr,_sc,pi,*_sp));
	35	for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
	36	for (int sonState = 0; sonState<alphabetSize; ++sonState){
	37	for (int fatherState = 0; fatherState<alphabetSize; ++fatherState){
	38	posteriorPerNodePer2States[mynode->id()][fatherState][sonState]= computePosterioGivenTerminalsPerBranch(mynode->id(),sonState,fatherState,sscUp,sscDownNonRev, pi,ll,mynode->name());
	39	}
	40	}
	41	}
	42	}
	43
	44	/********************************************************************************************
	45	Posterior of observing a certain state substitution along a branch:
	46	P(Node=sonState,Father=fatherState\|D) = P(D,Node=sonState,Father=fatherState)/P(D)
	47	usage: posteriorPerNodePer2States[mynode->id()][fatherState][sonState]
	48	*********************************************************************************************/
	49	MDOUBLE computePosteriorExpectationOfSubstitutions_nonReversibleSp::computePosterioGivenTerminalsPerBranch
	50	(int nodeId,int sonState, int fatherState,suffStatGlobalHomPos &sscUp,
	51	suffStatGlobalGamPos &sscDown,computePijHom &pi, MDOUBLE &LLData, const string nodeName)
	52	{
	53	MDOUBLE res=0.0;
	54	MDOUBLE resDXY, Down, Up, pij;
	55	for (int stateAtRoot = 0; stateAtRoot<_sp->alphabetSize(); ++stateAtRoot){
	56	Down = convert(sscDown.get(stateAtRoot,nodeId,fatherState));
	57	Up = convert(sscUp.get(nodeId,sonState));
	58	pij = pi.getPij(nodeId,fatherState,sonState);
	59
	60	res+=(_sp->freq(stateAtRoot)*
	61	Down*
	62	Up*
	63	pij);
	64	}
	65	resDXY = res;
	66	res/=LLData;
	67	// if(gainLossOptions::_printDEBUGinfo)
	68	// LOG(3,<<nodeName<<" son "<<sonState<<" Down "<<Down<<" father "<<fatherState<<" Up "<<Up<<" pij "<<pij<<" resDXY "<<resDXY<<" LLData "<<LLData<<" prob "<<res<<endl);
	69
	70	if (res > 1 + 1e-4){
	71	LOGnOUT(3,<<nodeId<<" son "<<sonState<<" Down "<<Down<<" father "<<fatherState<<" Up "<<Up<<" pij "<<pij<<" resDXY "<<resDXY<<" LLData "<<LLData<<" prob "<<res<<endl);
	72	res = 1;
	73	}
	74	if (res<-1e-4){
	75	LOGnOUT(3,<<nodeId<<" son "<<sonState<<" Down "<<Down<<" father "<<fatherState<<" Up "<<Up<<" pij "<<pij<<" resDXY "<<resDXY<<" LLData "<<LLData<<" prob "<<res<<endl);
	76	res = 0;
	77	}
	78	if ((res > 1 + 0.000001) \|\| (res<-0.000001)){
	79	string err = "Error in computePosteriorExpectationOfSubstitutions_nonReversibleSp::computePosterioGivenTerminalsPerBranch, non probability value ";
	80	err+=double2string(res);
	81	err+=" at node ";
	82	err+=int2string(nodeId);
	83	err+= " sonState ";
	84	err+= int2string(sonState);
	85	err+= " fatherState ";
	86	err+= int2string(fatherState);
	87	errorMsg::reportError(err);
	88	}
	89	return res;
	90	}⏎

+22

-0

libs/phylogeny/computePosteriorExpectationOfSubstitutions_nonReversibleSp.h less more

	0	#ifndef ___COMPUTE_POSTERIOR_EXPECTATION_OF_SUBSTITUTIONS_NONREVERSIBLESP
	1	#define ___COMPUTE_POSTERIOR_EXPECTATION_OF_SUBSTITUTIONS_NONREVERSIBLESP
	2
	3	#include "computePosteriorExpectationOfSubstitutions.h"
	4
	5	class computePosteriorExpectationOfSubstitutions_nonReversibleSp:public computePosteriorExpectationOfSubstitutions {
	6	public:
	7	explicit computePosteriorExpectationOfSubstitutions_nonReversibleSp(const tree &tr, const sequenceContainer &sc, stochasticProcess *sp):computePosteriorExpectationOfSubstitutions(tr,sc,sp){}
	8	virtual ~computePosteriorExpectationOfSubstitutions_nonReversibleSp(){};
	9
	10	void computePosteriorOfChangeGivenTerminals(VVVdouble &posteriorPerNodePer2States, int pos);
	11
	12	private:
	13	MDOUBLE computePosterioGivenTerminalsPerBranch (int nodeId,int sonState, int fatherState,suffStatGlobalHomPos &sscUp,
	14	suffStatGlobalGamPos &sscDown,computePijHom &pi, MDOUBLE &LLData, const string nodeName);
	15
	16	};
	17
	18	#endif
	19
	20
	21

+378

-0

libs/phylogeny/computeSubstitutionCounts.cpp less more

	0	#include "computeSubstitutionCounts.h"
	1	#include "computePosteriorExpectationOfSubstitutions.h"
	2	#include "computePosteriorExpectationOfSubstitutions_nonReversibleSp.h"
	3	#include "multipleStochasticProcess.h"
	4	#include "matrixUtils.h"
	5	#include "simulateJumps.h"
	6	#include "simulateCodonsJumps.h"
	7	#include "simulateJumpsAbstract.h"
	8	#include "treeIt.h"
	9	#include "treeUtil.h"
	10
	11	/********************************************************************************************
	12	computeSubstitutionCounts
	13	*********************************************************************************************/
	14	computeSubstitutionCounts::computeSubstitutionCounts(const sequenceContainer& sc, const tree& tr, multipleStochasticProcess* MultSpPtr, string& outDir, VVVdouble& LpostPerSpPerCat, const int simulationsIterNum, const MDOUBLE probCutOffSum, bool isSilent):
	15	_tr(tr),_sc(sc),_pMSp(MultSpPtr),_outDir(outDir),_LpostPerSpPerCat(LpostPerSpPerCat), _simulationsIterNum(simulationsIterNum), _probCutOffSum(probCutOffSum),_isSilent(isSilent)
	16	{
	17	if(!_pMSp->getSPVecSize()){
	18	errorMsg::reportError("Trying to call computeSubstitutionCounts with an empty multipleStochasticProcess object at computeSubstitutionCounts::computeSubstitutionCounts");
	19	}
	20	_alphabetSize = _pMSp->getSp(0)->alphabetSize();
	21	}
	22
	23	computeSubstitutionCounts& computeSubstitutionCounts::operator=(const computeSubstitutionCounts &other){
	24	if (this != &other) { // Check for self-assignment
	25	}
	26	return *this;
	27	}
	28
	29
	30	/********************************************************************************************
	31	*********************************************************************************************/
	32	void computeSubstitutionCounts::run()
	33	{
	34	for(int fatherStateIndex = 0;fatherStateIndex < _alphabetSize;++fatherStateIndex){
	35	for(int sonStateIndex = 0;sonStateIndex < _alphabetSize;++sonStateIndex){
	36	//if(sonStateIndex == fatherStateIndex) continue;
	37	_expMap_father2son[fatherStateIndex][sonStateIndex].resize(_sc.seqLen(),0);
	38	_probMap_father2son[fatherStateIndex][sonStateIndex].resize(_sc.seqLen(),0);
	39	}
	40	}
	41
	42	resize_VVVV(_sc.seqLen(),_tr.getNodesNum(),_alphabetSize,_alphabetSize,_jointProb_PosNodeXY);
	43	resize_VVVV(_sc.seqLen(),_tr.getNodesNum(),_alphabetSize,_alphabetSize,_probChanges_PosNodeXY);
	44	resize_VVVV(_sc.seqLen(),_tr.getNodesNum(),_alphabetSize,_alphabetSize,_expChanges_PosNodeXY);
	45
	46	computePosteriorOfChangeGivenTerminalsPerSpPerCat(); // GLM - multiple SPs
	47	}
	48
	49	/********************************************************************************************
	50	*********************************************************************************************/
	51	void computeSubstitutionCounts::computePosteriorOfChangeGivenTerminalsPerSpPerCat()
	52	{
	53	int numOfSPs = _pMSp->getSPVecSize();
	54
	55	// per Sp
	56	for (int spIndex=0; spIndex < numOfSPs; ++spIndex) {
	57	// Per RateCategory -- All the computations are done while looping over rate categories
	58	stochasticProcess * currentSp = _pMSp->getSp(spIndex);
	59	int numOfRateCategories = currentSp->categories();
	60	for (int rateCategIndex=0 ; rateCategIndex < numOfRateCategories;++rateCategIndex)
	61	{
	62	tree copy_et = _tr;
	63	MDOUBLE rateCategVal = currentSp->rates(rateCategIndex);
	64	MDOUBLE minimumRateCategVal = 0.0000001;
	65	MDOUBLE rate2multiply = max(rateCategVal,minimumRateCategVal);
	66	if(rateCategVal < minimumRateCategVal){
	67	LOGnOUT(4, <<" >>> NOTE: the rate category "<<rateCategVal<<" is too low for computePosteriorExpectationOfChangePerSite"<<endl); }
	68	copy_et.multipleAllBranchesByFactor(rate2multiply);
	69	//if(!_isSilent)
	70	//LOGnOUT(4, <<"running "<<gainLossOptions::_numOfSimulationsForPotExp<<" simulations for rate "<<rate2multiply<<endl);
	71	simulateJumpsAbstract* simPerRateCategory;
	72	if(_alphabetSize == 61)
	73	simPerRateCategory = new simulateCodonsJumps(copy_et,*currentSp,_alphabetSize);
	74	else
	75	simPerRateCategory = new simulateJumps(copy_et,*currentSp,_alphabetSize);
	76
	77	simPerRateCategory->runSimulation(_simulationsIterNum);
	78	if(!_isSilent)
	79	LOGnOUT(4,<<"finished simulations"<<endl);
	80
	81	// Per POS
	82	for (int pos = 0; pos <_sc.seqLen(); ++pos)
	83	{
	84	LOG(6,<<"pos "<<pos+1<<endl);
	85	// I) computePosteriorOfChangeGivenTerminals
	86	VVVdouble posteriorsGivenTerminalsPerRateCategoryPerPos;
	87	computePosteriorExpectationOfSubstitutions* cpesPerRateCategoryPerPos ;
	88	if(currentSp->isReversible())
	89	cpesPerRateCategoryPerPos = new computePosteriorExpectationOfSubstitutions(copy_et,_sc,currentSp); // Per POS,CAT
	90	else
	91	cpesPerRateCategoryPerPos = new computePosteriorExpectationOfSubstitutions_nonReversibleSp(copy_et,_sc,currentSp); // Per POS,CAT
	92	cpesPerRateCategoryPerPos->computePosteriorOfChangeGivenTerminals(posteriorsGivenTerminalsPerRateCategoryPerPos,pos);
	93
	94	// II) Exp - take in account both: 1) simulations 2) posteriorsGivenTerminal
	95	VVVdouble expChangesForBranchPerRateCategoryPerPos; // Sim+Exp
	96	resize_VVV(_tr.getNodesNum(),_alphabetSize,_alphabetSize,expChangesForBranchPerRateCategoryPerPos);
	97
	98	VVdouble expVV = cpesPerRateCategoryPerPos->computeExpectationAcrossTree(*simPerRateCategory,posteriorsGivenTerminalsPerRateCategoryPerPos,
	99	expChangesForBranchPerRateCategoryPerPos); // Per POS
	100	for(int fatherStateIndex = 0;fatherStateIndex < _alphabetSize;++fatherStateIndex){
	101	for(int sonStateIndex = 0;sonStateIndex < _alphabetSize;++sonStateIndex){
	102	if(sonStateIndex == fatherStateIndex) continue;
	103	_expMap_father2son[fatherStateIndex][sonStateIndex][pos] += expVV[fatherStateIndex][sonStateIndex]*_LpostPerSpPerCat[spIndex][rateCategIndex][pos];
	104	}
	105	}
	106
	107	// III) Sim - take in account both: 1) simulations 2) posteriorsGivenTerminal
	108	VVVdouble probChangesForBranchPerRateCategoryPerPos; // Sim+Prob
	109	resize_VVV(_tr.getNodesNum(),_alphabetSize,_alphabetSize,probChangesForBranchPerRateCategoryPerPos);
	110	VVdouble probVV = cpesPerRateCategoryPerPos->computePosteriorAcrossTree(*simPerRateCategory,posteriorsGivenTerminalsPerRateCategoryPerPos,probChangesForBranchPerRateCategoryPerPos);
	111	for(int fatherStateIndex = 0;fatherStateIndex < _alphabetSize;++fatherStateIndex){
	112	for(int sonStateIndex = 0;sonStateIndex < _alphabetSize;++sonStateIndex){
	113	if(sonStateIndex == fatherStateIndex) continue;
	114	_probMap_father2son[fatherStateIndex][sonStateIndex][pos] += probVV[fatherStateIndex][sonStateIndex]*_LpostPerSpPerCat[spIndex][rateCategIndex][pos];
	115	}
	116	}
	117	// Store all information PerCat,PerPOS
	118	for(int i=0;i<_probChanges_PosNodeXY[pos].size();++i){ // nodeId
	119	for(int j=0;j<_probChanges_PosNodeXY[pos][i].size();++j){ // fatherState
	120	for(int k=0;k<_probChanges_PosNodeXY[pos][i][j].size();++k){ // sonState
	121	_jointProb_PosNodeXY[pos][i][j][k] += posteriorsGivenTerminalsPerRateCategoryPerPos[i][j][k]*_LpostPerSpPerCat[spIndex][rateCategIndex][pos];
	122	_probChanges_PosNodeXY[pos][i][j][k] += probChangesForBranchPerRateCategoryPerPos[i][j][k]*_LpostPerSpPerCat[spIndex][rateCategIndex][pos];
	123	_expChanges_PosNodeXY[pos][i][j][k] += expChangesForBranchPerRateCategoryPerPos[i][j][k]*_LpostPerSpPerCat[spIndex][rateCategIndex][pos];
	124	}
	125	}
	126	}
	127	delete(cpesPerRateCategoryPerPos);
	128	}
	129	delete(simPerRateCategory);
	130	// Per POS
	131	}
	132	// per rateCat
	133	}
	134	// Per Sp
	135	}
	136
	137
	138
	139	/********************************************************************************************
	140	printProbExp()
	141	print perPos (over all branches)
	142	use the members _expV01, _expV10 for basic
	143	*********************************************************************************************/
	144	void computeSubstitutionCounts::printProbExp()
	145	{
	146
	147	string posteriorExpectationOfChangeString = _outDir + "//" + "posteriorExpectationOfChange.txt";
	148	ofstream posteriorExpectationStream(posteriorExpectationOfChangeString.c_str());
	149	string posteriorProbabilityOfChangeString = _outDir + "//" + "posteriorProbabilityOfChange.txt";
	150	ofstream posteriorProbabilityStream(posteriorProbabilityOfChangeString.c_str());
	151
	152	int fatherStateIndex,sonStateIndex;
	153	posteriorExpectationStream<<"#POS"<<"\t";
	154	posteriorProbabilityStream<<"#POS"<<"\t";
	155
	156	for (fatherStateIndex = 0;fatherStateIndex < _alphabetSize;++fatherStateIndex){
	157	for (sonStateIndex = 0;sonStateIndex < _alphabetSize;++sonStateIndex){
	158	if(sonStateIndex == fatherStateIndex) continue;
	159	posteriorExpectationStream<<_sc.getAlphabet()->fromInt(fatherStateIndex)<<"->"<<_sc.getAlphabet()->fromInt(sonStateIndex)<<"\t";
	160	posteriorProbabilityStream<<_sc.getAlphabet()->fromInt(fatherStateIndex)<<"->"<<_sc.getAlphabet()->fromInt(sonStateIndex)<<"\t";
	161	}
	162	}
	163	posteriorExpectationStream<<endl;
	164	posteriorProbabilityStream<<endl;
	165
	166	for (int pos = 0; pos <_sc.seqLen(); ++pos){
	167	posteriorExpectationStream<<pos+1<<"\t";
	168	posteriorProbabilityStream<<pos+1<<"\t";
	169	for (fatherStateIndex = 0;fatherStateIndex < _alphabetSize;++fatherStateIndex){
	170	for (sonStateIndex = 0;sonStateIndex < _alphabetSize;++sonStateIndex){
	171	if(sonStateIndex == fatherStateIndex) continue;//ofir, note the change in print format
	172	posteriorExpectationStream<<_expMap_father2son[fatherStateIndex][sonStateIndex][pos]<<"\t";
	173	posteriorProbabilityStream<<_probMap_father2son[fatherStateIndex][sonStateIndex][pos]<<"\t";
	174	}
	175	}
	176	posteriorExpectationStream<<endl;
	177	posteriorProbabilityStream<<endl;
	178	}
	179	posteriorExpectationStream.close();
	180	posteriorProbabilityStream.close();
	181	}
	182
	183
	184	/********************************************************************************************
	185	printProbabilityPerPosPerBranch 1
	186	produce 2 print files:
	187	1. print detailed file (out)
	188	2. print summary over all branches (outSum)
	189	*********************************************************************************************/
	190	void computeSubstitutionCounts::printProbabilityPerPosPerBranch()
	191	{
	192	string probabilityPerPosPerBranch = _outDir + "//" + "probabilityPerPosPerBranch.txt";
	193	ofstream probabilityPerPosPerBranchStream(probabilityPerPosPerBranch.c_str());
	194	probabilityPerPosPerBranchStream<<"# print values over probCutOff "<<_probCutOffSum<<endl;
	195	probabilityPerPosPerBranchStream<<"#Event"<<"\t"<<"POS"<<"\t"<<"branch"<<"\t"<<"branchLength"<<"\t"<<"distance2root"<<"\t"<<"probability"<<endl;
	196
	197	string countProbPerPos = _outDir + "//" + "probabilityPerPos.txt";
	198	ofstream countProbPerPosStream(countProbPerPos.c_str());
	199	countProbPerPosStream<<"# print values over probCutOff "<<_probCutOffSum<<endl;
	200	countProbPerPosStream<<"#POS"<<"\t";
	201	for(int fatherStateIndex = 0;fatherStateIndex < _alphabetSize;++fatherStateIndex){
	202	for(int sonStateIndex = 0;sonStateIndex < _alphabetSize;++sonStateIndex){
	203	if(sonStateIndex == fatherStateIndex) continue;
	204	countProbPerPosStream<<"prob"<<_sc.getAlphabet()->fromInt(fatherStateIndex)<<"->"<<_sc.getAlphabet()->fromInt(sonStateIndex)<<"\t";
	205	}
	206	}
	207	countProbPerPosStream<<endl;
	208
	209	for (int pos = 0; pos <_sc.seqLen(); ++pos){
	210	printProbabilityPerPosPerBranch(pos, _probChanges_PosNodeXY[pos],probabilityPerPosPerBranchStream,countProbPerPosStream);
	211	}
	212	}
	213	/********************************************************************************************
	214	printProbabilityPerPosPerBranch 1.1
	215	*********************************************************************************************/
	216	void computeSubstitutionCounts::printProbabilityPerPosPerBranch(int pos, VVVdouble& probChanges, ostream& out, ostream& outCount)
	217	{
	218	VVdouble countFromFather2Son;
	219	countFromFather2Son.resize(_alphabetSize);
	220	int fatherStateIndex,sonStateIndex;
	221	treeIterTopDownConst tIt(_tr);
	222	for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
	223	for(fatherStateIndex = 0;fatherStateIndex < _alphabetSize;++fatherStateIndex){
	224	countFromFather2Son[fatherStateIndex].resize(_alphabetSize,0);
	225	for(sonStateIndex = 0;sonStateIndex < _alphabetSize;++sonStateIndex){
	226	if(sonStateIndex == fatherStateIndex) continue;
	227	if(probChanges[mynode->id()][fatherStateIndex][sonStateIndex] > _probCutOffSum){//NIM
	228	out<<_sc.getAlphabet()->fromInt(fatherStateIndex)<<"->"<<_sc.getAlphabet()->fromInt(sonStateIndex)<<"\t"<<pos+1<<"\t"<<mynode->name()<<"\t"<<mynode->dis2father()<<"\t"<<getDistanceFromNode2ROOT(mynode)<<"\t"<<probChanges[mynode->id()][fatherStateIndex][sonStateIndex]<<endl;
	229	countFromFather2Son[fatherStateIndex][sonStateIndex] += probChanges[mynode->id()][fatherStateIndex][sonStateIndex];
	230	}
	231	}
	232	}
	233	}
	234	outCount<<pos+1<<"\t";
	235	for(fatherStateIndex = 0;fatherStateIndex < _alphabetSize;++fatherStateIndex){
	236	for(sonStateIndex = 0;sonStateIndex < _alphabetSize;++sonStateIndex){
	237	if(sonStateIndex == fatherStateIndex) continue;
	238	//if(countFromFather2Son[fatherStateIndex][sonStateIndex] == 0) continue;//NIMROD
	239	outCount<<countFromFather2Son[fatherStateIndex][sonStateIndex]<<"\t";
	240	}
	241	}
	242	outCount<<endl;
	243	}
	244
	245
	246
	247	/********************************************************************************************
	248	*********************************************************************************************/
	249	void computeSubstitutionCounts::printExpectationPerBranch()
	250	{
	251	// ExpectationPerBranch
	252	VVVdouble posteriorsGivenTerminalsTotal;
	253	resize_VVV(_tr.getNodesNum(),_alphabetSize,_alphabetSize,posteriorsGivenTerminalsTotal);
	254	for (int pos = 0; pos <_sc.seqLen(); ++pos){
	255	for(int i=0;i<_expChanges_PosNodeXY[pos].size();++i){
	256	for(int j=0;j<_expChanges_PosNodeXY[pos][i].size();++j){
	257	for(int k=0;k<_expChanges_PosNodeXY[pos][i][j].size();++k){
	258	posteriorsGivenTerminalsTotal[i][j][k] += _expChanges_PosNodeXY[pos][i][j][k];
	259	}
	260	}
	261	}
	262	}
	263	string expectationPerBranch = _outDir + "//" + "ExpectationPerBranch.txt";
	264	ofstream expectationPerBranchStream(expectationPerBranch.c_str());
	265	printExpectationPerBranch(posteriorsGivenTerminalsTotal,expectationPerBranchStream);
	266	}
	267	/********************************************************************************************
	268	*********************************************************************************************/
	269	void computeSubstitutionCounts::printExpectationPerBranch(VVVdouble& expectChanges, ostream& out)
	270	{
	271	treeIterTopDownConst tIt(_tr);
	272	out<<"#Event"<<"\t"<<"branch"<<"\t"<<"branchLength"<<"\t"<<"distance2root"<<"\t"<<"expectation"<<endl;
	273	for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
	274	for(int fatherStateIndex = 0;fatherStateIndex < _alphabetSize;++fatherStateIndex){
	275	for(int sonStateIndex = 0;sonStateIndex < _alphabetSize;++sonStateIndex){
	276	if(sonStateIndex == fatherStateIndex) continue;
	277	out<<_sc.getAlphabet()->fromInt(fatherStateIndex)<<"->"<<_sc.getAlphabet()->fromInt(sonStateIndex)<<"\t"<<
	278	mynode->name()<<"\t"<<mynode->dis2father()<<"\t"<<getDistanceFromNode2ROOT(mynode)<<"\t"<<expectChanges[mynode->id()][fatherStateIndex][sonStateIndex]<<endl;
	279	}
	280	}
	281	}
	282	}
	283
	284
	285	/********************************************************************************************
	286	*********************************************************************************************/
	287	void computeSubstitutionCounts::printTreesWithExpectationValuesAsBP(int from,int to)
	288	{
	289	// ExpectationPerPosPerBranch - Print Trees
	290	Vstring Vnames;
	291	fillAllNodesNames(Vnames,_tr);
	292	createDir(_outDir, "TreesWithExpectationValuesAsBP");
	293	for (int pos = 0; pos <_sc.seqLen(); ++pos){
	294	string strTreeNum = _outDir + "//" + "TreesWithExpectationValuesAsBP" + "//" + "expTree" + int2string(pos+1) + ".ph";
	295	ofstream tree_out(strTreeNum.c_str());
	296	printTreeWithValuesAsBP(tree_out,_tr,Vnames,&_expChanges_PosNodeXY[pos],from,to);
	297	}
	298	}
	299
	300	/********************************************************************************************
	301	*********************************************************************************************/
	302	void computeSubstitutionCounts::printTreesWithProbabilityValuesAsBP(int from,int to)
	303	{
	304	// ProbabilityPerPosPerBranch - Print Trees
	305	Vstring Vnames;
	306	fillAllNodesNames(Vnames,_tr);
	307	createDir(_outDir, "TreesWithProbabilityValuesAsBP");
	308	for (int pos = 0; pos <_sc.seqLen(); ++pos){
	309	string strTreeNum = _outDir + "//" + "TreesWithProbabilityValuesAsBP"+ "//" + "probTree" + int2string(pos+1) + ".ph";
	310	ofstream tree_out(strTreeNum.c_str());
	311	printTreeWithValuesAsBP(tree_out,_tr,Vnames,&_probChanges_PosNodeXY[pos],from,to);
	312	}
	313	}
	314
	315	/********************************************************************************************
	316	printProbExpPerPosPerBranch 1
	317	produce 2 print files:
	318	1. print detailed file (out)
	319	2. print summary over all branches (outSum)
	320	*********************************************************************************************/
	321	void computeSubstitutionCounts::printProbExpPerPosPerBranch(MDOUBLE probCutOff, MDOUBLE countsCutOff)
	322	{
	323	string probExpPerPosPerBranch = _outDir + "//" + "expPerPosPerBranch.txt";
	324	ofstream probExpPerPosPerBranchStream(probExpPerPosPerBranch.c_str());
	325	probExpPerPosPerBranchStream<<"# print values over probCutOff "<<probCutOff<<endl;
	326	probExpPerPosPerBranchStream<<"#Event"<<"\t"<<"POS"<<"\t"<<"branch"<<"\t"<<"branchLength"<<"\t"<<"distance2root"<<"\t"<<"probability"<<"\t"<<"expectation"<<endl;
	327
	328	string probExpPerPos = _outDir + "//" + "probExpCountPerPos.txt";
	329	ofstream countProbPerPosStream(probExpPerPos.c_str());
	330	countProbPerPosStream<<"# print count over probCutOff "<<countsCutOff<<endl;
	331	countProbPerPosStream<<"#POS"<<"\t"<<"Event"<<"\t"<<"EventProb"<<"\t"<<"EventExp"<<"\t"<<"EventCount"<<endl;
	332
	333	for (int pos = 0; pos <_sc.seqLen(); ++pos){
	334	printProbExpPerPosPerBranch(pos, probCutOff,countsCutOff, _probChanges_PosNodeXY[pos],_expChanges_PosNodeXY[pos],probExpPerPosPerBranchStream,countProbPerPosStream);
	335	}
	336	}
	337	/********************************************************************************************
	338	printProbExpPerPosPerBranch 1.1
	339	Get pos, and iterate over all branches:
	340	1. print detailed file (out)
	341	2. print summary over all branches (outSum)
	342	*********************************************************************************************/
	343	void computeSubstitutionCounts::printProbExpPerPosPerBranch(int pos, MDOUBLE probCutOff, MDOUBLE countCutOff, VVVdouble& probChanges, VVVdouble& expChanges, ostream& out, ostream& outSum)
	344	{
	345	VVdouble probFather2Son,expFather2Son;
	346	VVint countFather2Son;
	347	probFather2Son.resize(_alphabetSize);
	348	expFather2Son.resize(_alphabetSize);
	349	countFather2Son.resize(_alphabetSize);
	350	int fatherStateIndex,sonStateIndex;
	351
	352	treeIterTopDownConst tIt(_tr);
	353	for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
	354	for(fatherStateIndex = 0;fatherStateIndex < _alphabetSize;++fatherStateIndex){
	355	probFather2Son[fatherStateIndex].resize(_alphabetSize,0);
	356	expFather2Son[fatherStateIndex].resize(_alphabetSize,0);
	357	countFather2Son[fatherStateIndex].resize(_alphabetSize,0);
	358	for(sonStateIndex = 0;sonStateIndex < _alphabetSize;++sonStateIndex){
	359	if(sonStateIndex == fatherStateIndex) continue;
	360	out<<_sc.getAlphabet()->fromInt(fatherStateIndex)<<"->"<<_sc.getAlphabet()->fromInt(sonStateIndex)<<"\t"<<
	361	pos+1<<"\t"<<mynode->name()<<"\t"<<mynode->dis2father()<<"\t"<<getDistanceFromNode2ROOT(mynode)<<"\t"<<probChanges[mynode->id()][fatherStateIndex][sonStateIndex]<<"\t"<<expChanges[mynode->id()][fatherStateIndex][sonStateIndex]<<endl;
	362	probFather2Son[fatherStateIndex][sonStateIndex] += probChanges[mynode->id()][fatherStateIndex][sonStateIndex];
	363	expFather2Son[fatherStateIndex][sonStateIndex] += expChanges[mynode->id()][fatherStateIndex][sonStateIndex];
	364	if (probChanges[mynode->id()][fatherStateIndex][sonStateIndex] > countCutOff)
	365	countFather2Son[fatherStateIndex][sonStateIndex] += 1;
	366	}
	367	}
	368	}
	369	for(fatherStateIndex = 0;fatherStateIndex < _alphabetSize;++fatherStateIndex){
	370	for(sonStateIndex = 0;sonStateIndex < _alphabetSize;++sonStateIndex){
	371	if(sonStateIndex == fatherStateIndex) continue;
	372	outSum<<pos+1<<"\t"<<_sc.getAlphabet()->fromInt(fatherStateIndex)<<"->"<<_sc.getAlphabet()->fromInt(sonStateIndex)<<"\t"<<
	373	probFather2Son[fatherStateIndex][sonStateIndex]<<"\t"<<expFather2Son[fatherStateIndex][sonStateIndex]<<"\t"<<countFather2Son[fatherStateIndex][sonStateIndex]<<endl;
	374	}
	375	}
	376	}
	377

+71

-0

libs/phylogeny/computeSubstitutionCounts.h less more

	0	#ifndef ___COMPUTE_SUBSTITUTION_COUNTS
	1	#define ___COMPUTE_SUBSTITUTION_COUNTS
	2
	3	#include "definitions.h"
	4	#include "replacementModel.h"
	5	#include "sequenceContainer.h"
	6	#include "tree.h"
	7	#include <map>
	8
	9	class multipleStochasticProcess;
	10	class computeSubstitutionCounts{
	11	public:
	12	explicit computeSubstitutionCounts(const sequenceContainer& sc, const tree& tr, multipleStochasticProcess* MultSpPtr, string& outDir, VVVdouble& LpostPerSpPerCat, const int simulationsIterNum=1000, const MDOUBLE probCutOffSum=0.5, bool isSilent=false);//DEBUG: Change simulationsIterNum back to 10000
	13
	14	computeSubstitutionCounts(const computeSubstitutionCounts& other) {*this = other;}
	15	computeSubstitutionCounts& operator=(const computeSubstitutionCounts &other);
	16	virtual ~computeSubstitutionCounts() {}
	17	void run();
	18	void computePosteriorOfChangeGivenTerminalsPerSpPerCat();
	19
	20	void printProbExp();
	21	void printProbabilityPerPosPerBranch();
	22	void printProbExpPerPosPerBranch(MDOUBLE probCutOff =0.5,MDOUBLE countsCutOff= 0.2);
	23	void printExpectationPerBranch();
	24
	25	void printTreesWithExpectationValuesAsBP(int from,int to);
	26	void printTreesWithProbabilityValuesAsBP(int from,int to);
	27
	28	void printProbabilityPerPosPerBranch(int pos, VVVdouble& probChanges, ostream& out, ostream& outCount);
	29	void printExpectationPerBranch(VVVdouble& expectChanges, ostream& out);
	30	void printProbExpPerPosPerBranch(int pos, MDOUBLE probCutOff, MDOUBLE countCutOff, VVVdouble& probChanges, VVVdouble& expChanges, ostream& out, ostream& outCount);
	31
	32
	33	map<int,map<int,vector<double> > > get_expMap_father2son() {return _expMap_father2son;};
	34	map<int,map<int,vector<double> > > get_probMap_father2son() {return _probMap_father2son;};
	35
	36	VVVVdouble getExpChanges(){return _expChanges_PosNodeXY;}; // expChanges_PosNodeXY[pos][nodeID][x][y]
	37	VVVVdouble getProbChanges(){return _probChanges_PosNodeXY;}; // probChangesForBranch[pos][nodeID][x][y]
	38	VVVVdouble getJointProb(){return _jointProb_PosNodeXY;}; // _jointProb_PosNodeXY[pos][nodeID][x][y]
	39
	40
	41	protected:
	42	//members
	43	int _alphabetSize;
	44	const tree _tr;
	45	const sequenceContainer _sc;
	46
	47	multipleStochasticProcess* _pMSp;
	48
	49	sequence* _refSeq; // the reference sequence
	50	string _outDir;
	51	bool _isSilent;
	52	int _simulationsIterNum;
	53	MDOUBLE _probCutOffSum;
	54
	55	VVdouble _LpostPerCat; // the posterior probability for each position for each rate category
	56	VVVdouble _LpostPerSpPerCat; // _LpostPerSpPerCat[sp][rateCat][pos]
	57
	58
	59	map<int,map<int,vector<double> > > _expMap_father2son;
	60
	61	map<int,map<int,vector<double> > > _probMap_father2son;
	62
	63	//VVVVdouble _posteriorsGivenTerminals; // posteriorsGivenTerminals[pos][nodeID][x][y]
	64	VVVVdouble _probChanges_PosNodeXY; // probChanges_PosNodeXY[pos][nodeID][fatherState][sonState] - after simulations
	65	VVVVdouble _expChanges_PosNodeXY; // expChanges_PosNodeXY[pos][nodeID][fatherState][sonState] - after simulations and postProb
	66	VVVVdouble _jointProb_PosNodeXY; // probJoint_PosNodeXY[pos][nodeID][fatherState][sonState] - after computePosteriorOfChangeGivenTerminals
	67
	68	};
	69
	70	#endif

+157

-0

libs/phylogeny/computeUpAlg.cpp less more

	0	// $Id: computeUpAlg.cpp 5988 2009-03-18 18:20:05Z itaymay $
	1
	2	#include "definitions.h"
	3	#include "computeUpAlg.h"
	4	#include "treeIt.h"
	5	#include "seqContainerTreeMap.h"
	6	#include "logFile.h"
	7	#include <iostream>
	8	#include <cassert>
	9	using namespace std;
	10
	11	void computeUpAlg::fillComputeUp(const tree& et,
	12	const sequenceContainer & sc,
	13	const computePijGam& pi,
	14	suffStatGlobalGam& ssc) {
	15	computeUpAlg cupAlg;
	16	ssc.allocatePlace(sc.seqLen(),pi.categories(),et.getNodesNum(),pi.alphabetSize());
	17	for (int pos = 0; pos < sc.seqLen(); ++pos) {
	18	for (int categor = 0; categor < pi.categories(); ++categor) {
	19	cupAlg.fillComputeUp(et,sc,pos,pi[categor],ssc[pos][categor]);
	20	}
	21	}
	22	}
	23
	24	void computeUpAlg::fillComputeUp(const tree& et,
	25	const sequenceContainer& sc,
	26	const int pos,
	27	const computePijHom& pi,
	28	suffStatGlobalHomPos& ssc) {
	29
	30	seqContainerTreeMap sctm(sc,et);
	31
	32	ssc.allocatePlace(et.getNodesNum(),pi.alphabetSize());
	33	treeIterDownTopConst tIt(et);
	34	for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
	35	int letter;
	36	if (mynode->isLeaf()) {
	37	for(letter=0; letter<pi.alphabetSize();letter++) {
	38	const int seqID = sctm.seqIdOfNodeI(mynode->id());
	39	doubleRep val = sc.getAlphabet()->relations(sc[seqID][pos],letter);
	40	ssc.set(mynode->id(),letter,val);
	41	}
	42	}
	43	else {
	44	for(letter=0; letter<pi.alphabetSize();letter++) {
	45	doubleRep total_prob=1.0;
	46	for(int i=0; i < mynode->getNumberOfSons();++i){
	47	doubleRep prob=0.0;
	48	for(int letInSon=0; letInSon<pi.alphabetSize();letInSon++) {
	49	prob += ssc.get(mynode->getSon(i)->id(), letInSon)*
	50	pi.getPij(mynode->getSon(i)->id(),letter,letInSon);
	51	}
	52	total_prob*=prob;
	53	}
	54	ssc.set(mynode->id(),letter,total_prob);
	55	}
	56	}
	57	}
	58	}
	59	/*
	60	void computeUpAlg::fillComputeUp(const tree& et,
	61	const sequenceContainer& sc,
	62	const int pos,
	63	const stochasticProcess& sp,
	64	suffStatGlobalHomPos& ssc) {
	65
	66	seqContainerTreeMap sctm(sc,et);
	67
	68	ssc.allocatePlace(et.getNodesNum(),sp.alphabetSize());
	69	treeIterDownTopConst tIt(et);
	70	for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
	71	int letter;
	72	if (mynode->isLeaf()) {// leaf
	73	for(letter=0; letter<sp.alphabetSize();letter++) {
	74	const int seqID = sctm.seqIdOfNodeI(mynode->id());
	75	MDOUBLE val = sc.getAlphabet()->relations(sc[seqID][pos],letter);
	76	ssc.set(mynode->id(),letter,val);
	77	}
	78	}
	79	else {
	80	for(letter=0; letter<sp.alphabetSize();letter++) {
	81	MDOUBLE total_prob=1.0;
	82	for(int i=0; i < mynode->getNumberOfSons();++i){
	83	MDOUBLE prob=0.0;
	84	for(int letInSon=0; letInSon<sp.alphabetSize();letInSon++) {
	85	prob += ssc.get(mynode->getSon(i)->id(),letInSon)*
	86	sp.Pij_t(letter,letInSon,mynode->getSon(i)->dis2father()*sp.getGlobalRate());// taking care of the glubal is new.
	87	}
	88	assert(prob>=0.0);
	89	total_prob*=prob;
	90	}
	91	ssc.set(mynode->id(),letter,total_prob);
	92	}
	93	}
	94	}
	95	}
	96	*/
	97	void computeUpAlg::fillComputeUpSpecificGlobalRate(const tree& et,
	98	const sequenceContainer& sc,
	99	const int pos,
	100	const stochasticProcess& sp,
	101	suffStatGlobalHomPos& ssc,
	102	const MDOUBLE gRate) {
	103	if (sp.categories() >1) {// because we do not multiply all branch lengths by the rate[categories])
	104	errorMsg::reportError("the function fillComputeUpSpecificGlobalRate should not be used with a gamma model");
	105	}
	106
	107	seqContainerTreeMap sctm(sc,et);
	108
	109	ssc.allocatePlace(et.getNodesNum(),sp.alphabetSize());
	110	treeIterDownTopConst tIt(et);
	111	for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
	112	#ifdef VERBOS
	113	LOG(15,<<endl<<endl<<"doing node: "<<mynode->name()<<endl);
	114	#endif
	115	int letter;
	116	if (mynode->isLeaf()) {
	117	for(letter=0; letter<sp.alphabetSize();letter++) {
	118	const int seqID = sctm.seqIdOfNodeI(mynode->id());
	119	doubleRep val = sc.getAlphabet()->relations(sc[seqID][pos],letter);
	120	ssc.set(mynode->id(),letter,val);
	121	}
	122	}
	123	else {
	124	int letterWithTotalProbEqZero =0;
	125	for(letter=0; letter<sp.alphabetSize();letter++) {
	126	doubleRep total_prob=1.0;
	127	for(int i=0; i < mynode->getNumberOfSons();++i){
	128	doubleRep prob=0.0;
	129	for(int letInSon=0; letInSon<sp.alphabetSize();letInSon++) {
	130	assert(ssc.get(mynode->getSon(i)->id(),letInSon)>=0);
	131	assert(sp.Pij_t(letter,letInSon,mynode->getSon(i)->dis2father()*gRate)>=0);
	132	prob += ssc.get(mynode->getSon(i)->id(),letInSon)*
	133	sp.Pij_t(letter,letInSon,mynode->getSon(i)->dis2father()*gRate);
	134	}
	135	assert(prob>=0.0);
	136	total_prob*=prob;
	137	}
	138	if (total_prob==0.0) ++letterWithTotalProbEqZero;
	139
	140	ssc.set(mynode->id(),letter,total_prob);
	141	} // end of else
	142	if (letterWithTotalProbEqZero == sp.alphabetSize() && (mynode->getNumberOfSons() > 0)) {
	143	LOG(5,<<" total prob =0");
	144	for (int z=0; z <mynode->getNumberOfSons(); ++z) {
	145	LOG(5,<<"son "<<z<<" is "<<mynode->getSon(z)->name()<<endl);
	146	LOG(5,<<"dis2father is "<<mynode->getSon(z)->dis2father()<<endl);
	147	for(int letInSon=0; letInSon<sp.alphabetSize();letInSon++) {
	148	LOG(5,<<"let = "<<letInSon<<endl);
	149	LOG(5,<<"ssc.get(mynode->getSon(z)->id(),letInSon) = "<<convert(ssc.get(mynode->getSon(z)->id(),letInSon))<<endl);
	150	}
	151	}
	152	return;
	153	}
	154	}
	155	}
	156	}

+67

-0

libs/phylogeny/computeUpAlg.h less more

	0	// $Id: computeUpAlg.h 962 2006-11-07 15:13:34Z privmane $
	1
	2	#ifndef ___COMPUTE_UP_ALG
	3	#define ___COMPUTE_UP_ALG
	4
	5	#include "definitions.h"
	6	#include "tree.h"
	7	#include "suffStatComponent.h"
	8	#include "sequenceContainer.h"
	9	#include "computePijComponent.h"
	10
	11
	12	class computeUpAlg {
	13	public:
	14	void fillComputeUp(const tree& et,
	15	const sequenceContainer& sc,
	16	const int pos,
	17	const computePijHom& pi,
	18	suffStatGlobalHomPos& ssc);
	19
	20	void fillComputeUp(const tree& et,
	21	const sequenceContainer & sc,
	22	const computePijGam& pi,
	23	suffStatGlobalGam& ssc);
	24
	25	/*void fillComputeUp(const tree& et, // not to be used at all. problematic in case of a gamma function.
	26	const sequenceContainer& sc,
	27	const int pos,
	28	const stochasticProcess& sp,
	29	suffStatGlobalHomPos& ssc);*/
	30
	31	/*void fillComputeUp(const tree& et, // not to be used, accept for debuging (very slow func.)
	32	const sequenceContainer& sc,
	33	const stochasticProcess& sp,
	34	suffStatGlobalGam& ssc);*/
	35
	36	void fillComputeUpSpecificGlobalRate(const tree& et,
	37	const sequenceContainer& sc,
	38	const int pos,
	39	const stochasticProcess& sp,
	40	suffStatGlobalHomPos& ssc,
	41	const MDOUBLE gRate);
	42
	43	// my attemp to add factors
	44	void fillComputeUpWithFactors(const tree& et,
	45	const sequenceContainer& sc,
	46	const int pos,
	47	const computePijHom& pi,
	48	suffStatGlobalHomPos& ssc,
	49	vector<MDOUBLE>& factors);
	50	void fillComputeUpWithFactors(const tree& et,
	51	const sequenceContainer& sc,
	52	const int pos,
	53	const stochasticProcess& sp,
	54	suffStatGlobalHomPos& ssc,
	55	vector<MDOUBLE>& factors);
	56	void fillComputeUpSpecificGlobalRateFactors(const tree& et,
	57	const sequenceContainer& sc,
	58	const int pos,
	59	const stochasticProcess& sp,
	60	suffStatGlobalHomPos& ssc,
	61	const MDOUBLE gRate,
	62	vector<MDOUBLE>& factors);
	63	};
	64	#endif
	65
	66

+190

-0

libs/phylogeny/computeUpAlgFactors.cpp less more

	0	// $Id: computeUpAlgFactors.cpp 8034 2010-06-03 20:26:39Z itaymay $
	1
	2	#include "definitions.h"
	3	#include "computeUpAlg.h"
	4	#include "seqContainerTreeMap.h"
	5	#include "logFile.h"
	6	#include <iostream>
	7	#include <cassert>
	8	#include <cmath>
	9	#include <cstdlib>
	10	using namespace std;
	11
	12	void computeNodeFactorAndSetSsc(MDOUBLE & minFactor,suffStatGlobalHomPos& ssc, int nodeId, const int alphSize){
	13	// given a number = probability (val), it is changed to a new number which is 10 to the power of factor + val.
	14	// for example if val = 0.001, it is changed to 0.1 and factor 2.
	15	minFactor=100000;
	16	for (int i=0; i < alphSize; ++i) {
	17	MDOUBLE tmpfactor=0;
	18	doubleRep val = ssc.get(nodeId,i);
	19	if (val >0) {
	20	while (val < 0.1) {
	21	val *=10;
	22	tmpfactor++;
	23	}
	24	}
	25	else tmpfactor=minFactor;
	26	if (tmpfactor<minFactor) minFactor=tmpfactor;
	27	}
	28	for (int j=0; j < alphSize; ++j) {
	29	doubleRep tmp = ssc.get(nodeId,j);
	30	tmp = tmp * pow(static_cast<MDOUBLE>(10.0),minFactor);
	31	ssc.set(nodeId,j,tmp);
	32	}
	33	}
	34
	35	void computeUpAlg::fillComputeUpWithFactors(const tree& et,
	36	const sequenceContainer& sc,
	37	const int pos,
	38	const computePijHom& pi,
	39	suffStatGlobalHomPos& ssc,
	40	vector<MDOUBLE>& factors) {
	41	factors.resize(et.getNodesNum(),0.0);
	42	seqContainerTreeMap sctm(sc,et);
	43
	44	ssc.allocatePlace(et.getNodesNum(),pi.alphabetSize());
	45	treeIterDownTopConst tIt(et);
	46	for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
	47	int letter;
	48	if (mynode->getNumberOfSons() == 0) {// leaf
	49	for(letter=0; letter<pi.alphabetSize();letter++) {
	50	const int seqID = sctm.seqIdOfNodeI(mynode->id());
	51	doubleRep val = sc.getAlphabet()->relations(sc[seqID][pos],letter);
	52	ssc.set(mynode->id(),letter,val);
	53	}
	54	computeNodeFactorAndSetSsc(factors[mynode->id()],ssc,mynode->id(),pi.alphabetSize());
	55	}
	56	else {
	57	for(letter=0; letter<pi.alphabetSize();letter++) {
	58	doubleRep total_prob=1.0;
	59	for(int i=0; i < mynode->getNumberOfSons(); ++i){
	60	doubleRep prob=0.0;
	61	for(int letInSon=0; letInSon<pi.alphabetSize();letInSon++) {
	62	prob += ssc.get(mynode->getSon(i)->id(),letInSon)*
	63	pi.getPij(mynode->getSon(i)->id(),letter,letInSon);
	64	}
	65	total_prob*=prob;
	66	}
	67	ssc.set(mynode->id(),letter,total_prob);
	68	}
	69	computeNodeFactorAndSetSsc(factors[mynode->id()],ssc,mynode->id(),pi.alphabetSize());
	70	for(int k=0; k < mynode->getNumberOfSons();++k) {
	71	factors[mynode->id()]+=factors[mynode->getSon(k)->id()];
	72	}
	73	}
	74	}
	75	}
	76
	77	void computeUpAlg::fillComputeUpWithFactors(const tree& et,
	78	const sequenceContainer& sc,
	79	const int pos,
	80	const stochasticProcess& sp,
	81	suffStatGlobalHomPos& ssc,
	82	vector<MDOUBLE>& factors) {
	83	factors.resize(et.getNodesNum(),0.0);
	84	seqContainerTreeMap sctm(sc,et);
	85
	86	ssc.allocatePlace(et.getNodesNum(),sp.alphabetSize());
	87	treeIterDownTopConst tIt(et);
	88	for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
	89	int letter;
	90	if (mynode->getNumberOfSons() == 0) {// leaf
	91	for(letter=0; letter<sp.alphabetSize();letter++) {
	92	const int seqID = sctm.seqIdOfNodeI(mynode->id());
	93	doubleRep val = sc.getAlphabet()->relations(sc[seqID][pos],letter);
	94	ssc.set(mynode->id(),letter,val);
	95	}
	96	computeNodeFactorAndSetSsc(factors[mynode->id()],ssc,mynode->id(),sp.alphabetSize());
	97	}
	98	else {
	99	for(letter=0; letter<sp.alphabetSize();letter++) {
	100	doubleRep total_prob=1.0;
	101	for(int i=0; i < mynode->getNumberOfSons();++i){
	102	doubleRep prob=0.0;
	103	for(int letInSon=0; letInSon<sp.alphabetSize();letInSon++) {
	104	prob += ssc.get(mynode->getSon(i)->id(),letInSon)*
	105	sp.Pij_t(letter,letInSon,mynode->getSon(i)->dis2father()*sp.getGlobalRate());// taking care of the glubal is new.
	106	}
	107	assert(prob>=0);
	108	total_prob*=prob;
	109	}
	110	ssc.set(mynode->id(),letter,total_prob);
	111	}
	112	computeNodeFactorAndSetSsc(factors[mynode->id()],ssc,mynode->id(),sp.alphabetSize());
	113	for(int k=0; k < mynode->getNumberOfSons();++k) {
	114	factors[mynode->id()]+=factors[mynode->getSon(k)->id()];
	115	}
	116	}
	117	}
	118	}
	119
	120	void computeUpAlg::fillComputeUpSpecificGlobalRateFactors(const tree& et,
	121	const sequenceContainer& sc,
	122	const int pos,
	123	const stochasticProcess& sp,
	124	suffStatGlobalHomPos& ssc,
	125	const MDOUBLE gRate,
	126	vector<MDOUBLE>& factors) {
	127	factors.resize(et.getNodesNum(),0.0);
	128	seqContainerTreeMap sctm(sc,et);
	129
	130	ssc.allocatePlace(et.getNodesNum(),sp.alphabetSize());
	131	treeIterDownTopConst tIt(et);
	132	for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
	133	#ifdef VERBOS
	134	LOG(5,<<endl<<endl<<"doing node: "<<mynode->name()<<endl);
	135	#endif
	136	int letter;
	137	if (mynode->getNumberOfSons() == 0) {// leaf
	138	for(letter=0; letter<sp.alphabetSize();letter++) {
	139	const int seqID = sctm.seqIdOfNodeI(mynode->id());
	140	doubleRep val = sc.getAlphabet()->relations(sc[seqID][pos],letter);
	141	ssc.set(mynode->id(),letter,val);
	142	}
	143	computeNodeFactorAndSetSsc(factors[mynode->id()],ssc,mynode->id(),sp.alphabetSize());
	144	}
	145	else {
	146	int letterWithTotalProbEqZero =0;
	147	for(letter=0; letter<sp.alphabetSize();letter++) {
	148	doubleRep total_prob=1.0;
	149	for(int i=0; i < mynode->getNumberOfSons();++i){
	150	doubleRep prob=0.0;
	151	for(int letInSon=0; letInSon<sp.alphabetSize();letInSon++) {
	152	assert(ssc.get(mynode->getSon(i)->id(),letInSon)>=0);
	153	assert(sp.Pij_t(letter,letInSon,mynode->getSon(i)->dis2father()*gRate)>=0);
	154	prob += ssc.get(mynode->getSon(i)->id(),letInSon)*
	155	sp.Pij_t(letter,letInSon,mynode->getSon(i)->dis2father()*gRate);
	156	}
	157	assert(prob>=0);
	158	total_prob*=prob;
	159	}
	160	if (total_prob ==0) ++letterWithTotalProbEqZero;
	161
	162	ssc.set(mynode->id(),letter,total_prob);
	163	} // end of else
	164	computeNodeFactorAndSetSsc(factors[mynode->id()],ssc,mynode->id(),sp.alphabetSize());
	165	for(int k=0; k < mynode->getNumberOfSons();++k) {
	166	factors[mynode->id()]+=factors[mynode->getSon(k)->id()];
	167	}
	168	if (letterWithTotalProbEqZero == sp.alphabetSize() && (mynode->getNumberOfSons() > 0)) {
	169	LOG(5,<<" total prob =0");
	170	for (int z=0; z <mynode->getNumberOfSons(); ++z) {
	171	LOG(5,<<"son "<<z<<" is "<<mynode->getSon(z)->name()<<endl);
	172	LOG(5,<<"dis2father is "<<mynode->getSon(z)->dis2father()<<endl);
	173	for(int letInSon=0; letInSon<sp.alphabetSize();letInSon++) {
	174	LOG(5,<<"let = "<<letInSon<<endl);
	175	LOG(5,<<"ssc.get(mynode->sons[z]->id(),letInSon) = "<<convert(ssc.get(mynode->getSon(z)->id(),letInSon))<<endl);
	176	// LOG(5,<<"sp.Pij_t(letter,letInSon,mynode->getSon(i)->dis2father()gRate) = "<<sp.Pij_t(letter,letInSon,mynode->sons[i]->dis2father()gRate)<<endl);
	177	// LOG(5,<<"mynode->getSon(i)->dis2father() = "<<mynode->getSon(i)->dis2father()<<endl);
	178
	179
	180
	181
	182
	183	}
	184	}
	185	exit(3);
	186	}
	187	}
	188	}
	189	}

+34

-0

libs/phylogeny/countTableComponent.cpp less more

	0	// $Id: countTableComponent.cpp 9595 2011-06-30 18:56:40Z rubi $
	1
	2	// version 1.00
	3	// last modified 3 Nov 2002
	4
	5	#include "countTableComponent.h"
	6	#include "logFile.h"
	7
	8	void countTableComponentHom::zero() {
	9	for (int alphabetChar1=0; alphabetChar1 < _countValues.size() ;++alphabetChar1) {
	10	for (int alphabetChar2=0; alphabetChar2 < _countValues[alphabetChar1].size() ;++alphabetChar2) {
	11	_countValues[alphabetChar1][alphabetChar2] = 0;
	12	}
	13	}
	14	}
	15
	16	void countTableComponentHom::countTableComponentAllocatePlace(
	17	const int alphabetSize) {
	18	_countValues.resize(alphabetSize);
	19	for (int alphabetChar=0; alphabetChar < alphabetSize;++alphabetChar) _countValues[alphabetChar].resize(alphabetSize);
	20	}
	21
	22	void countTableComponentHom::printTable(ostream& out) const {
	23	MDOUBLE sumCheck = 0.0;
	24	for (int i=0; i < _countValues.size();++i) {
	25	for (int k=0; k < _countValues.size();++k) {
	26	out<<"counts["<<i<<"]["<<k<<"]"<<_countValues[i][k];
	27	sumCheck += _countValues[i][k];
	28	out<<endl;
	29	}
	30	}
	31	out<<"sum is: "<<sumCheck<<endl;
	32	}
	33

+140

-0

libs/phylogeny/countTableComponent.h less more

	0	// $Id: countTableComponent.h 9595 2011-06-30 18:56:40Z rubi $
	1
	2	#ifndef ___COUNT_TABLE_COMPONENT
	3	#define ___COUNT_TABLE_COMPONENT
	4
	5	#include "definitions.h"
	6	#include <iostream>
	7	#include <cassert>
	8
	9	class countTableComponentHom{
	10	public:
	11
	12	void setCount( const int letter1,
	13	const int letter2,
	14	const MDOUBLE val) {
	15	_countValues[letter1][letter2]=val;
	16	}
	17	int alphabetSize() const {return _countValues.size();}
	18	void zero();
	19	MDOUBLE getCounts( const int letter1,
	20	const int letter2) const {
	21	return _countValues[letter1][letter2];
	22	}
	23	void addToCounts(const int let1,const int let2,const MDOUBLE val) {
	24	_countValues[let1][let2]+=val;
	25	}
	26	int getSize() const {return _countValues.size();}
	27	bool isEmpty (){return (_countValues.empty());};
	28	void countTableComponentAllocatePlace(const int alphabetSize);
	29	void printTable(ostream & out) const;
	30	const Vdouble& operator[] (int i) const {return _countValues[i];}
	31	private:
	32	VVdouble _countValues;//letter1,letter2
	33
	34	};
	35
	36	class countTableComponentGam{
	37	public:
	38
	39	void setCount( const int letter1,
	40	const int letter2,
	41	const int rateCategor,
	42	const MDOUBLE val) {
	43	_countValues[rateCategor].setCount(letter1,letter2,val);
	44	}
	45
	46	int alphabetSize() const {return _countValues.empty()?0:_countValues[0].alphabetSize();}
	47	void zero(){
	48	for (int rateCat=0; rateCat < _countValues.size(); ++rateCat) _countValues[rateCat].zero();
	49	}
	50
	51
	52	MDOUBLE getCounts( const int letter1,
	53	const int letter2,
	54	const int rateCategor) const {
	55	assert(_countValues[rateCategor].getCounts(letter1,letter2)>=0);
	56	return _countValues[rateCategor].getCounts(letter1,letter2);
	57	}
	58
	59	void addToCounts(const int let1,const int let2,
	60	const int rate,const MDOUBLE val) {
	61	_countValues[rate].addToCounts(let1,let2,val);
	62	}
	63
	64	bool isEmpty (){return (_countValues.empty());};
	65
	66	void countTableComponentAllocatePlace(const int alphabetSize,
	67	const int numberOfrateCategories) {
	68	_countValues.resize(numberOfrateCategories);
	69	for (int rateCat=0; rateCat < _countValues.size(); ++rateCat){
	70	_countValues[rateCat].countTableComponentAllocatePlace(alphabetSize);
	71	}
	72	}
	73	void printTable(ostream & out) const {
	74	for (int rateCat=0; rateCat < _countValues.size(); ++rateCat) {
	75	_countValues[rateCat].printTable(out);
	76	}
	77	}
	78	int getSize() const {return _countValues.size();}
	79	countTableComponentHom& operator[] (int i) {return _countValues[i];}
	80	const countTableComponentHom& operator[] (int i) const {return _countValues[i];}
	81	private:
	82	vector<countTableComponentHom> _countValues;//letter1,letter2,rateCategor
	83
	84	};
	85
	86	class countTableComponentGamProportional{
	87	public:
	88
	89	void setCount( const int letter1,
	90	const int letter2,
	91	const int globalRateCategor,
	92	const int localRateCategor,
	93	const MDOUBLE val) {
	94	_countValues[globalRateCategor].setCount(letter1,letter2,localRateCategor,val);
	95	}
	96
	97	int alphabetSize() const {return _countValues.empty()?0:_countValues[0].alphabetSize();}
	98	void zero(){
	99	for (int globalRateCat=0; globalRateCat < _countValues.size(); ++globalRateCat) _countValues[globalRateCat].zero();
	100	}
	101
	102
	103	MDOUBLE getCounts( const int letter1,
	104	const int letter2,
	105	const int globalRateCategor,
	106	const int localRateCategor) const {
	107	assert(_countValues[globalRateCategor].getCounts(letter1,letter2,localRateCategor)>=0);
	108	return _countValues[globalRateCategor].getCounts(letter1,letter2,localRateCategor);
	109	}
	110
	111	void addToCounts(const int let1,const int let2,
	112	const int globalRate,const int localRate,const MDOUBLE val) {
	113	_countValues[globalRate].addToCounts(let1,let2,localRate,val);
	114	}
	115
	116	bool isEmpty (){return (_countValues.empty());}
	117
	118	void countTableComponentAllocatePlace(const int alphabetSize,
	119	const int numberOfGlobalRateCategories,const int numberOfLocalRateCategories) {
	120	_countValues.resize(numberOfGlobalRateCategories);
	121	for(int globalRateCat = 0;globalRateCat < _countValues.size(); ++globalRateCat){
	122	_countValues[globalRateCat].countTableComponentAllocatePlace(alphabetSize,numberOfLocalRateCategories);
	123	}
	124	}
	125	void printTable(ostream & out) const {
	126	for (int globalRateCat=0; globalRateCat < _countValues.size(); ++globalRateCat) {
	127	_countValues[globalRateCat].printTable(out);
	128	}
	129	}
	130	int getSize() const {return _countValues.size();}
	131	countTableComponentGam& operator[] (int i) {return _countValues[i];}
	132	const countTableComponentGam& operator[] (int i) const {return _countValues[i];}
	133	private:
	134	vector<countTableComponentGam> _countValues;//letter1,letter2,globalRateCategor,localRateCategor
	135
	136	};
	137
	138	#endif
	139

+22

-0

libs/phylogeny/cpREV45.dat.q less more

	0	" 105 "
	1	" 227 357 "
	2	" 175 43 4435 "
	3	" 669 823 538 10 "
	4	" 157 1745 768 400 10 "
	5	" 499 152 1055 3691 10 3122 "
	6	" 665 243 653 431 303 133 379 "
	7	" 66 715 1405 331 441 1269 162 19 "
	8	" 145 136 168 10 280 92 148 40 29 "
	9	" 197 203 113 10 396 286 82 20 66 1745 "
	10	" 236 4482 2430 412 48 3313 2629 263 305 345 218 "
	11	" 185 125 61 47 159 202 113 21 10 1772 1351 193 "
	12	" 68 53 97 22 726 10 145 25 127 454 1268 72 327 "
	13	" 490 87 173 170 285 323 185 28 152 117 219 302 100 43 "
	14	" 2440 385 2085 590 2331 396 568 691 303 216 516 868 93 487 1202 "
	15	" 1340 314 1393 266 576 241 369 92 32 1040 156 918 645 148 260 2151 "
	16	" 14 230 40 18 435 53 63 82 69 42 159 10 86 468 49 73 29 "
	17	" 56 323 754 281 1466 391 142 10 1971 89 189 247 215 2370 97 522 71 346 "
	18	" 968 92 83 75 592 54 200 91 25 4797 865 249 475 317 122 167 760 10 119 "
	19	" 0.076 0.062 0.041 0.037 0.009 0.038 0.049 0.084 0.025 0.081 "
	20	" 0.101 0.050 0.022 0.051 0.043 0.062 0.054 0.018 0.031 0.066 "
	21	" cpREV45 model "

+32

-0

libs/phylogeny/datMatrixHolder.cpp less more

	0	// $Id: datMatrixHolder.cpp 5804 2009-01-20 09:18:05Z adido $
	1
	2	#include "datMatrixHolder.h"
	3
	4	const datMatrixString datMatrixHolder::cpREV45(
	5	#include "cpREV45.dat.q"
	6	);
	7	const datMatrixString datMatrixHolder::dayhoff(
	8	#include "dayhoff.dat.q"
	9	);
	10	const datMatrixString datMatrixHolder::jones(
	11	#include "jones.dat.q"
	12	);
	13	const datMatrixString datMatrixHolder::mtREV24(
	14	#include "mtREV24.dat.q"
	15	);
	16	const datMatrixString datMatrixHolder::wag(
	17	#include "wag.dat.q"
	18	);
	19	const datMatrixString datMatrixHolder::HIVb(
	20	#include "HIVb.dat.q"
	21	);
	22	const datMatrixString datMatrixHolder::HIVw(
	23	#include "HIVw.dat.q"
	24	);
	25	const datMatrixString datMatrixHolder::empiriCodon(
	26	#include "adrianCodon.dat.q"
	27	);
	28	const datMatrixString datMatrixHolder::lg(
	29	#include "LG.dat.q"
	30	);
	31

+31

-0

libs/phylogeny/datMatrixHolder.h less more

	0	// $Id: datMatrixHolder.h 5804 2009-01-20 09:18:05Z adido $
	1
	2	#ifndef ___DATMATRIXHOLDER
	3	#define ___DATMATRIXHOLDER
	4
	5	#include <string>
	6	using namespace std;
	7
	8	// THIS CONSTRUCT IS USED TO KEEP A STRING THAT IS THE AA SUBSTITUTION MATRIX
	9	// THE datMatrixString IS TO BE USED WHENEVER WE USE ONE OF THE BUILT-IN AA SUBSTITUTION MATRICES.
	10
	11	class datMatrixString {
	12	public:
	13	const string Val;
	14	explicit datMatrixString(const char * str): Val(str){};
	15	};
	16
	17	class datMatrixHolder {
	18	public:
	19	static const datMatrixString cpREV45;
	20	static const datMatrixString dayhoff;
	21	static const datMatrixString jones; // This is JTT
	22	static const datMatrixString mtREV24;
	23	static const datMatrixString wag;
	24	static const datMatrixString HIVb;
	25	static const datMatrixString HIVw;
	26	static const datMatrixString lg;
	27	static const datMatrixString empiriCodon; //This is the empirical matrix for codon by gina and adrian
	28	};
	29
	30	#endif // ___DATMATRIXHOLDER

+79

-0

libs/phylogeny/dayhoff.dat.q less more

	0	" 27 "
	1	" 98 32 "
	2	" 120 0 905 "
	3	" 36 23 0 0 "
	4	" 89 246 103 134 0 "
	5	" 198 1 148 1153 0 716 "
	6	" 240 9 139 125 11 28 81 "
	7	" 23 240 535 86 28 606 43 10 "
	8	" 65 64 77 24 44 18 61 0 7 "
	9	" 41 15 34 0 0 73 11 7 44 257 "
	10	" 26 464 318 71 0 153 83 27 26 46 18 "
	11	" 72 90 1 0 0 114 30 17 0 336 527 243 "
	12	" 18 14 14 0 0 0 0 15 48 196 157 0 92 "
	13	" 250 103 42 13 19 153 51 34 94 12 32 33 17 11 "
	14	" 409 154 495 95 161 56 79 234 35 24 17 96 62 46 245 "
	15	" 371 26 229 66 16 53 34 30 22 192 33 136 104 13 78 550 "
	16	" 0 201 23 0 0 0 0 0 27 0 46 0 0 76 0 75 0 "
	17	" 24 8 95 0 96 0 22 0 127 37 28 13 0 698 0 34 42 61 "
	18	" 208 24 15 18 49 35 37 54 44 889 175 10 258 12 48 30 157 0 28 "
	19	" 0.087127 0.040904 0.040432 0.046872 0.033474 0.038255 0.049530 "
	20	" 0.088612 0.033618 0.036886 0.085357 0.080482 0.014753 0.039772 "
	21	" 0.050680 0.069577 0.058542 0.010494 0.029916 0.064718 "
	22	" Ala Arg Asn Asp Cys Gln Glu Gly His Ile Leu Lys Met Phe Pro Ser Thr Trp Tyr Val "
	23	" S_ij = S_ji and PI_i for the Dayhoff model, with the rate Q_ij=S_ij*PI_j "
	24	" The rest of the file is not used. "
	25	" Prepared by Z. Yang, March 1995. "
	26	" See the following reference for notation used here: "
	27	" Yang, Z., R. Nielsen and M. Hasegawa. 1998. Models of amino acid substitution and "
	28	" applications to mitochondrial protein evolution. Mol. Biol. Evol. 15:1600-1611. "
	29	" ----------------------------------------------------------------------- "
	30	" "
	31	" 30 "
	32	" 109 17 "
	33	" 154 0 532 "
	34	" 33 10 0 0 "
	35	" 93 120 50 76 0 "
	36	" 266 0 94 831 0 422 "
	37	" 579 10 156 162 10 30 112 "
	38	" 21 103 226 43 10 243 23 10 "
	39	" 66 30 36 13 17 8 35 0 3 "
	40	" 95 17 37 0 0 75 15 17 40 253 "
	41	" 57 477 322 85 0 147 104 60 23 43 39 "
	42	" 29 17 0 0 0 20 7 7 0 57 207 90 "
	43	" 20 7 7 0 0 0 0 17 20 90 167 0 17 "
	44	" 345 67 27 10 10 93 40 49 50 7 43 43 4 7 "
	45	" 772 137 432 98 117 47 86 450 26 20 32 168 20 40 269 "
	46	" 590 20 169 57 10 37 31 50 14 129 52 200 28 10 73 696 "
	47	" 0 27 3 0 0 0 0 0 3 0 13 0 0 10 0 17 0 "
	48	" 20 3 36 0 30 0 10 0 40 13 23 10 0 260 0 22 23 6 "
	49	" 365 20 13 17 33 27 37 97 30 661 303 17 77 10 50 43 186 0 17 "
	50	" A R N D C Q E G H I L K M F P S T W Y V "
	51	" Ala Arg Asn Asp Cys Gln Glu Gly His Ile Leu Lys Met Phe Pro Ser Thr Trp Tyr Val "
	52	" Accepted point mutations (x10) Figure 80 (Dayhoff 1978) "
	53	" ------------------------------------------------------- "
	54	" A 100 /* Ala / A 0.087 / Ala */ "
	55	" R 65 /* Arg / R 0.041 / Arg */ "
	56	" N 134 /* Asn / N 0.040 / Asn */ "
	57	" D 106 /* Asp / D 0.047 / Asp */ "
	58	" C 20 /* Cys / C 0.033 / Cys */ "
	59	" Q 93 /* Gln / Q 0.038 / Gln */ "
	60	" E 102 /* Glu / E 0.050 / Glu */ "
	61	" G 49 /* Gly / G 0.089 / Gly */ "
	62	" H 66 /* His / H 0.034 / His */ "
	63	" I 96 /* Ile / I 0.037 / Ile */ "
	64	" L 40 /* Leu / L 0.085 / Leu */ "
	65	" K 56 /* Lys / K 0.081 / Lys */ "
	66	" M 94 /* Met / M 0.015 / Met */ "
	67	" F 41 /* Phe / F 0.040 / Phe */ "
	68	" P 56 /* Pro / P 0.051 / Pro */ "
	69	" S 120 /* Ser / S 0.070 / Ser */ "
	70	" T 97 /* Thr / T 0.058 / Thr */ "
	71	" W 18 /* Trp / W 0.010 / Trp */ "
	72	" Y 41 /* Tyr / Y 0.030 / Tyr */ "
	73	" V 74 /* Val / V 0.065 / Val */ "
	74	" scale factor = SUM_OF_PRODUCT = 75.246 "
	75	" Relative Mutability The equilibrium freqs. "
	76	" (Table 21) Table 22 "
	77	" (Dayhoff 1978) Dayhoff (1978) "
	78	" ---------------------------------------------------------------- "

+93

-0

libs/phylogeny/definitions.h less more

	0	// $Id: definitions.h 10679 2012-05-29 19:04:27Z cohenofi $
	1
	2	#ifndef ___DEFINITIONS_H
	3	#define ___DEFINITIONS_H
	4
	5	#ifdef _MSC_VER
	6	#define LIMITS_WORKING
	7	#endif
	8
	9	#ifdef _MSC_VER
	10	#pragma warning (disable: 4786)
	11	#pragma warning (disable: 4267)
	12	#pragma warning (disable: 4018)
	13	#pragma warning (disable: 4305) //truncation from 'double' to 'float'
	14	#endif
	15
	16
	17	#include <vector>
	18	#include <string>
	19
	20	#ifdef LIMITS_WORKING
	21	#include <limits>
	22	#endif
	23	using namespace std;
	24
	25	#define MDOUBLE double
	26	//#define MDOUBLE float
	27
	28	// Constants
	29	#define PI (3.1415926535897932384626433832795028841971693993751058)
	30
	31	typedef vector<MDOUBLE> Vdouble;
	32	typedef vector<int> Vint;
	33	typedef vector<Vint> VVint;
	34	typedef vector<VVint> VVVint;
	35	typedef vector<char> Vchar;
	36	typedef vector<Vdouble> VVdouble;
	37	typedef vector<VVdouble> VVVdouble;
	38	typedef vector<VVVdouble> VVVVdouble;
	39	typedef vector<VVVVdouble> VVVVVdouble;
	40	typedef vector<string> Vstring;
	41
	42	#ifdef LIMITS_WORKING
	43	const MDOUBLE VERYBIG = numeric_limits<MDOUBLE>::max();
	44	const MDOUBLE VERYSMALL = -VERYBIG;
	45	const MDOUBLE EPSILON = numeric_limits<MDOUBLE>::epsilon();
	46	#else
	47	// IF <limits> is not recognized, and MDOUBLE is double.
	48	const MDOUBLE VERYBIG = 1.79769e+308;
	49	const MDOUBLE VERYSMALL = -VERYBIG;
	50	const MDOUBLE EPSILON = 2.22045e-016;
	51	#endif
	52
	53	//The maximum value for type float is: 3.40282e+038
	54	//The maximum value for type double is: 1.79769e+308
	55	//::epsilon() returns the difference between 1 and the smallest value greater than 1 that is representable for the data type.
	56	//epsilon float 1.19209e-007
	57	//epsilon double 2.22045e-016
	58
	59	#ifdef LOGREP
	60	class logRep;
	61	typedef vector<logRep> VlogRep;
	62	typedef vector <vector<logRep> > VVlogRep;
	63	typedef vector< vector <vector<logRep> > > VVVlogRep;
	64	typedef logRep doubleRep;
	65	typedef VlogRep VdoubleRep;
	66	typedef VVlogRep VVdoubleRep;
	67	typedef VVVlogRep VVVdoubleRep;
	68	#include "logRep.h"
	69	#elif defined (DOUBLEREP)
	70	class doubleRepMantisa;
	71	typedef vector<doubleRepMantisa> VdoubleRepMantisa;
	72	typedef vector <vector<doubleRepMantisa> > VVdoubleRepMantisa;
	73	typedef vector <VVdoubleRepMantisa > VVVdoubleRepMantisa;
	74	typedef vector <VVVdoubleRepMantisa > VVVVdoubleRepMantisa;
	75	typedef doubleRepMantisa doubleRep;
	76	typedef VdoubleRepMantisa VdoubleRep;
	77	typedef VVdoubleRepMantisa VVdoubleRep;
	78	typedef VVVdoubleRepMantisa VVVdoubleRep;
	79	typedef VVVVdoubleRepMantisa VVVVdoubleRep;
	80	#include "doubleRep.h"
	81	#else
	82	typedef MDOUBLE doubleRep;
	83	typedef Vdouble VdoubleRep;
	84	typedef VVdouble VVdoubleRep;
	85	typedef VVVdouble VVVdoubleRep;
	86	typedef VVVVdouble VVVVdoubleRep;
	87	inline MDOUBLE convert (MDOUBLE d) {return (d);}
	88	#endif
	89
	90	#endif
	91
	92

+554

-0

libs/phylogeny/distanceBasedSeqs2Tree.cpp less more

	0	// $Id: distanceBasedSeqs2Tree.cpp 6002 2009-03-20 19:39:03Z privmane $
	1
	2	#include "distanceBasedSeqs2Tree.h"
	3	#include "uniDistribution.h"
	4	#include "distanceTable.h"
	5	#include "bestAlpha.h"
	6	#include "siteSpecificRate.h"
	7	#include "someUtil.h"
	8	#include "bblEM.h"
	9	#include "tamura92.h"
	10	#include "bestTamura92param.h"
	11	#include "bestGtrModelParams.h"
	12	#include <float.h>
	13	#include "replacementModelSSRV.h"
	14	#include "trivialAccelerator.h"
	15
	16	// **********************************************************************
	17	// * The basic non-iterative versions *******************************
	18	// **********************************************************************
	19
	20	tree distanceBasedSeqs2Tree::seqs2Tree(const sequenceContainer &sc, const Vdouble weights, const tree constraintTreePtr) {
	21	_constraintTreePtr=constraintTreePtr;
	22	_weights = weights;
	23
	24	// Calculate distance table
	25	tree et;
	26	VVdouble distTable;
	27	vector<string> vNames;
	28	giveDistanceTable(_distM,sc,distTable,vNames,_weights);
	29
	30	// Build tree from the distance table
	31	et = _dist2et->computeTree(distTable, vNames, _constraintTreePtr);
	32
	33	LOG(6,<<"# distanceBasedSeqs2Tree::seqs2Tree: The reconsructed tree:"<<endl);
	34	LOGDO(6,et.output(myLog::LogFile()));
	35
	36	return et;
	37	}
	38
	39	tree distanceBasedSeqs2Tree::seqs2TreeBootstrap(const sequenceContainer &sc, const Vdouble weights, const tree constraintTreePtr) {
	40	return seqs2Tree(sc, weights, constraintTreePtr);
	41	}
	42
	43	// **********************************************************************
	44	// * iterativeDistanceSeqs2Tree *************************************
	45	// **********************************************************************
	46
	47	iterativeDistanceSeqs2Tree::iterativeDistanceSeqs2Tree(likeDist &distM, distances2Tree &dist2et, const Vdouble *weights,
	48	const MDOUBLE epsilonLikelihoodImprovement,
	49	const MDOUBLE epsilonLikelihoodImprovement4alphaOptimiz,
	50	const MDOUBLE epsilonLikelihoodImprovement4BBL,
	51	const int maxIterationsBBL)
	52	: distanceBasedSeqs2Tree(distM, dist2et, weights),
	53	_epsilonLikelihoodImprovement ( epsilonLikelihoodImprovement ),
	54	_epsilonLikelihoodImprovement4alphaOptimiz( epsilonLikelihoodImprovement4alphaOptimiz),
	55	_epsilonLikelihoodImprovement4BBL ( epsilonLikelihoodImprovement4BBL ),
	56	_maxIterationsBBL ( maxIterationsBBL )
	57	{
	58	// Check that the stochasticProcess in likeDist is not const
	59	if (distM.isTheInternalStochasticProcessConst()) {
	60	errorMsg::reportError("iterativeDistanceSeqs2Tree::iterativeDistanceSeqs2Tree: The stochasticProcess in the given likeDist object is const. A non-const stochasticProcess is required.");
	61	}
	62
	63	// Keep a pointer to the stochasticProcess in distM, so that we will be able to change its alpha, etc.
	64	_spPtr = &(distM.getNonConstStochasticProcess());
	65	if (_spPtr->categories() >1)
	66	_alpha = (static_cast<gammaDistribution*>(_spPtr->distr()))->getAlpha();
	67	else
	68	_alpha=-99.9; // this should never be used
	69
	70	}
	71
	72	// * Iterative tree building ****************************************
	73	tree iterativeDistanceSeqs2Tree::seqs2TreeIterativeInternal(const sequenceContainer &sc, bool initSideInfoGiven) {
	74	LOGDO(3,printTime(myLog::LogFile()));
	75	LOG(3,<<"# iterativeDistanceSeqs2Tree::seqs2TreeIterativeInternal:"<<endl<<"# Initial tree:"<<endl);
	76	seqs2TreeOneIterationInternal(sc, initSideInfoGiven);
	77
	78	return seqs2TreeIterativeInternalInitTreeGiven(sc, true, _newTree, _newAlpha);
	79	}
	80
	81	// * Iterative tree building, given an initial tree and alpha *******
	82	// *** Optimize branch lengths and sideInfo for the given tree topology
	83	tree iterativeDistanceSeqs2Tree::seqs2TreeIterativeInternalInitTreeGiven(const sequenceContainer &sc, const tree &initTree) {
	84	LOG(7,<<"# iterativeDistanceSeqs2Tree::seqs2TreeIterativeInternalInitTreeGiven: Started optimizeSideInfo. ");
	85	LOGDO(7,printTime(myLog::LogFile()));
	86	_newTree=initTree;
	87	_newTreeLogLikelihood=optimizeSideInfo(sc, _newTree);
	88	LOG(7,<<"# iterativeDistanceSeqs2Tree::seqs2TreeIterativeInternalInitTreeGiven: Finished optimizeSideInfo. ");
	89	LOGDO(7,printTime(myLog::LogFile()));
	90
	91	return seqs2TreeIterativeInternalInitTreeGiven(sc, true, _newTree, _newAlpha);
	92	}
	93
	94	// * Iterative tree building, given an initial tree and alpha *******
	95	// *** If sideInfo is not given - calculate it for the fixed tree and alpha
	96	tree iterativeDistanceSeqs2Tree::seqs2TreeIterativeInternalInitTreeGiven(const sequenceContainer &sc, bool initSideInfoGiven, const tree &initTree, MDOUBLE initAlpha) {
	97	_newTree=initTree;
	98	_newAlpha=initAlpha;
	99
	100	LOGDO(3,printTime(myLog::LogFile()));
	101	LOG(3,<<"# iterativeDistanceSeqs2Tree::seqs2TreeIterativeInternalInitTreeGiven"<<endl);
	102	if (!initSideInfoGiven) {
	103	_newTreeLogLikelihood=calcSideInfoGivenTreeAndAlpha(sc, initTree, initAlpha);
	104	}
	105	int iterationNum = 0;
	106	LOGDO(3,printTime(myLog::LogFile()));
	107	LOG(3,<<"# iterativeDistanceSeqs2Tree::seqs2TreeIterativeInternalInitTreeGiven:"<<endl<<"# The given initial tree:"<<endl);
	108	LOGDO(3,_newTree.output(myLog::LogFile()));
	109
	110	do {
	111	++iterationNum;
	112	LOGDO(5,printTime(myLog::LogFile()));
	113	LOG(3,<<"# Iteration "<<iterationNum<<":"<<endl);
	114
	115	// save the best tree so far, and its likelihood and the sideInfo that was calculated for it
	116	_et=_newTree;
	117	_treeLogLikelihood=_newTreeLogLikelihood;
	118	acceptSideInfo();
	119	LOG(7,<<"# Side info for the tree"<<endl);
	120	LOGDO(7,printSideInfo(myLog::LogFile()));
	121
	122	seqs2TreeOneIterationInternal(sc, true);
	123
	124	} while (_newTreeLogLikelihood > _treeLogLikelihood + _epsilonLikelihoodImprovement);
	125
	126	LOGDO(3,printTime(myLog::LogFile()));
	127	LOG(3,<<"# iterativeDistanceSeqs2Tree::seqs2TreeIterativeInternalInitTreeGiven:"<<endl<<"# Finished iterative distance-based tree reconstruction, done "<<iterationNum<<" iterations"<<endl);
	128	return _et;
	129	}
	130
	131	// * Tree building procedure that is called iteratively ********************
	132	void iterativeDistanceSeqs2Tree::seqs2TreeOneIterationInternal(const sequenceContainer &sc, const bool sideInfoSet) {
	133
	134	// 1. Calculate distance table
	135	VVdouble distTable;
	136	vector<string> vNames;
	137	LOG(7,<<"# iterativeDistanceSeqs2Tree::seqs2TreeOneIterationInternal: Started giveDistanceTable. ");
	138	LOGDO(7,printTime(myLog::LogFile()));
	139	if (!sideInfoSet) { // Then use homogeneous rates
	140
	141	// Create homogeneous likeDist
	142	_alpha = 1.5; // Since no ASRV side info is known yet, we set an initial alpha for bestAlphaAndBBL optimizations
	143	uniDistribution distribution;
	144	stochasticProcess* uniDistSp = NULL;
	145	replacementModelSSRV* rmSSRV =
	146	dynamic_cast<replacementModelSSRV*>(_spPtr->getPijAccelerator()->getReplacementModel());
	147	if (!rmSSRV) {
	148	uniDistSp = new stochasticProcess(&distribution, _spPtr->getPijAccelerator());
	149	} else {
	150	trivialAccelerator pijAcc(rmSSRV->getBaseRM());
	151	uniDistSp = new stochasticProcess(&distribution, &pijAcc);
	152	}
	153	likeDist homogeneousDist(uniDistSp,static_cast<likeDist>(_distM)->getToll());
	154
	155	giveDistanceTable(&homogeneousDist,sc,distTable,vNames,_weights);
	156	delete uniDistSp;
	157
	158	} else { // use the side information
	159	utilizeSideInfo();
	160	giveDistanceTable(_distM,sc,distTable,vNames,_weights);
	161	}
	162	LOG(7,<<"# iterativeDistanceSeqs2Tree::seqs2TreeOneIterationInternal: Finished giveDistanceTable, started distances2Tree::computeTree. ");
	163	LOGDO(7,printTime(myLog::LogFile()));
	164
	165	// 2. Build tree from the distance table
	166	_newTree = _dist2et->computeTree(distTable, vNames, _constraintTreePtr);
	167	LOG(7,<<"# iterativeDistanceSeqs2Tree::seqs2TreeOneIterationInternal: Finished distances2Tree::computeTree, started optimizeSideInfo. ");
	168	LOGDO(7,printTime(myLog::LogFile()));
	169
	170	// 3. Optimize branch lengths and side info for the tree topology
	171	_newTreeLogLikelihood=optimizeSideInfo(sc, _newTree);
	172	LOG(7,<<"# iterativeDistanceSeqs2Tree::seqs2TreeOneIterationInternal: Finished distances2Tree::optimizeSideInfo. ");
	173	LOGDO(7,printTime(myLog::LogFile()));
	174
	175	if (!sideInfoSet) {
	176	LOG(5,<<"# iterativeDistanceSeqs2Tree::seqs2TreeOneIterationInternal:"<<endl<<"# Homogeneous rates tree"<<endl);
	177	} else {
	178	LOG(5,<<"# iterativeDistanceSeqs2Tree::seqs2TreeOneIterationInternal:"<<endl<<"# Tree based on alpha"<<endl);
	179	}
	180	LOGDO(5,_newTree.output(myLog::LogFile()));
	181	LOG(5,<<"# Log likelihood:"<<endl<<_newTreeLogLikelihood<<endl);
	182	}
	183
	184	// Perform one bootstrap iteration, assuming that side info has been set (as if acceptSideInfo has been called)
	185	tree iterativeDistanceSeqs2Tree::seqs2TreeBootstrap(const sequenceContainer &sc, const Vdouble weights, const tree constraintTreePtr) {
	186	LOG(3,<<"# iterativeDistanceSeqs2Tree::seqs2TreeBootstrap: Started a single bootstrap iteration. ");
	187	LOGDO(3,printTime(myLog::LogFile()));
	188	_constraintTreePtr=constraintTreePtr;
	189	_weights = weights;
	190
	191	// Calculate distance table
	192	tree localScopeEt;
	193	VVdouble distTable;
	194	vector<string> vNames;
	195	utilizeSideInfo();
	196	giveDistanceTable(_distM,sc,distTable,vNames,_weights);
	197
	198	// Build tree from the distance table
	199	localScopeEt = _dist2et->computeTree(distTable,vNames, _constraintTreePtr);
	200
	201	LOG(3,<<"# iterativeDistanceSeqs2Tree::seqs2TreeBootstrapInternal:"<<endl<<"# Bootstrap tree based on alpha, without optimizations"<<endl);
	202	LOGDO(3,localScopeEt.output(myLog::LogFile()));
	203
	204	return localScopeEt;
	205	}
	206
	207	/********************************
	208	* commonAlphaDistanceSeqs2Tree *
	209	********************************/
	210	tree commonAlphaDistanceSeqs2Tree::seqs2TreeIterative(const sequenceContainer &sc, MDOUBLE initAlpha, const Vdouble weights, const tree constraintTreePtr) {
	211	_constraintTreePtr=constraintTreePtr;
	212	_alpha = initAlpha;
	213	_weights = weights;
	214	return seqs2TreeIterativeInternal(sc, true);
	215	}
	216
	217	tree commonAlphaDistanceSeqs2Tree::seqs2TreeIterative(const sequenceContainer &sc, const Vdouble weights, const tree constraintTreePtr) {
	218	_constraintTreePtr=constraintTreePtr;
	219	_weights = weights;
	220	return seqs2TreeIterativeInternal(sc, false);
	221	}
	222
	223	tree commonAlphaDistanceSeqs2Tree::seqs2TreeIterative(const sequenceContainer &sc, const tree &initTree, const Vdouble weights, const tree constraintTreePtr) {
	224	_constraintTreePtr=constraintTreePtr;
	225	_weights = weights;
	226	return seqs2TreeIterativeInternalInitTreeGiven(sc, initTree);
	227	}
	228
	229	tree commonAlphaDistanceSeqs2Tree::seqs2TreeIterative(const sequenceContainer &sc, const tree &initTree, MDOUBLE initAlpha, const Vdouble weights, const tree constraintTreePtr) {
	230	_alpha = initAlpha;
	231	_weights = weights;
	232
	233	_constraintTreePtr=constraintTreePtr;
	234	return seqs2TreeIterativeInternalInitTreeGiven(sc, true, initTree, initAlpha);
	235	}
	236
	237	// NOTE! This version is a NON-ITERATIVE version that uses the side info supplied by the user
	238	tree commonAlphaDistanceSeqs2Tree::seqs2Tree(const sequenceContainer &sc, MDOUBLE alpha, const Vdouble weights, const tree constraintTreePtr) {
	239	_weights = weights;
	240	_alpha = alpha;
	241	_constraintTreePtr=constraintTreePtr;
	242	seqs2TreeOneIterationInternal(sc, true);
	243	return _newTree;
	244	}
	245
	246	tree commonAlphaDistanceSeqs2Tree::seqs2TreeBootstrap(const sequenceContainer &sc, const MDOUBLE alpha, const Vdouble weights, const tree constraintTreePtr) {
	247	_weights = weights;
	248	_alpha = alpha;
	249	return static_cast<iterativeDistanceSeqs2Tree *>(this)->seqs2TreeBootstrap(sc, weights, constraintTreePtr);
	250	}
	251
	252	// NOTE! This version calls ITERATIVE seqs2Tree because side info is not given by the user, so we have to generate and optimize it
	253	tree commonAlphaDistanceSeqs2Tree::seqs2Tree(const sequenceContainer &sc, const Vdouble weights, const tree constraintTreePtr) {
	254	return seqs2TreeIterative(sc,weights,constraintTreePtr);
	255	}
	256
	257	MDOUBLE commonAlphaDistanceSeqs2Tree::optimizeSideInfo(const sequenceContainer &sc, tree &et)
	258	{
	259	if (dynamic_cast<tamura92*>(_spPtr->getPijAccelerator()->getReplacementModel())) {
	260	// Optimizing params of the tamura92 model
	261	bestTamura92ParamAlphaAndBBL optimizer(et, sc, _spPtr, _weights, 5, _epsilonLikelihoodImprovement/0.05*/,
	262	_epsilonLikelihoodImprovement4alphaOptimiz/0.01/,
	263	_epsilonLikelihoodImprovement4alphaOptimiz/0.01/,
	264	_epsilonLikelihoodImprovement4alphaOptimiz/0.01/,
	265	_epsilonLikelihoodImprovement4BBL/0.01/,
	266	5.0, _maxIterationsBBL, _alpha, 5.0 );
	267	_newAlpha=optimizer.getBestAlpha();
	268	return(optimizer.getBestL());
	269
	270	} else if (dynamic_cast<gtrModel*>(_spPtr->getPijAccelerator()->getReplacementModel())) {
	271	// Optimizing params of the gtr model
	272	bestGtrModel optimizer(et, sc, *_spPtr, _weights, 5,
	273	_epsilonLikelihoodImprovement,
	274	_epsilonLikelihoodImprovement4alphaOptimiz,
	275	true, true);
	276	_newAlpha=optimizer.getBestAlpha();
	277	return(optimizer.getBestL());
	278
	279	} else {
	280	bestAlphaAndBBL optimizer(et, sc, *_spPtr, _weights, _alpha, 5.0,
	281	_epsilonLikelihoodImprovement4BBL/0.01/, _epsilonLikelihoodImprovement4alphaOptimiz,
	282	_maxIterationsBBL);
	283	_newAlpha=optimizer.getBestAlpha();
	284	return(optimizer.getBestL());
	285	}
	286	}
	287
	288	MDOUBLE commonAlphaDistanceSeqs2Tree::calcSideInfoGivenTreeAndAlpha(const sequenceContainer &sc, const tree &et, MDOUBLE alpha)
	289	{
	290	_newAlpha = alpha;
	291	(static_cast<gammaDistribution*>(_spPtr->distr()))->setAlpha(alpha);
	292	return likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(et, sc, *_spPtr, _weights);
	293	}
	294
	295	void commonAlphaDistanceSeqs2Tree::acceptSideInfo()
	296	{
	297	_alpha = _newAlpha;
	298	}
	299
	300	void commonAlphaDistanceSeqs2Tree::utilizeSideInfo()
	301	{
	302	// set new alpha value in the sp that is used in _distM
	303	(static_cast<gammaDistribution*>(_spPtr->distr()))->setAlpha(_alpha);
	304	LOG(10,<<"# utilizing alpha"<<endl<<_alpha<<endl<<endl);
	305
	306	}
	307
	308	void commonAlphaDistanceSeqs2Tree::printSideInfo(ostream& out) const
	309	{
	310	out<<"Alpha: "<<_alpha<<endl;
	311	}
	312
	313	// non virtual
	314	void commonAlphaDistanceSeqs2Tree::setSideInfo(const MDOUBLE alpha)
	315	{
	316	_alpha=alpha;
	317	}
	318
	319	MDOUBLE commonAlphaDistanceSeqs2Tree::getSideInfo() const
	320	{
	321	return _alpha;
	322	}
	323
	324	/******************************
	325	* rate4siteDistanceSeqs2Tree *
	326	******************************/
	327	tree rate4siteDistanceSeqs2Tree::seqs2TreeIterative(const sequenceContainer &sc, const Vdouble &initRates, const Vdouble weights, const tree constraintTreePtr) {
	328	_rates = initRates;
	329	_constraintTreePtr=constraintTreePtr;
	330	_weights = weights;
	331	return seqs2TreeIterativeInternal(sc, true);
	332	}
	333
	334	tree rate4siteDistanceSeqs2Tree::seqs2TreeIterative(const sequenceContainer &sc, const Vdouble weights, const tree constraintTreePtr) {
	335	_constraintTreePtr=constraintTreePtr;
	336	_weights = weights;
	337	return seqs2TreeIterativeInternal(sc, false);
	338	}
	339
	340	tree rate4siteDistanceSeqs2Tree::seqs2TreeIterative(const sequenceContainer &sc, const tree &initTree, const Vdouble weights, const tree constraintTreePtr) {
	341	_constraintTreePtr=constraintTreePtr;
	342	_weights = weights;
	343	return seqs2TreeIterativeInternalInitTreeGiven(sc, initTree);
	344	}
	345
	346	tree rate4siteDistanceSeqs2Tree::seqs2TreeIterative(const sequenceContainer &sc, const tree &initTree, MDOUBLE initAlpha, const Vdouble weights, const tree constraintTreePtr) {
	347	_constraintTreePtr=constraintTreePtr;
	348	_weights = weights;
	349	return seqs2TreeIterativeInternalInitTreeGiven(sc, false, initTree, initAlpha);
	350	}
	351
	352	// NOTE! This version is a NON-ITERATIVE version that uses the side info supplied by the user
	353	tree rate4siteDistanceSeqs2Tree::seqs2Tree(const sequenceContainer &sc, const Vdouble &rates, const Vdouble weights, const tree constraintTreePtr) {
	354	_weights = weights;
	355	_rates = rates;
	356	_constraintTreePtr=constraintTreePtr;
	357
	358	seqs2TreeOneIterationInternal(sc, true);
	359	return _newTree;
	360	}
	361
	362	tree rate4siteDistanceSeqs2Tree::seqs2TreeBootstrap(const sequenceContainer &sc, const Vdouble &rates, const Vdouble weights, const tree constraintTreePtr) {
	363	_weights = weights;
	364	_rates = rates;
	365	return static_cast<iterativeDistanceSeqs2Tree *>(this)->seqs2TreeBootstrap(sc, weights, constraintTreePtr);
	366	}
	367
	368	// NOTE! This version calls ITERATIVE seqs2Tree because side info is not given by the user, so we have to generate and optimize it
	369	tree rate4siteDistanceSeqs2Tree::seqs2Tree(const sequenceContainer &sc, const Vdouble weights, const tree constraintTreePtr) {
	370	return seqs2TreeIterative(sc,weights,constraintTreePtr);
	371	}
	372
	373	MDOUBLE rate4siteDistanceSeqs2Tree::optimizeSideInfo(const sequenceContainer &sc, tree &et)
	374	{
	375	bblEM optimizer(et, sc, *_spPtr, _weights, _maxIterationsBBL, _epsilonLikelihoodImprovement4BBL);
	376
	377	// Note: this verstion of ML rates computation can only use a uniDistribution stochasticProcess
	378	Vdouble likelihoods;
	379	MDOUBLE treeLogLikelihood = computeML_siteSpecificRate(_newRates, likelihoods, sc, *_spPtr, et,20,_epsilonLikelihoodImprovement);
	380	//computeEB_EXP_siteSpecificRate
	381	return(treeLogLikelihood);
	382	}
	383
	384	MDOUBLE rate4siteDistanceSeqs2Tree::calcSideInfoGivenTreeAndAlpha(const sequenceContainer &sc, const tree &et, MDOUBLE alpha)
	385	{
	386	_newAlpha = alpha;
	387	Vdouble likelihoods;
	388	MDOUBLE treeLogLikelihood = computeML_siteSpecificRate(_newRates, likelihoods, sc, *_spPtr, et,20,_epsilonLikelihoodImprovement);
	389	//computeEB_EXP_siteSpecificRate
	390	return(treeLogLikelihood);
	391	}
	392
	393	void rate4siteDistanceSeqs2Tree::acceptSideInfo()
	394	{
	395	_alpha = _newAlpha;
	396	_rates = _newRates;
	397	}
	398
	399	void rate4siteDistanceSeqs2Tree::utilizeSideInfo()
	400	{
	401	(static_cast<givenRatesMLDistance*>(_distM))->setRates(_rates);
	402	LOG(10,<<"# utilizing rates"<<endl<<_rates<<endl<<endl);
	403
	404	// set new alpha value in the sp that is used in _distM
	405	// (static_cast<gammaDistribution*>(_spPtr->distr()))->setAlpha(_alpha);
	406	}
	407
	408	void rate4siteDistanceSeqs2Tree::printSideInfo(ostream& out) const
	409	{
	410	if (_rates.size())
	411	out<<"ML rates: "<<_rates<<endl;
	412	}
	413
	414	// non virtual
		// Replaces the current per-site rates (_rates) with a copy of the given vector.
	415	void rate4siteDistanceSeqs2Tree::setSideInfo(const Vdouble &rates)
	416	{
	417	_rates = rates;
	418	}
	419
	420	const Vdouble& rate4siteDistanceSeqs2Tree::getSideInfo() const
	421	{
		// Returns a reference to the current per-site rates; it refers to the member _rates, not to a copy.
	422	return _rates;
	423	}
	424
	425	/******************************
	426	* posteriorDistanceSeqs2Tree *
	427	********************************/
	428	tree posteriorDistanceSeqs2Tree::seqs2TreeIterative(const sequenceContainer &sc, MDOUBLE initAlpha, const VVdoubleRep &initPosterior, const Vdouble weights, const tree constraintTreePtr) {
		// Iterative run that starts from caller-supplied side info: stores initAlpha in _alpha and
		// initPosterior in _posterior, records weights and the constraint tree, then calls
		// seqs2TreeIterativeInternal() with initSideInfoGiven = true.
		// NOTE(review): weights/constraintTreePtr are shown by value but are assigned to pointer members;
		// a '*' may have been lost from this listing -- confirm against the repository.
	429	_alpha = initAlpha;
	430	_posterior = initPosterior;
	431	_weights = weights;
	432	_constraintTreePtr=constraintTreePtr;
	433	return seqs2TreeIterativeInternal(sc, true);
	434	}
	435
	436	tree posteriorDistanceSeqs2Tree::seqs2TreeIterative(const sequenceContainer &sc, const Vdouble weights, const tree constraintTreePtr) {
		// Iterative run without initial side info: records weights and the constraint tree, then calls
		// seqs2TreeIterativeInternal() with initSideInfoGiven = false.
	437	_constraintTreePtr=constraintTreePtr;
	438	_weights = weights;
	439	return seqs2TreeIterativeInternal(sc, false);
	440	}
	441
	442	tree posteriorDistanceSeqs2Tree::seqs2TreeIterative(const sequenceContainer &sc, const tree &initTree, const Vdouble weights, const tree constraintTreePtr) {
		// Iterative run that starts from a given initial tree: records weights and the constraint tree, then
		// delegates to the two-argument seqs2TreeIterativeInternalInitTreeGiven(sc, initTree).
	443	_constraintTreePtr=constraintTreePtr;
	444	_weights = weights;
	445	return seqs2TreeIterativeInternalInitTreeGiven(sc, initTree);
	446	}
	447
	448	tree posteriorDistanceSeqs2Tree::seqs2TreeIterative(const sequenceContainer &sc, const tree &initTree, MDOUBLE initAlpha, const Vdouble weights, const tree constraintTreePtr) {
		// Iterative run that starts from a given tree and alpha but no posterior: records weights and the
		// constraint tree, then calls seqs2TreeIterativeInternalInitTreeGiven() with initSideInfoGiven = false.
		// Unlike the overloads that take initPosterior, this one does not assign _alpha itself;
		// initAlpha is only forwarded.
	449	_constraintTreePtr=constraintTreePtr;
	450	_weights = weights;
	451	return seqs2TreeIterativeInternalInitTreeGiven(sc, false, initTree, initAlpha);
	452	}
	453
	454	tree posteriorDistanceSeqs2Tree::seqs2TreeIterative(const sequenceContainer &sc, const tree &initTree, MDOUBLE initAlpha, const VVdoubleRep &initPosterior, const Vdouble weights, const tree constraintTreePtr) {
		// Iterative run that starts from a given tree, alpha and posterior: stores the side info in
		// _alpha/_posterior, records weights and the constraint tree, then calls
		// seqs2TreeIterativeInternalInitTreeGiven() with initSideInfoGiven = true.
	455	_alpha = initAlpha;
	456	_posterior = initPosterior;
	457	_weights = weights;
	458	_constraintTreePtr=constraintTreePtr;
	459	return seqs2TreeIterativeInternalInitTreeGiven(sc, true, initTree, initAlpha);
	460	}
	461
	462	// NOTE! This version is a NON-ITERATIVE version that uses the side info supplied by the user
		// Stores weights, posterior and the constraint tree, runs a single pass through
		// seqs2TreeOneIterationInternal() with sideInfoSet = true, and returns the member _newTree.
	463	tree posteriorDistanceSeqs2Tree::seqs2Tree(const sequenceContainer &sc, const VVdoubleRep &posterior, const Vdouble weights, const tree constraintTreePtr) {
	464	_weights = weights;
	465	_posterior = posterior;
	466	_constraintTreePtr=constraintTreePtr;
	467	seqs2TreeOneIterationInternal(sc, true);
	468	return _newTree;
	469	}
	470
	471	tree posteriorDistanceSeqs2Tree::seqs2TreeBootstrap(const sequenceContainer &sc, const VVdoubleRep &posterior, const Vdouble weights, const tree constraintTreePtr) {
		// One bootstrap iteration using caller-supplied posteriors: stores weights and posterior, then
		// delegates to the seqs2TreeBootstrap(sc, weights, constraintTreePtr) overload declared in
		// iterativeDistanceSeqs2Tree. The cast to the base type is what makes that overload callable here,
		// since this class declares its own seqs2TreeBootstrap.
		// Unlike seqs2Tree(), _constraintTreePtr is not assigned here; the pointer is only passed on.
	472	_weights = weights;
	473	_posterior = posterior;
	474	return static_cast<iterativeDistanceSeqs2Tree *>(this)->seqs2TreeBootstrap(sc, weights, constraintTreePtr);
	475	}
	476
	477	// NOTE! This version calls ITERATIVE seqs2Tree because side info is not given by the user, so we have to generate and optimize it
		// Forwards sc, weights and constraintTreePtr unchanged to seqs2TreeIterative() and returns the resulting tree.
	478	tree posteriorDistanceSeqs2Tree::seqs2Tree(const sequenceContainer &sc, const Vdouble weights, const tree constraintTreePtr) {
	479	return seqs2TreeIterative(sc, weights, constraintTreePtr);
	480	}
	481
	482	MDOUBLE posteriorDistanceSeqs2Tree::optimizeSideInfo(const sequenceContainer &sc, tree &et)
	483	{
		// Optimizes model parameters, alpha and branch lengths on the given tree, choosing the optimizer by
		// the replacement model held in _spPtr (tamura92, gtrModel, or anything else).
		//   sc - the sequences
		//   et - the tree, handed to the optimizer by non-const reference
		// The optimized alpha is stored in _newAlpha in every branch.
		// Returns optimizer.getBestL() in the tamura92/gtr branches, and the value of
		// likelihoodComputation::getPosteriorOfRates() otherwise.
		// NOTE(review): only the generic branch fills _newPosterior; the tamura92 and gtr branches return
		// before getPosteriorOfRates() is called -- confirm this is intended.
	484		if (dynamic_cast<tamura92*>(_spPtr->getPijAccelerator()->getReplacementModel())) {
	485			// Optimizing params of the tamura92 model
			// The trailing /*...*/ values are historical tolerances kept as annotations only.
	486			bestTamura92ParamAlphaAndBBL optimizer(et, sc, *_spPtr, _weights, 5, _epsilonLikelihoodImprovement/*0.05*/,
	487				_epsilonLikelihoodImprovement4alphaOptimiz/*0.01*/,
	488				_epsilonLikelihoodImprovement4alphaOptimiz/*0.01*/,
	489				_epsilonLikelihoodImprovement4alphaOptimiz/*0.01*/,
	490				_epsilonLikelihoodImprovement4BBL/*0.01*/,
	491				5.0, _maxIterationsBBL, _alpha, 5.0 );
	492			_newAlpha=optimizer.getBestAlpha();
	493			return(optimizer.getBestL());
	494
	495		} else if (dynamic_cast<gtrModel*>(_spPtr->getPijAccelerator()->getReplacementModel())) {
	496			// Optimizing params of the gtr model
	497			bestGtrModel optimizer(et, sc, *_spPtr, _weights, 5,
	498				_epsilonLikelihoodImprovement,
	499				_epsilonLikelihoodImprovement4alphaOptimiz,
	500				true, true);
	501			_newAlpha=optimizer.getBestAlpha();
	502			return(optimizer.getBestL());
	503
	504		} else {
	505			bestAlphaAndBBL optimizer(et, sc, *_spPtr, _weights, _alpha, 5.0,
	506				_epsilonLikelihoodImprovement4BBL/*0.01*/, _epsilonLikelihoodImprovement4alphaOptimiz,
	507				_maxIterationsBBL);
	508			_newAlpha=optimizer.getBestAlpha(); // cached only to make alpha optimization faster
	509		}
	510
	511		// Compute posterior probabilities of rates per site
	512		return likelihoodComputation::getPosteriorOfRates(et, sc, *_spPtr, _newPosterior);
	513	}
	514
	515	MDOUBLE posteriorDistanceSeqs2Tree::calcSideInfoGivenTreeAndAlpha(const sequenceContainer &sc, const tree &et, MDOUBLE alpha)
	516	{
		// Computes the per-site posterior of rates for a fixed tree and a fixed alpha.
		// Records alpha in _newAlpha and also sets it on the distribution held by _spPtr, so the stochastic
		// process is modified. Fills _newPosterior and returns the value of getPosteriorOfRates().
		// The static_cast is unchecked: it assumes _spPtr->distr() is a gammaDistribution -- TODO confirm.
	517	_newAlpha = alpha;
	518	(static_cast<gammaDistribution*>(_spPtr->distr()))->setAlpha(alpha);
	519	// Compute posterior probabilities of rates per site
	520	return likelihoodComputation::getPosteriorOfRates(et, sc, *_spPtr, _newPosterior);
	521	}
	522
	523	void posteriorDistanceSeqs2Tree::acceptSideInfo()
	524	{
		// Promotes the candidate side info to the current one: _newAlpha -> _alpha, _newPosterior -> _posterior.
	525	_alpha = _newAlpha;
	526	_posterior = _newPosterior;
	527	}
	528
	529	void posteriorDistanceSeqs2Tree::utilizeSideInfo()
	530	{
		// Hands the current posterior (_posterior) to the distance method and logs it at level 10.
		// The static_cast is unchecked: it assumes _distM really is a posteriorDistance, the type
		// this class's constructor accepts.
	531	(static_cast<posteriorDistance*>(_distM))->setPosterior(_posterior);
	532	LOG(10,<<"# utilizing posterior"<<endl<<_posterior<<endl<<endl);
	533	// set new alpha value in the sp that is used in _distM
	534	// (static_cast<gammaDistribution*>(_spPtr->distr()))->setAlpha(_alpha);
	535	}
	536
	537	void posteriorDistanceSeqs2Tree::printSideInfo(ostream& out) const
	538	{
		// Writes the current posterior followed by a newline; writes nothing when _posterior is empty.
	539	if (_posterior.size())
	540	out<<_posterior<<endl;
	541	}
	542
	543	// non virtual
		// Replaces the current posterior (_posterior) with a copy of the given one.
	544	void posteriorDistanceSeqs2Tree::setSideInfo(const VVdoubleRep &posterior)
	545	{
	546	_posterior = posterior;
	547	}
	548
	549	const VVdoubleRep& posteriorDistanceSeqs2Tree::getSideInfo() const
	550	{
		// Returns a reference to the current posterior; it refers to the member _posterior, not to a copy.
	551	return _posterior;
	552	}
	553

+195

-0

libs/phylogeny/distanceBasedSeqs2Tree.h less more

	0	// $Id: distanceBasedSeqs2Tree.h 5989 2009-03-19 09:27:26Z privmane $
	1
	2	#ifndef ___DISTANCE_BASED_SEQS2TREE
	3	#define ___DISTANCE_BASED_SEQS2TREE
	4
	5	#include "distanceMethod.h"
	6	#include "sequenceContainer.h"
	7	#include "stochasticProcess.h"
	8	#include "likeDist.h"
	9	#include "distances2Tree.h"
	10	#include "givenRatesMLDistance.h"
	11	#include "posteriorDistance.h"
	12	#include "float.h"
	13
	14	// NOTE: These modules take sequenceContainer as argument, and do not
	15	// manipulate it. If you want to take care of gaps do it yourself!
		//
		// Base class for builders that turn sequences into a tree via a distance method (_distM) and a
		// distances-to-tree method (_dist2et). Both are cloned on construction and deleted in the destructor.
		// NOTE(review): no copy constructor or assignment operator is declared, so copying an instance would
		// make two objects delete the same clones.
	16	class distanceBasedSeqs2Tree {
	17	public:
		// weights is stored as given (not copied). _constraintTreePtr starts out NULL so that it is never
		// read uninitialized before one of the seqs2Tree* calls assigns it.
	18	distanceBasedSeqs2Tree(distanceMethod &distM, distances2Tree &dist2et, const Vdouble *weights = NULL)
	19	: _distM(distM.clone()), _dist2et(dist2et.clone()), _weights(weights), _treeLogLikelihood(VERYBIG), _constraintTreePtr(NULL) {}
	20	virtual ~distanceBasedSeqs2Tree() {delete (_distM);delete (_dist2et);}
		// NOTE(review): weights/constraintTreePtr default to NULL and are stored in pointer members, so they are
		// presumably pointers; a '*' may have been lost from this listing -- confirm against the repository.
	21	virtual tree seqs2Tree(const sequenceContainer &sc, const Vdouble weights=NULL, const tree constraintTreePtr=NULL);
	22	// Does one bootstrap iteration
	23	virtual tree seqs2TreeBootstrap(const sequenceContainer &sc, const Vdouble weights=NULL, const tree constraintTreePtr=NULL);
		// Returns _treeLogLikelihood, which is VERYBIG until something assigns it.
	24	virtual MDOUBLE getLogLikelihood() {return _treeLogLikelihood;}
	25
	26	protected:
	27	distanceMethod *_distM;			// owned clone
	28	distances2Tree *_dist2et;		// owned clone
	29	const Vdouble * _weights;		// not owned
	30	MDOUBLE _treeLogLikelihood;
	31	const tree* _constraintTreePtr;	// not owned
	32	};
	33
	34	class iterativeDistanceSeqs2Tree : public distanceBasedSeqs2Tree {
		// Abstract base for the iterative builders: derived classes supply the "side info" (alpha, rates,
		// posteriors, ...) through the pure virtual hooks below, and the protected seqs2TreeIterativeInternal*
		// helpers drive the iterations.
		// NOTE(review): weights/constraintTreePtr default to NULL throughout, so they are presumably pointers;
		// a '*' may have been lost from this listing -- confirm against the repository.
	35	public:
	36	iterativeDistanceSeqs2Tree(likeDist &distM, distances2Tree &dist2et, const Vdouble *weights = NULL,
	37	const MDOUBLE epsilonLikelihoodImprovement = 0.001,
	38	const MDOUBLE epsilonLikelihoodImprovement4alphaOptimiz = 0.001,
	39	const MDOUBLE epsilonLikelihoodImprovement4BBL = 0.001,
	40	const int maxIterationsBBL = 10);
	41	virtual ~iterativeDistanceSeqs2Tree() {}
	42	virtual tree seqs2Tree(const sequenceContainer &sc, const Vdouble weights=NULL, const tree constraintTreePtr=NULL) = 0; // iterative
	43	virtual tree seqs2TreeIterative(const sequenceContainer &sc, const Vdouble weights=NULL, const tree constraintTreePtr=NULL) = 0;
	44	// Start from optimization of branch length and side info for a given initial topology
	45	virtual tree seqs2TreeIterative(const sequenceContainer &sc, const tree &initTree, const Vdouble weights=NULL, const tree constraintTreePtr=NULL) = 0;
	46	// Start from calculating side info for a given tree and alpha
	47	virtual tree seqs2TreeIterative(const sequenceContainer &sc, const tree &initTree, MDOUBLE initAlpha, const Vdouble weights=NULL, const tree constraintTreePtr=NULL) = 0;
	48	// Does one bootstrap iteration
	49	virtual tree seqs2TreeBootstrap(const sequenceContainer &sc, const Vdouble weights=NULL, const tree constraintTreePtr=NULL);
		// Returns a copy of the member _et.
	50	tree getTree() {return _et;}
	51
	52	// * handling side info *
	53
	54	// Optimize nj tree (optimize alpha, branch lengths, etc.) and produce
	55	// side info based on the optimized tree
	56	virtual MDOUBLE optimizeSideInfo(const sequenceContainer &sc, tree &et) = 0;
	57	// Calculate side info without changing the given tree and alpha
	58	// (Optimization should be done in here for side info that includes other optimizable parameters
	59	// e.g. ML rates, Nu...)
	60	virtual MDOUBLE calcSideInfoGivenTreeAndAlpha(const sequenceContainer &sc, const tree &et, MDOUBLE alpha) = 0;
	61	// Copy new side info (based on the new tree) to the "current" side info variable, before the next iteration
	62	virtual void acceptSideInfo() = 0;
	63	// Apply the optimized side info into _optimizedSp
	64	virtual void utilizeSideInfo() = 0;
	65	virtual void printSideInfo(ostream& out) const = 0;
	66	MDOUBLE getAlpha() const { return _alpha; }
	67
	68
	69	protected:
		// Internal drivers shared by the derived classes' public seqs2Tree*/seqs2TreeIterative overloads.
	70	tree seqs2TreeIterativeInternal(const sequenceContainer &sc, bool initSideInfoGiven=false);
	71	tree seqs2TreeIterativeInternalInitTreeGiven(const sequenceContainer &sc, const tree &initTree);
	72	tree seqs2TreeIterativeInternalInitTreeGiven(const sequenceContainer &sc, bool initSideInfoGiven, const tree &initTree, MDOUBLE initAlpha);
	73	void seqs2TreeOneIterationInternal(const sequenceContainer &sc, const bool sideInfoSet);
	74
	75	MDOUBLE _newTreeLogLikelihood;
		// Tolerances and iteration limit received through the constructor parameters of the same names.
	76	MDOUBLE _epsilonLikelihoodImprovement;
	77	MDOUBLE _epsilonLikelihoodImprovement4alphaOptimiz;
	78	MDOUBLE _epsilonLikelihoodImprovement4BBL;
	79	int _maxIterationsBBL;
	80
		// Current and candidate alpha; derived classes copy _newAlpha into _alpha in acceptSideInfo().
	81	MDOUBLE _alpha;
	82	MDOUBLE _newAlpha;
	83
	84	stochasticProcess *_spPtr;
	85	tree _et, _newTree;
	86	};
	87
	88	class commonAlphaDistanceSeqs2Tree : public iterativeDistanceSeqs2Tree {
		// Iterative builder whose side info is a single MDOUBLE alpha (see setSideInfo/getSideInfo below).
	89	public:
	90	// Given likeDist is assumed to hold a gamma-distribution stochasticProcess
		// All arguments are forwarded to the base constructor. Note that maxIterationsBBL defaults to 50 here,
		// while the base-class constructor's own default is 10.
	91	commonAlphaDistanceSeqs2Tree(likeDist &distM, distances2Tree &dist2et, const Vdouble *weights = NULL,
	92	const MDOUBLE epsilonLikelihoodImprovement = 0.001,
	93	const MDOUBLE epsilonLikelihoodImprovement4alphaOptimiz = 0.001,
	94	const MDOUBLE epsilonLikelihoodImprovement4BBL = 0.001,
	95	const int maxIterationsBBL = 50)
	96	: iterativeDistanceSeqs2Tree(distM, dist2et, weights, epsilonLikelihoodImprovement, epsilonLikelihoodImprovement4alphaOptimiz, epsilonLikelihoodImprovement4BBL, maxIterationsBBL) {}
	97	virtual ~commonAlphaDistanceSeqs2Tree() {}
	98
	99	// NOTE! This version calls ITERATIVE seqs2Tree because side info is not given by the user, so we have to generate and optimize it
	100	virtual tree seqs2Tree(const sequenceContainer &sc, const Vdouble weights=NULL, const tree constraintTreePtr=NULL);
	101	// NOTE! This version is a NON-ITERATIVE version that uses the side info supplied by the user
	102	tree seqs2Tree(const sequenceContainer &sc, MDOUBLE alpha, const Vdouble weights=NULL, const tree constraintTreePtr=NULL);
	103	// Does one bootstrap iteration
	104	tree seqs2TreeBootstrap(const sequenceContainer &sc, const MDOUBLE alpha, const Vdouble weights, const tree constraintTreePtr=NULL);
	105	// Explicitly ask for iterations
	106	virtual tree seqs2TreeIterative(const sequenceContainer &sc, const Vdouble weights=NULL, const tree constraintTreePtr=NULL); // homogenous rates will be used for first iteration
	107	tree seqs2TreeIterative(const sequenceContainer &sc, MDOUBLE initAlpha, const Vdouble weights=NULL, const tree constraintTreePtr=NULL);
	108	virtual tree seqs2TreeIterative(const sequenceContainer &sc, const tree &initTree, const Vdouble weights=NULL, const tree constraintTreePtr=NULL);
	109	virtual tree seqs2TreeIterative(const sequenceContainer &sc, const tree &initTree, MDOUBLE initAlpha, const Vdouble weights=NULL, const tree constraintTreePtr=NULL);
	110
	111	// handling side info
	112	virtual MDOUBLE optimizeSideInfo(const sequenceContainer &sc, tree &et);
	113	virtual MDOUBLE calcSideInfoGivenTreeAndAlpha(const sequenceContainer &sc, const tree &et, MDOUBLE alpha);
	114	virtual void acceptSideInfo();
	115	virtual void utilizeSideInfo();
	116	virtual void printSideInfo(ostream& out) const;
	117	void setSideInfo(const MDOUBLE alpha);
	118	MDOUBLE getSideInfo() const;
	119	};
	120
	121	class rate4siteDistanceSeqs2Tree : public iterativeDistanceSeqs2Tree {
		// Iterative builder whose side info is a vector of per-site rates (_rates / _newRates).
	122	public:
		// All arguments are forwarded to the base constructor. The distance method must be a
		// givenRatesMLDistance; utilizeSideInfo() casts _distM back to that type.
	123	rate4siteDistanceSeqs2Tree(givenRatesMLDistance &distM, distances2Tree &dist2et, const Vdouble *weights = NULL,
	124	const MDOUBLE epsilonLikelihoodImprovement = 0.001,
	125	const MDOUBLE epsilonLikelihoodImprovement4alphaOptimiz = 0.001,
	126	const MDOUBLE epsilonLikelihoodImprovement4BBL = 0.001,
	127	const int maxIterationsBBL = 50)
	128	: iterativeDistanceSeqs2Tree(distM, dist2et, weights, epsilonLikelihoodImprovement, epsilonLikelihoodImprovement4alphaOptimiz, epsilonLikelihoodImprovement4BBL, maxIterationsBBL) {}
	129	virtual ~rate4siteDistanceSeqs2Tree() {}
	130
	131	// NOTE! This version calls ITERATIVE seqs2Tree because side info is not given by the user, so we have to generate and optimize it
	132	virtual tree seqs2Tree(const sequenceContainer &sc, const Vdouble weights = NULL, const tree constraintTreePtr=NULL);
	133	// NOTE! This version is a NON-ITERATIVE version that uses the side info supplied by the user
	134	tree seqs2Tree(const sequenceContainer &sc, const Vdouble &rates, const Vdouble weights = NULL, const tree constraintTreePtr=NULL);
	135	// Does one bootstrap iteration
	136	tree seqs2TreeBootstrap(const sequenceContainer &sc, const Vdouble &rates, const Vdouble weights, const tree constraintTreePtr=NULL);
	137	// Explicitly ask for iterations
	138	virtual tree seqs2TreeIterative(const sequenceContainer &sc, const Vdouble weights=NULL, const tree constraintTreePtr=NULL); // homogenous rates will be used for first iteration
	139	tree seqs2TreeIterative(const sequenceContainer &sc, const Vdouble &initRates, const Vdouble weights=NULL, const tree constraintTreePtr=NULL);
	140	virtual tree seqs2TreeIterative(const sequenceContainer &sc, const tree &initTree, const Vdouble weights=NULL, const tree constraintTreePtr=NULL);
	141	virtual tree seqs2TreeIterative(const sequenceContainer &sc, const tree &initTree, MDOUBLE initAlpha, const Vdouble weights=NULL, const tree constraintTreePtr=NULL);
	142
	143	// handling side info
	144	virtual MDOUBLE optimizeSideInfo(const sequenceContainer &sc, tree &et);
	145	virtual MDOUBLE calcSideInfoGivenTreeAndAlpha(const sequenceContainer &sc, const tree &et, MDOUBLE alpha);
	146	virtual void acceptSideInfo();
	147	virtual void utilizeSideInfo();
	148	virtual void printSideInfo(ostream& out) const;
	149	void setSideInfo(const Vdouble &rates);
	150	const Vdouble& getSideInfo() const;
	151
	152	private:
		// _rates is the current side info; _newRates is the candidate that acceptSideInfo() copies into _rates.
	153	Vdouble _rates;
	154	Vdouble _newRates;
	155	};
	156
	157	class posteriorDistanceSeqs2Tree : public iterativeDistanceSeqs2Tree {
		// Iterative builder whose side info is a VVdoubleRep posterior (_posterior / _newPosterior).
	158	public:
		// All arguments are forwarded to the base constructor. The distance method must be a
		// posteriorDistance; utilizeSideInfo() casts _distM back to that type.
	159	posteriorDistanceSeqs2Tree(posteriorDistance &distM, distances2Tree &dist2et, const Vdouble *weights = NULL,
	160	const MDOUBLE epsilonLikelihoodImprovement = 0.001,
	161	const MDOUBLE epsilonLikelihoodImprovement4alphaOptimiz = 0.001,
	162	const MDOUBLE epsilonLikelihoodImprovement4BBL = 0.001,
	163	const int maxIterationsBBL = 50)
	164	: iterativeDistanceSeqs2Tree(distM, dist2et, weights, epsilonLikelihoodImprovement, epsilonLikelihoodImprovement4alphaOptimiz, epsilonLikelihoodImprovement4BBL, maxIterationsBBL) {}
	165	virtual ~posteriorDistanceSeqs2Tree() {}
	166
	167	// NOTE! This version calls ITERATIVE seqs2Tree because side info is not given by the user, so we have to generate and optimize it
	168	virtual tree seqs2Tree(const sequenceContainer &sc, const Vdouble weights = NULL, const tree constraintTreePtr=NULL);
	169	// NOTE! This version is a NON-ITERATIVE version that uses the side info supplied by the user
	170	tree seqs2Tree(const sequenceContainer &sc, const VVdoubleRep &posterior, const Vdouble weights = NULL, const tree constraintTreePtr=NULL);
	171	// Does one bootstrap iteration
	172	tree seqs2TreeBootstrap(const sequenceContainer &sc, const VVdoubleRep &posterior, const Vdouble weights=NULL, const tree constraintTreePtr=NULL);
	173	// Explicitly ask for iterations
	174	virtual tree seqs2TreeIterative(const sequenceContainer &sc, const Vdouble weights=NULL, const tree constraintTreePtr=NULL); // homogenous rates will be used for first iteration
	175	tree seqs2TreeIterative(const sequenceContainer &sc, MDOUBLE initAlpha, const VVdoubleRep &initPosterior, const Vdouble weights=NULL, const tree constraintTreePtr=NULL);
	176	virtual tree seqs2TreeIterative(const sequenceContainer &sc, const tree &initTree, const Vdouble weights=NULL, const tree constraintTreePtr=NULL);
	177	virtual tree seqs2TreeIterative(const sequenceContainer &sc, const tree &initTree, MDOUBLE initAlpha, const Vdouble weights=NULL, const tree constraintTreePtr=NULL);
	178	tree seqs2TreeIterative(const sequenceContainer &sc, const tree &initTree, MDOUBLE initAlpha, const VVdoubleRep &initPosterior, const Vdouble weights=NULL, const tree constraintTreePtr=NULL);
	179
	180	// handling side info
	181	virtual MDOUBLE optimizeSideInfo(const sequenceContainer &sc, tree &et);
	182	virtual MDOUBLE calcSideInfoGivenTreeAndAlpha(const sequenceContainer &sc, const tree &et, MDOUBLE alpha);
	183	virtual void acceptSideInfo();
	184	virtual void utilizeSideInfo();
	185	virtual void printSideInfo(ostream& out) const;
	186	void setSideInfo(const VVdoubleRep &posterior);
	187	const VVdoubleRep& getSideInfo() const;
	188
	189	private:
		// _posterior is the current side info; _newPosterior is the candidate that acceptSideInfo() copies into _posterior.
	190	VVdoubleRep _posterior;
	191	VVdoubleRep _newPosterior;
	192	};
	193
	194	#endif

+24

-0

libs/phylogeny/distanceMethod.h less more

	0	// $Id: distanceMethod.h 962 2006-11-07 15:13:34Z privmane $
	1
	2	#ifndef ___DISTANCE_METHOD
	3	#define ___DISTANCE_METHOD
	4	#include "definitions.h"
	5	#include "sequence.h"
	6
	7	/*********************************************************
	8	Distance method is a class for computing pairwise distance
	9	between 2 different sequences
		It is an interface: both giveDistance() and clone() are pure virtual.
	10	*******************************************************/
	11	class distanceMethod {
	12	public:
		// Returns the distance between s1 and s2. weights and score are optional and default to NULL;
		// how they are used is up to the implementing class.
	13	virtual const MDOUBLE giveDistance(const sequence& s1,
	14	const sequence& s2,
	15	const vector<MDOUBLE> * weights=NULL,
	16	MDOUBLE* score=NULL) const=0;
		// Returns a pointer to a copy of this object; callers such as distanceBasedSeqs2Tree delete it themselves.
	17	virtual distanceMethod* clone(void) const=0;
	18	virtual ~distanceMethod() {}
	19	};
	20
	21
	22	#endif
	23

+21

-0

libs/phylogeny/distanceTable.cpp less more

	0	// $Id: distanceTable.cpp 1740 2007-02-26 13:53:10Z itaymay $
	1
	2	#include "definitions.h"
	3	#include "distanceTable.h"
	4
	5	void giveDistanceTable(const distanceMethod* dis,
	6	const sequenceContainer& sc,
	7	VVdouble& res,
	8	vector<string>& names,
	9	const vector<MDOUBLE> * weights){
		// Fills res with the pairwise distances between all sequences of sc and appends the sequence
		// names to names, both in container order (place 0, 1, ...).
		// Only the entries above the diagonal (column > row) are computed; elements created by the
		// resize calls are 0.0. names is appended to, not cleared.
		const int seqCount = sc.numberOfSeqs();
	10		res.resize(seqCount);
	11		for (int row = 0; row < seqCount; ++row)
			res[row].resize(seqCount, 0.0);
	12
	13		for (int row = 0; row < seqCount; ++row) {
	14			for (int col = row + 1; col < seqCount; ++col) {
	15				res[row][col] = dis->giveDistance(sc[sc.placeToId(row)], sc[sc.placeToId(col)], weights, NULL);
	17			}
	18			names.push_back(sc[sc.placeToId(row)].name());
	19		}
	20	}

+17

-0

libs/phylogeny/distanceTable.h less more

	0	// $Id: distanceTable.h 962 2006-11-07 15:13:34Z privmane $
	1
	2	#ifndef ___DISTANCE_TABLE
	3	#define ___DISTANCE_TABLE
	4
	5	#include "definitions.h"
	6	#include "distanceMethod.h"
	7	#include "sequenceContainer.h"
	8
	9	void giveDistanceTable(const distanceMethod* dis,	// method used for every pairwise distance
	10	const sequenceContainer& sc,	// the sequences to compare
	11	VVdouble& res,	// output: square table; only entries above the diagonal are computed
	12	vector<string>& names,	// output: sequence names are appended in container order
	13	const vector<MDOUBLE> * weights = NULL);	// optional, passed through to giveDistance()
	14
	15
	16	#endif

+18

-0

libs/phylogeny/distances2Tree.h less more

	0	// $Id: distances2Tree.h 962 2006-11-07 15:13:34Z privmane $
	1
	2	#ifndef ___DISTANCES2TREE
	3	#define ___DISTANCES2TREE
	4
	5	#include "definitions.h"
	6	#include "tree.h"
	7	#include <string>
	8	using namespace std;
	9
	10	class distances2Tree {
		// Interface for methods that build a tree from a table of pairwise distances.
	11	public:
	12	virtual ~distances2Tree() {}
		// Returns a pointer to a copy of this object; callers such as distanceBasedSeqs2Tree delete it themselves.
	13	virtual distances2Tree* clone() const =0;
		// Builds a tree from the distance table and the matching names. The table is taken by value.
		// The last parameter (spelled "constriantTree") is an optional constraint tree, NULL by default.
	14	virtual tree computeTree(VVdouble distances, const vector<string>& names, const tree * const constriantTree = NULL) = 0;
	15	};
	16
	17	#endif

+13

-0

libs/phylogeny/distribution.cpp less more

	0	// $Id: distribution.cpp 2709 2007-11-19 14:49:21Z itaymay $
	1
	2	#include "distribution.h"
	3	#include "errorMsg.h"
	4
	5	distribution::~distribution(){}
	6	// The destructor is declared pure virtual in distribution.h but must still be defined here.
	7	// See Effective C++ page 63 (item 14: constructors, destructors, assignment).
	8
	9	void distribution::change_number_of_categories(int in_number_of_categories)
	10	{
		// Default implementation for distributions that do not support changing the number of categories:
		// it ignores its argument and only reports an error.
	11	errorMsg::reportError("not implemented: distribution::change_number_of_categories()!");
	12	}⏎

+31

-0

libs/phylogeny/distribution.h less more

	0	// $Id: distribution.h 2709 2007-11-19 14:49:21Z itaymay $
	1
	2	// version 2.00
	3	// last modified 21 Mar 2004
	4
	5	/************************************************************
	6	This is an abstract base class from which all distribution classes inherit.
	7	************************************************************/
	8
	9	#ifndef ___DISTRIBUTION
	10	#define ___DISTRIBUTION
	11
	12	#include "definitions.h"
	13
	14	class distribution {
		// Interface for rate distributions split into categories. Every member except
		// change_number_of_categories() is pure virtual.
	15	public:
	16	virtual distribution* clone() const = 0;
		// Pure virtual, but still defined in distribution.cpp.
	17	virtual ~distribution() = 0;
	18
	19	virtual const int categories() const=0; // @@@@ there is no need to return a const int.
		// The base implementation (distribution.cpp) only reports a "not implemented" error.
	20	virtual void change_number_of_categories(int in_number_of_categories);
	21	virtual const MDOUBLE rates(const int i) const=0; // @@@@ there is no need to return a const MDOUBLE.
	22	virtual const MDOUBLE ratesProb(const int i) const=0; // @@@@ there is no need to return a const MDOUBLE.
	23	virtual void setGlobalRate(const MDOUBLE x)=0;
	24	virtual MDOUBLE getGlobalRate()const=0; // @@@@ there is no need to return a const MDOUBLE.
	25	virtual const MDOUBLE getCumulativeProb(const MDOUBLE x) const = 0; // @@@@ there is no need to return a const MDOUBLE.
	26
	27	};
	28	#endif
	29
	30

+104

-0

libs/phylogeny/distributionPlusCategory.cpp less more

	0	#include "distributionPlusCategory.h"
	1
	2	distributionPlusCategory::distributionPlusCategory(const distribution* pBaseDist, MDOUBLE baseDistProb,MDOUBLE categoryVal,MDOUBLE globalRate)
	3	:
	4	_globalRate(globalRate),
		_pBaseDist(NULL),	// stays NULL when no base distribution is given, so the destructor never deletes garbage
	5	_categoryVal(categoryVal),
	6	_baseDistProb(baseDistProb)
	7	{
		// Builds a mixture of a base distribution (cloned, so the caller keeps ownership of pBaseDist)
		// and one extra category with rate categoryVal.
		// baseDistProb must lie in [0,1]; otherwise an error is reported.
	8		if (pBaseDist!= NULL)
	9			_pBaseDist = pBaseDist->clone();
	10		if ((baseDistProb < 0.0) || (baseDistProb>1.0) ) {
	11			errorMsg::reportError("illegal baseDistProb in distributionPlusCategory::distributionPlusCategory");
	12		}
	13	}
	14
	15	distributionPlusCategory::distributionPlusCategory()
	16	:
	17	_globalRate(1.0),
	18	_pBaseDist(NULL),
	19	_categoryVal(1.0),
	20	_baseDistProb(0.0)
	21	{
		// Default state: no base distribution, global rate 1.0, category value 1.0, base probability 0.0.
	22	}
	23
	24	distributionPlusCategory::distributionPlusCategory(const distributionPlusCategory& other)
		: _pBaseDist(NULL)	// operator= deletes the old base distribution, so it must start out NULL
	25	{
		// Copy construction is implemented through operator=, which deep-copies the base distribution.
	26		(*this) = other;
	27	}
	28
	29	distributionPlusCategory& distributionPlusCategory::operator=(const distributionPlusCategory &other)
	30	{
		// Deep-copies other into this object. Self-assignment is a no-op.
	31		if (this != &other)
	32		{
			// Clone first and release the previously owned clone afterwards, so the old clone is no longer leaked.
			distribution* clonedBase = NULL;
	34			if (other._pBaseDist) {
	35				clonedBase = other._pBaseDist->clone();
	36			}
			delete _pBaseDist;
			_pBaseDist = clonedBase;
	33			_globalRate = other._globalRate;
	40			_categoryVal = other._categoryVal;
	41			_baseDistProb = other._baseDistProb;
	42
	43		}
	44		return *this;
	45	}
	46
	47	distributionPlusCategory::~distributionPlusCategory()
	48	{
		// Releases the owned clone of the base distribution; deleting a null pointer does nothing.
	49		delete _pBaseDist;
	51	}
	52
	53	const int distributionPlusCategory::categories() const
	54	{
		// The base distribution's categories plus the one extra category.
		// Dereferences _pBaseDist without a NULL check.
	55	return _pBaseDist->categories()+1;
	56	}
	57
	58
	59	const MDOUBLE distributionPlusCategory::rates(const int category) const
	60	{
		// Indices below the base distribution's category count map to the base distribution's rates;
		// any other index yields the rate of the extra category.
	61		const bool inBaseDist = (category < _pBaseDist->categories());
	62		return inBaseDist ? _pBaseDist->rates(category) : _categoryVal;
	65	}
	66
	67
	68	const MDOUBLE distributionPlusCategory::ratesProb(const int category) const
	69	{
		// The extra category carries the probability mass not given to the base distribution.
	70		if (category >= _pBaseDist->categories())
	73			return (1-_baseDistProb); //category prob
		// Base categories keep their own probability, scaled by the base distribution's share.
	71		return _pBaseDist->ratesProb(category) * _baseDistProb;
	74	}
	75
	76
	77	// Cumulative probability of the mixture up to x.
	78	const MDOUBLE distributionPlusCategory::getCumulativeProb(const MDOUBLE x) const
	79	{
		// Negative x is reported as an error.
	81		if (x < 0)
	82			errorMsg::reportError("x < 0 in distributionPlusCategory::getCumulativeProb()");
		// The extra category adds its whole mass once x exceeds _categoryVal - EPSILON.
	83		const MDOUBLE extraCategoryMass = (x > _categoryVal - EPSILON) ? (1-_baseDistProb) : 0.0;
	85		return extraCategoryMass + _baseDistProb * _pBaseDist->getCumulativeProb(x);
	87	}
	88
	89
	90	void distributionPlusCategory::change_number_of_categories(int in_number_of_categories)
	91	{
		// Forwards the request to the base distribution; the extra category is not counted in the argument.
		// Dereferences _pBaseDist without a NULL check.
	92	_pBaseDist->change_number_of_categories(in_number_of_categories);
	93	}
	94
	95
	96	void distributionPlusCategory::setBaseDistProb(MDOUBLE baseDistProb)
	97	{
		// Sets the probability mass assigned to the base distribution.
		// Values outside [0,1] are reported as an error; the assignment below is still written after
		// that check, as before.
	98		if ((baseDistProb < 0.0) || (baseDistProb > 1.0) ) {
	99			errorMsg::reportError("illegal baseDistProb in distributionPlusCategory::setBaseDistProb");
	100		}
	101
	102		_baseDistProb = baseDistProb;
	103	}

+43

-0

libs/phylogeny/distributionPlusCategory.h less more

	0
	1	#ifndef ___DIST_PLUS_CATEGORY
	2	#define ___DIST_PLUS_CATEGORY
	3
	4	#include "definitions.h"
	5	#include "distribution.h"
	6	#include "logFile.h"
	7	#include "errorMsg.h"
	8
	9	class distributionPlusCategory : public distribution {
		// A base distribution (held as an owned clone in _pBaseDist) combined with one additional rate
		// category of value _categoryVal. The base distribution gets probability _baseDistProb and the
		// extra category gets 1 - _baseDistProb.
	10
	11	public:
	12	explicit distributionPlusCategory(const distribution* pBaseDist, MDOUBLE baseDistProb,MDOUBLE categoryVal,MDOUBLE globalRate=1);
	13	explicit distributionPlusCategory();
		// NOTE(review): an explicit copy constructor rules out copy-initialization; clone() below uses
		// direct initialization, which is allowed.
	14	explicit distributionPlusCategory(const distributionPlusCategory& other);
	15	virtual ~distributionPlusCategory();
	16	virtual distributionPlusCategory& operator=(const distributionPlusCategory &other);
	17	virtual distribution* clone() const { return new distributionPlusCategory(*this); }
	18
		// Returns the internally owned base distribution (may be NULL); ownership is not transferred.
	19	distribution* getBaseDistribution() {return _pBaseDist;}
	20	virtual const int categories() const;
	21	virtual const MDOUBLE rates(const int category) const;
	22	virtual const MDOUBLE ratesProb(const int category) const;
	23
	24	virtual void setGlobalRate(const MDOUBLE x) {_globalRate=x;}
	25	virtual MDOUBLE getGlobalRate()const {return _globalRate;}
	26	virtual const MDOUBLE getCumulativeProb(const MDOUBLE x) const;
	27	virtual void change_number_of_categories(int in_number_of_categories);
	28
	29	virtual MDOUBLE getCategoryVal() const {return _categoryVal;}
	30	virtual MDOUBLE getBaseDistProb() const {return _baseDistProb;}
	31	virtual void setCategoryVal(MDOUBLE categoryVal) { _categoryVal = categoryVal;}
	32	virtual void setBaseDistProb(MDOUBLE baseDistProb);
	33
	34	protected:
	35	MDOUBLE _globalRate;
	36	distribution* _pBaseDist;	// owned clone, deleted in the destructor
	37	MDOUBLE _categoryVal;		// rate of the extra category
	38	MDOUBLE _baseDistProb;		// probability mass of the base distribution
	39
	40	};
	41
	42	#endif // ___DIST_PLUS_CATEGORY

+77

-0

libs/phylogeny/distributionPlusInvariant.cpp less more

	0	#include "definitions.h"
	1	#include "distributionPlusInvariant.h"
	2	#include "errorMsg.h"
	3	#include "logFile.h"
	4
	5	//#define RATE_INVARIANT 1e-10
	6
	7
	8	distributionPlusInvariant::distributionPlusInvariant(
	9	distribution* pDist, const MDOUBLE pInv, const MDOUBLE globalRate, MDOUBLE rateInvariantVal)
		// Members are initialized in declaration order. The base distribution is cloned, so the caller
		// keeps ownership of pDist; a NULL pDist leaves _pBaseDist NULL.
		: _globalRate(globalRate),
		  _Pinv(pInv),
		  _rateInvariantVal(rateInvariantVal),
		  _pBaseDist(pDist != NULL ? pDist->clone() : NULL)
	10	{
	17	}
	18
	19	distributionPlusInvariant::distributionPlusInvariant()
		// Default state: global rate 1.0, no invariant probability, invariant rate 0, no base distribution.
		: _globalRate(1.0),
		  _Pinv(0),
		  _rateInvariantVal(0),
		  _pBaseDist(NULL)
	20	{
	25	}
	26
	27
	28	distributionPlusInvariant& distributionPlusInvariant::operator=(const distributionPlusInvariant& other)
	29	{
		// Deep-copies other into this object.
		// Self-assignment must be a no-op: resetting _pBaseDist before cloning would otherwise drop
		// (and leak) this object's own base distribution.
		if (this == &other)
			return *this;
	30		_globalRate = other._globalRate;
	31		_Pinv = other._Pinv;
	32		_rateInvariantVal = other._rateInvariantVal;
		// Clone first, then release the previously owned clone so that it is no longer leaked.
		// Every constructor of this class sets _pBaseDist, so the delete is always on a valid or NULL pointer.
	33		distribution* clonedBase = NULL;
	34		if (other._pBaseDist != NULL)
	35			clonedBase = other._pBaseDist->clone();
		delete _pBaseDist;
		_pBaseDist = clonedBase;
	36		return *this;
	37	}
	38
	39	distributionPlusInvariant::~distributionPlusInvariant()
	40	{
		// Releases the owned clone of the base distribution; deleting a null pointer does nothing.
	41		delete _pBaseDist;
	43	}
	44
	45
	46	// Cumulative probability of the mixture up to x.
	47	const MDOUBLE distributionPlusInvariant::getCumulativeProb(const MDOUBLE x) const
	48	{
		// Negative x is reported as an error.
	49		if (x < 0)
	50			errorMsg::reportError("x < 0 in distributionPlusInvariant::getCumulativeProb()");
		// The invariant mass _Pinv is always included; the rest is the base distribution's CDF scaled by (1 - _Pinv).
		const MDOUBLE baseCumulative = _pBaseDist->getCumulativeProb(x);
	51		return (_Pinv + (1 -_Pinv) * baseCumulative);
	52	}
	53
	54
	55	const MDOUBLE distributionPlusInvariant::ratesProb(const int category) const
	56	{
		// The last category is the invariant one, with probability _Pinv; every other category keeps
		// the base distribution's probability scaled by (1 - _Pinv).
	57		const bool isInvariantCategory = (category == categories()-1);
	58		return isInvariantCategory ? _Pinv : (1 - _Pinv) * _pBaseDist->ratesProb(category);
	61	}
	62
	63	const MDOUBLE distributionPlusInvariant::rates(const int category) const
	64	{
		// Every category except the last one belongs to the base distribution.
	65		if (category != categories()-1)
	68			return _pBaseDist->rates(category);
		// The last category is the invariant one.
	66		return _rateInvariantVal; //RATE_INVARIANT
	69	}
	70
	71	const int distributionPlusInvariant::categories() const
	72	{
		// The invariant category plus the base distribution's categories.
		// Dereferences _pBaseDist without a NULL check.
	73	return 1 + _pBaseDist->categories();
	74	}
	75
	76

+41

-0

libs/phylogeny/distributionPlusInvariant.h less more

	0	#ifndef __DISTPLUSINV
	1	#define __DISTPLUSINV
	2	/************************************************************
	3	This class describes a combination of a predefined distribution
	4	with an additional invariant category of probability _Pinv.
	5	This category is always the last rate category (i.e., rates(categories()-1) == _rateInvariantVal, which defaults to 1e-10)
	6	************************************************************/
	7	#include "definitions.h"
	8	#include "distribution.h"
	9
	10	class distributionPlusInvariant : public distribution {
	11	public:
		// pDist is cloned (the caller keeps ownership). pInv is the probability of the invariant category
		// and rateInvariantVal is its rate.
	12	explicit distributionPlusInvariant(
	13	distribution* pDist, const MDOUBLE pInv, const MDOUBLE globalRate=1, MDOUBLE rateInvariantVal=1e-10);
	14	explicit distributionPlusInvariant();
		// _pBaseDist is set to NULL before the copy is delegated to operator=.
	15	distributionPlusInvariant(const distributionPlusInvariant& other): _pBaseDist(NULL){(*this) = other;}
	16	virtual distributionPlusInvariant& operator=(const distributionPlusInvariant& other);
	17	distributionPlusInvariant* clone() const {return new distributionPlusInvariant(*this);}
	18
	19	virtual ~distributionPlusInvariant();
	20
		// Returns the internally owned base distribution (may be NULL); ownership is not transferred.
	21	distribution* getBaseDistribution(){return _pBaseDist;}
	22	//get/set the parameters of the mixture
	23	const int categories() const;
	24	void setGlobalRate(const MDOUBLE r) {_globalRate = r;}
	25	MDOUBLE getGlobalRate() const {return _globalRate;}
		// Stores p as given; no range check is performed here.
	26	virtual void setInvProb(const MDOUBLE p) {_Pinv = p;}
	27	const MDOUBLE getInvProb() const {return _Pinv;}
	28
	29	//get distribution statistics
	30	virtual const MDOUBLE getCumulativeProb(const MDOUBLE x) const;
	31	virtual const MDOUBLE rates(const int category) const;
	32	virtual const MDOUBLE ratesProb(const int i) const;
	33
	34	protected:
	35	MDOUBLE _globalRate;
	36	MDOUBLE _Pinv;				// probability of the invariant category
	37	MDOUBLE _rateInvariantVal;	// rate returned for the invariant (last) category
	38	distribution* _pBaseDist;	// owned clone, deleted in the destructor
	39	};
	40	#endif

+71

-0

libs/phylogeny/doubleRep.cpp less more

	0	#ifdef DOUBLEREP
	1	#include "doubleRep.h"
	2	#include <cmath>
	3
	4
	5
	6	doubleRepMantisa::doubleRepMantisa(MDOUBLE mantissa, int expon){
		// Stores the value mantissa * 2^expon, then calls fixParams() to bring the pair to the
		// convention 0.5 <= |mantissa| < 1.
	7	_mantissa=mantissa;
	8	_expon=expon;
	9	fixParams();
	10	}
	11
	12
	13	doubleRepMantisa::doubleRepMantisa(MDOUBLE a){
		// frexp() splits a into a mantissa and a power-of-two exponent, so fixParams() is not needed here.
	14		int binaryExponent;
	15		_mantissa = frexp(a, &binaryExponent);
	17		_expon = binaryExponent;
	18	}
	19
	20	doubleRepMantisa::doubleRepMantisa(const doubleRepMantisa& other): _mantissa(other._mantissa), _expon(other._expon) {
		// Member-wise copy; fixParams() is not called, so the pair is copied as is.
	21	}
	22
	23
	24	//make sure 0.5<=|mantissa|<1, as a matter of convention
		// The represented value _mantissa * 2^_expon does not change. A zero mantissa is left alone.
	25	void doubleRepMantisa::fixParams(){
		// x - x is 0 for every finite x and NaN for +/-infinity and NaN. Non-finite mantissas cannot be
		// normalized (the former halving loop never terminated for +/-infinity), so leave them untouched.
		if (_mantissa - _mantissa != 0)
			return;
		// frexp() gives the normalized mantissa and the power-of-two shift in one step, replacing the
		// former repeated halving/doubling loops with the same result for finite values.
		int shift = 0;
	26		_mantissa = frexp(_mantissa, &shift);
	27		_expon += shift;
	42	}
	43
	44	MDOUBLE convert(const doubleRepMantisa& a){
	45	MDOUBLE aFullRep= ldexp(a._mantissa,a._expon);
	46	return aFullRep;
	47	}
	48
	49	//switches from base 2 to base e
	50	const MDOUBLE doubleRepMantisa::d_log() const{
	51	static const MDOUBLE log2(log(2.0));
	52	return log(_mantissa)+log2*_expon;
	53	}
	54
	55
	56	ostream& operator<<(ostream &out, const doubleRepMantisa& a){
	57	a.output(out);
	58	// a.output0x(out);
	59	// out<<a._mantissa<<string(" * 2^")<<a._expon;
	60	// out<<a._mantissa<<" * 2^"<<a._expon;
	61	return out;
	62	}
	63
	64	istream& operator>>(istream &in, doubleRepMantisa& a) {
	65	MDOUBLE num;
	66	in >> num;
	67	a = num;
	68	return in;
	69	}
	70	#endif

+319

-0

libs/phylogeny/doubleRep.h less more

	0	#ifndef __DOUBLE_REP_H
	1	#define __DOUBLE_REP_H
	2
	3	#ifdef DOUBLEREP
	4	#include "definitions.h"
	5
	6	#include <iostream>
	7	#include <cmath>
	8	#include <cstdlib>
	9	using namespace std;
	10
	11	/* doubleRepMantisa: enables working with much larger or smaller numbers than normally possible
	12	by the regular double representation
	13	* Representation of a double x as x=_mantissa*2^_expon
	14	Note: Base is 2!!
	15	*/
	16
	17	class doubleRepMantisa{
	18	public:
	19
	20	doubleRepMantisa(){};
	21	explicit doubleRepMantisa(MDOUBLE mantissa, int expon);
	22	doubleRepMantisa(MDOUBLE a);
	23	doubleRepMantisa(const doubleRepMantisa& other);
	24	doubleRepMantisa* clone() {return new doubleRepMantisa(*this);}
	25	void output(ostream &out) const{ out<<_mantissa<<string(" * 2^")<<_expon;}
	26	// void output0x(ostream &out) const{ double e0x=_expon*0.3010299956639; // log_10(2)
	27	// int e=(int)(trunc(e0x))-1;
	28	// double m=_mantissa*pow(10,e0x-e);
	29	// out<<m;
	30	// if (e<0)
	31	// out<<"e"<<e;
	32	// else
	33	// out<<"e+"<<e;
	34	//}
	35	void outputn(ostream &out) { out<<_mantissa<<string(" * 2^")<<_expon<<endl;}
	36
	37	friend MDOUBLE convert(const doubleRepMantisa& a);
	38	inline doubleRepMantisa& operator=(const doubleRepMantisa& a);
	39	inline doubleRepMantisa& operator+=(doubleRepMantisa a);
	40	inline doubleRepMantisa& operator++();
	41	inline doubleRepMantisa operator++(int);
	42	inline doubleRepMantisa& operator--();
	43	inline doubleRepMantisa operator--(int);
	44	friend inline doubleRepMantisa operator+(const doubleRepMantisa& a, const doubleRepMantisa& b);
	45	inline doubleRepMantisa& operator-=(const doubleRepMantisa& a);
	46	friend inline doubleRepMantisa operator-(const doubleRepMantisa& a, const doubleRepMantisa& b);
	47	inline doubleRepMantisa& operator*=(const doubleRepMantisa& a);
	48	friend inline doubleRepMantisa operator*(const doubleRepMantisa& a, const doubleRepMantisa& b);
	49	inline doubleRepMantisa& operator/=(const doubleRepMantisa& a);
	50	friend inline doubleRepMantisa operator/(const doubleRepMantisa& a, const doubleRepMantisa& b);
	51
	52	friend inline bool operator==(const doubleRepMantisa& a, const doubleRepMantisa& b);
	53	friend inline bool operator!=(const doubleRepMantisa& a, const doubleRepMantisa& b);
	54	friend inline bool operator<(const doubleRepMantisa& a, const doubleRepMantisa& b);
	55	friend inline bool operator<=(const doubleRepMantisa& a, const doubleRepMantisa& b);
	56	friend inline bool operator>(const doubleRepMantisa& a, const doubleRepMantisa& b);
	57	friend inline bool operator>=(const doubleRepMantisa& a, const doubleRepMantisa& b);
	58	friend inline doubleRepMantisa abs(const doubleRepMantisa& d);
	59
	60
	61	const MDOUBLE d_log() const;
	62	// friend ostream& operator<<(ostream &out, const doubleRepMantisa& a);
	63
	64	const MDOUBLE mantissa() const {return _mantissa;}
	65	const int expon() const {return _expon;}
	66
	67	private:
	68	void fixParams();
	69
	70
	71	private:
	72	MDOUBLE _mantissa;
	73	int _expon;
	74	};
	75
	76	MDOUBLE convert(const doubleRepMantisa& a); //declaration of this function to be implemented cpp
	77
	78	inline doubleRepMantisa& doubleRepMantisa::operator=(const doubleRepMantisa& a){
	79	_mantissa=a.mantissa();
	80	_expon=a.expon();
	81	return *this;
	82	}
	83
	84
	85	inline doubleRepMantisa& doubleRepMantisa::operator++() {
	86	return (*this)+=1;
	87	}
	88
	89	// matan:
	90	inline doubleRepMantisa doubleRepMantisa::operator++(int) {
	91	doubleRepMantisa ans = *this;
	92	++(*this);
	93	return ans;
	94	}
	95
	96	// matan:
	97	inline doubleRepMantisa& doubleRepMantisa::operator--() {
	98	return (*this)-=1;
	99	}
	100
	101	// matan:
	102	inline doubleRepMantisa doubleRepMantisa::operator--(int) {
	103	doubleRepMantisa ans = *this;
	104	--(*this);
	105	return ans;
	106	}
	107
	108
	109	// Original version by Adi Stern
	110	inline doubleRepMantisa& doubleRepMantisa::operator+=(doubleRepMantisa a){
	111	//ensuring that (*this) is bigger than 'a' for sake of convenience
	112	if (a.expon()>_expon \|\| ((a.expon()==_expon) && (a.mantissa()>_mantissa))){
	113	MDOUBLE tmpMant=0.0; int tmpExp=0;
	114	tmpMant=_mantissa;
	115	tmpExp=_expon;
	116	_mantissa=a.mantissa();
	117	a._mantissa=tmpMant;
	118	tmpExp=_expon;
	119	_expon=a.expon();
	120	a._expon=tmpExp;
	121	}
	122	if (a.mantissa()==0)
	123	return *this;
	124	if (_mantissa==0){
	125	_mantissa=a.mantissa();
	126	_expon=a.expon();
	127	return *this;
	128	}
	129	int exp_dif = _expon-a.expon();
	130	if (abs(exp_dif)>51){ //limit of epsilon difference
	131	return *this;
	132	}
	133	_mantissa+=a.mantissa()pow(2.0,(a.expon()-_expon)1.0);
	134	fixParams();
	135	return *this;
	136	}
	137
	138	inline doubleRepMantisa operator+(const doubleRepMantisa& a, const doubleRepMantisa& b){
	139	doubleRepMantisa temp(a);
	140	temp+=b;
	141	return temp;
	142	}
	143
	144	inline doubleRepMantisa& doubleRepMantisa::operator-=(const doubleRepMantisa& a){
	145	doubleRepMantisa b(-a.mantissa(),a.expon());
	146	doubleRepMantisa me(_mantissa,_expon);
	147	me+=b;
	148	_mantissa=me.mantissa();
	149	_expon=me.expon();
	150	return *this;
	151	}
	152
	153	inline doubleRepMantisa operator-(const doubleRepMantisa& a, const doubleRepMantisa& b){
	154	doubleRepMantisa temp(a);
	155	temp-=b;
	156	return temp;
	157	}
	158
	159	inline doubleRepMantisa operator-(const doubleRepMantisa& a) {
	160	return doubleRepMantisa(0) - a;
	161	}
	162
	163	inline doubleRepMantisa& doubleRepMantisa::operator*=(const doubleRepMantisa& a){
	164	_mantissa*=a.mantissa();
	165	_expon+=a.expon();
	166	fixParams();
	167	return *this;
	168	}
	169
	170	inline doubleRepMantisa operator*(const doubleRepMantisa& a, const doubleRepMantisa& b){
	171	doubleRepMantisa temp(a);
	172	temp*=b;
	173	return temp;
	174	}
	175
	176	inline doubleRepMantisa& doubleRepMantisa::operator/=(const doubleRepMantisa& a){
	177	_mantissa/=a.mantissa();
	178	_expon-=a.expon();
	179	fixParams();
	180	return *this;
	181	}
	182
	183	inline doubleRepMantisa operator/(const doubleRepMantisa& a, const doubleRepMantisa& b){
	184	doubleRepMantisa temp(a);
	185	temp/=b;
	186	return temp;
	187	}
	188
	189	/************************
	190	* Comparison operators *
	191	************************/
	192	inline bool operator==(const doubleRepMantisa& a, const doubleRepMantisa& b){
	193	return (a._mantissa==b._mantissa && a._expon==b._expon);
	194	}
	195	inline bool operator!=(const doubleRepMantisa& a, const doubleRepMantisa& b){
	196	return !(a==b);
	197	}
	198
	199	inline bool operator<(const doubleRepMantisa& a, const doubleRepMantisa& b){
	200	// if the numbers have opposite signs
	201	if (a._mantissa*b._mantissa<0.0){
	202	if (a._mantissa<b._mantissa) {return true;}
	203	else {return false;}
	204	}
	205	// if the expon values are different
	206	if (a._expon!=b._expon) {
	207	// special case where one number is zero
	208	if (a._mantissa == 0.0) {
	209	if (b._mantissa > 0.0) {return true;}
	210	else {return false;}
	211	}
	212	if (b._mantissa == 0.0) {
	213	if (a._mantissa < 0.0) {return true;}
	214	else {return false;}
	215	}
	216
	217	if (a._expon<b._expon) {
	218	if (a._mantissa > 0.0) {return true;}
	219	else {return false;}
	220	} else {
	221	if (a._mantissa < 0.0) {return true;}
	222	else {return false;}
	223	}
	224	// expon values are identical
	225	} else {
	226	return (a._mantissa < b._mantissa);
	227	}
	228	}
	229
	230	inline bool operator>(const doubleRepMantisa& a, const doubleRepMantisa& b){
	231	// if the numbers have opposite signs
	232	if (a._mantissa*b._mantissa<0.0){
	233	if (a._mantissa>b._mantissa) {return true;}
	234	else {return false;}
	235	}
	236	// if the expon values are different
	237	if (a._expon!=b._expon) {
	238	// special case where one number is zero
	239	if (a._mantissa == 0.0) {
	240	if (b._mantissa < 0.0) {return true;}
	241	else {return false;}
	242	}
	243	if (b._mantissa == 0.0) {
	244	if (a._mantissa > 0.0) {return true;}
	245	else {return false;}
	246	}
	247
	248	if (a._expon>b._expon) {
	249	if (a._mantissa > 0.0) {return true;}
	250	else {return false;}
	251	} else {
	252	if (a._mantissa < 0.0) {return true;}
	253	else {return false;}
	254	}
	255	// expon values are identical
	256	} else {
	257	return (a._mantissa > b._mantissa);
	258	}
	259	}
	260
	261	inline bool operator<=(const doubleRepMantisa& a, const doubleRepMantisa& b){
	262	return !(a>b);
	263	}
	264
	265	inline bool operator>=(const doubleRepMantisa& a, const doubleRepMantisa& b){
	266	return !(a<b);
	267	}
	268
	269
	270
	271
	272	ostream& operator<<(ostream &out, const doubleRepMantisa& a);
	273	istream& operator>>(istream &in, doubleRepMantisa& a);
	274
	275	inline MDOUBLE log(const doubleRepMantisa& d) {return d.d_log();}
	276
	277	inline ostream &operator<<(ostream &out, const VdoubleRepMantisa &v){
	278	for (int j=0;j<v.size();++j)
	279	out<< v[j]<<" ";
	280	out <<endl;
	281	return(out);
	282	}
	283
	284	inline ostream &operator<<(ostream &out, const VVdoubleRepMantisa &m){
	285	for (int i=0;i<m.size();++i)
	286	out<<m[i];
	287	out <<endl;
	288	return(out);
	289	}
	290
	291	inline doubleRepMantisa pow(const doubleRepMantisa& d1, const doubleRepMantisa& d2) {
	292	return doubleRepMantisa(pow(convert(d1), convert(d2)));
	293	}
	294
	295	inline doubleRepMantisa abs(const doubleRepMantisa& d) {
	296	return doubleRepMantisa(abs(d._mantissa), d._expon);
	297	}
	298
	299	inline doubleRepMantisa fabs(const doubleRepMantisa& d) {
	300	return abs(d);
	301	}
	302
	303	inline doubleRepMantisa exp(const doubleRepMantisa& d) {
	304	return doubleRepMantisa(exp(convert(d)));
	305	}
	306
	307	inline doubleRepMantisa sqrt(const doubleRepMantisa& d) {
	308	return doubleRepMantisa(sqrt(convert(d)));
	309	}
	310
	311
	312
	313
	314
	315	//inline const MDOUBLE convert (const MDOUBLE d) const {return(d);}
	316
	317	#endif
	318	#endif

+45

-0

libs/phylogeny/errorMsg.cpp less more

	0	// $Id: errorMsg.cpp 6066 2009-04-14 19:11:10Z itaymay $
	1
	2	// version 1.01
	3	// last modified 1 Jan 2004
	4	#include "definitions.h"
	5	#include <cassert>
	6	#include "errorMsg.h"
	7	#include "logFile.h"
	8	#include <errno.h>
	9	#include <string.h> //for strerror
	10	#include <stdlib.h> //for exit()
	11
	12	ostream *errorMsg::_errorOut= NULL;
	13
	14	void errorMsg::reportError(const vector<string>& textToPrint, const int exitCode) {
	15	for (int i =0 ; i < textToPrint.size() ; ++i) {
	16	LOG(1,<<textToPrint[i]<<endl);
	17	cerr<<textToPrint[i]<<endl;
	18	if (_errorOut != NULL && *_errorOut != cerr) {
	19	(*_errorOut)<<textToPrint[i]<<endl;
	20	}
	21	}
	22	if (errno!=0){
	23	LOG(1,<<"System Error: "<<strerror(errno)<<endl);
	24	cerr<<"System Error: "<<strerror(errno)<<endl;
	25	}
	26	assert(0); // always stop here if in DEBUG mode.
	27	exit(exitCode);
	28	}
	29
	30	void errorMsg::reportError(const string& textToPrint, const int exitCode) {
	31	LOG(1,<<endl<<textToPrint<<endl);
	32	cerr<<endl<<textToPrint<<endl;
	33	if (_errorOut != NULL && *_errorOut != cerr) {
	34	(*_errorOut)<<textToPrint<<endl;
	35	}
	36	if (errno!=0){
	37	LOG(1,<<"System Error: "<<strerror(errno)<<endl);
	38	cerr<<"System Error: "<<strerror(errno)<<endl;
	39	}
	40	assert(0); // always stop here if in DEBUG mode.
	41	exit(exitCode);
	42	}
	43
	44

+33

-0

libs/phylogeny/errorMsg.h less more

	0	// $Id: errorMsg.h 962 2006-11-07 15:13:34Z privmane $
	1
	2	// version 1.01
	3	// last modified 1 Jan 2004
	4
	5	#ifndef ___ERROR_MSG_H
	6	#define ___ERROR_MSG_H
	7
	8	#include <string>
	9	#include <vector>
	10	#include <iostream>
	11
	12	using namespace std;
	13
	14	// The error is always send to cerr. _errorOut is NULL, unless setErrorOstream is called.
	15
	16
	17	class errorMsg {
	18	public:
	19	static void reportError(const vector<string>& textToPrint, const int exitCode=1);
	20	static void reportError(const string& textToPrint, const int exitCode=1);
	21	static void setErrorOstream(ostream* errorOut) {_errorOut = errorOut;}
	22	private:
	23	static ostream* _errorOut;
	24	};
	25
	26	// example of how to output to a file called error.txt
	27	// ofstream f("error.txt");
	28	// errorMsg::setErrorOstream(&f);
	29	// errorMsg::reportError("cheers");
	30
	31	#endif
	32

+153

-0

libs/phylogeny/evaluateCharacterFreq.cpp less more

	0	// $Id: evaluateCharacterFreq.cpp 10474 2012-03-18 07:54:07Z itaymay $
	1
	2	#include "evaluateCharacterFreq.h"
	3	#include "someUtil.h"
	4	#include <cassert>
	5
	6	vector<MDOUBLE> sumAlphabetCounts(const sequenceContainer & sc) {
	7	vector<MDOUBLE> charFreq(sc.alphabetSize(),0.0);
	8	sequenceContainer::constTaxaIterator tIt;
	9	sequenceContainer::constTaxaIterator tItEnd;
	10	tIt.begin(sc);
	11	tItEnd.end(sc);
	12	while (tIt!= tItEnd) {
	13	sequence::constIterator sIt;
	14	sequence::constIterator sItEnd;
	15	sIt.begin(*tIt);
	16	sItEnd.end(*tIt);
	17	while (sIt != sItEnd) {
	18	if ((sIt >= 0) && (sIt <charFreq.size())) ++charFreq[(*sIt)];
	19	++sIt;
	20	}
	21	++tIt;
	22	}
	23	return charFreq;
	24	}
	25
	26	void changeCountsToFreqs(vector<MDOUBLE>& charFreq){
	27	MDOUBLE sumA = 0;
	28	int i=0;
	29	for (i=0; i < charFreq.size(); ++i) {
	30	sumA+=charFreq[i] ;
	31	}
	32	for (i=0; i < charFreq.size(); ++i) {
	33	charFreq[i] /= sumA;
	34	}
	35	}
	36
	37	void makeSureNoZeroFreqs(vector<MDOUBLE> & charFreq){
	38	// CORRECT SO THAT THERE ARE NO ZERO FREQUENCIES.
	39	// ALL FREQS THAT WERE ZERO ARE CHANGED
	40	MDOUBLE ZERO_FREQ = 0.0000000001;
	41	MDOUBLE sumB=0;
	42	int charWithZeroFreq = 0;
	43	int i=0;
	44	for (i=0; i < charFreq.size(); ++i) {
	45	if (DSMALL_EQUAL(charFreq[i], ZERO_FREQ)) {
	46	charFreq[i] = ZERO_FREQ;
	47	++charWithZeroFreq;
	48	}
	49	else sumB +=charFreq[i];
	50	}
	51	if (!DEQUAL(sumB, 1.0, 0.01))
	52	{
	53	cerr.precision(10);
	54	cerr<<"sumFreq = "<<sumB<<endl;
	55	//errorMsg::reportError("error in makeSureNoZeroFreqs(). Input frequencies must sum to 1.0");
	56	}
	57	scaleVec(charFreq, 1.0/charFreq.size());
	58	//MDOUBLE scaleFactor = sumB - (charWithZeroFreq * ZERO_FREQ);
	59	//for (i=0; i < charFreq.size(); ++i) {
	60	// if (charFreq[i] != ZERO_FREQ)
	61	// charFreq[i] *= scaleFactor;
	62	//}
	63	}
	64
	65
	66	vector<MDOUBLE> evaluateCharacterFreq(const sequenceContainer & sc) {
	67	vector<MDOUBLE> charFreq=sumAlphabetCounts(sc);
	68	changeCountsToFreqs(charFreq);
	69	makeSureNoZeroFreqs(charFreq);
	70	return charFreq;
	71	}
	72
	73	VVdouble evaluateCharacterFreqOneForEachGene(const vector<sequenceContainer> & scVec){
	74	VVdouble charFreq;
	75	for (int k=0; k < scVec.size(); ++k) {
	76	charFreq.push_back(evaluateCharacterFreq(scVec[k]));
	77	}
	78	return charFreq;
	79	}
	80
	81
	82
	83
	84	vector<MDOUBLE> evaluateCharacterFreqBasedOnManyGenes(const vector<sequenceContainer> & scVec) {
	85	// note: all alphabets have to be the same!
	86	vector<MDOUBLE> charFreq(scVec[0].alphabetSize(),0.0);
	87	for (int i=0; i < scVec.size();++i) {
	88	assert(scVec[0].getAlphabet()->size()==scVec[i].getAlphabet()->size());
	89	vector<MDOUBLE> charFreqTmp=sumAlphabetCounts(scVec[i]);
	90	for (int z=0; z < charFreq.size();++z) charFreq[z]+=charFreqTmp[z];
	91	}
	92	changeCountsToFreqs(charFreq);
	93	makeSureNoZeroFreqs(charFreq);
	94	return charFreq;
	95	}
	96
	97	//returns the number of each character in each position.
	98	//NOTE: returns also the number of unknown charecters in the last place in each vector, so that the actual vector size for each position is alphabetSize()+1
	99	void getCharacterCounts(const sequenceContainer & sc, VVint& counts4pos)
	100	{
	101	const alphabet* pAlph = sc.getAlphabet();
	102	int alphSize = sc.alphabetSize();
	103	int pos;
	104	counts4pos.resize(sc.seqLen());
	105	for (pos = 0; pos < sc.seqLen(); ++pos)
	106	counts4pos[pos].resize(alphSize + 1, 0);
	107
	108	for (int seq = 0; seq < sc.numberOfSeqs();++seq)
	109	{
	110	int id = sc.placeToId(seq);
	111	for (pos = 0; pos < sc.seqLen(); ++pos)
	112	{
	113	int charType = sc[id][pos];
	114	if (pAlph->isSpecific(charType))
	115	{
	116	++counts4pos[pos][charType];
	117	}
	118	else
	119	++counts4pos[pos][alphSize];
	120	}
	121	}
	122	}
	123
	124	//returns the number of different character types in each position
	125	void getCharacterType4pos(const sequenceContainer & sc, Vint& charactersType4pos)
	126	{
	127	VVint counts4Pos;
	128	getCharacterCounts(sc, counts4Pos);
	129	charactersType4pos.resize(sc.seqLen(), 0);
	130	for (int pos = 0; pos < sc.seqLen(); ++pos)
	131	{
	132	for (int c = 0; c < counts4Pos[pos].size()-1; ++c)
	133	{
	134	if (counts4Pos[pos][c] > 0)
	135	++charactersType4pos[pos];
	136	}
	137	}
	138	}
	139
	140	//returns the distribution of the different character types in each position along the whole alignment
	141	void getCharacterTypeDistribution(const sequenceContainer & sc, Vint& charactersTypeDist)
	142	{
	143	Vint charactersType4pos;
	144	getCharacterType4pos(sc, charactersType4pos);
	145	charactersTypeDist.resize(sc.numberOfSeqs()+1, 0);
	146	for (int pos = 0; pos < sc.seqLen(); ++pos)
	147	{
	148	int count = charactersType4pos[pos];
	149	++charactersTypeDist[count];
	150	}
	151
	152	}

+26

-0

libs/phylogeny/evaluateCharacterFreq.h less more

	0	// $Id: evaluateCharacterFreq.h 3895 2008-04-21 07:38:32Z itaymay $
	1
	2	#ifndef __Evaluate_Character_Freq_h
	3	#define __Evaluate_Character_Freq_h
	4
	5	#include <iostream>
	6	using namespace std;
	7
	8	#include "sequenceContainer.h"
	9	#include "definitions.h"
	10
	11	vector<MDOUBLE> sumAlphabetCounts(const sequenceContainer & sc);
	12	vector<MDOUBLE> evaluateCharacterFreq(const sequenceContainer & sc);
	13	VVdouble evaluateCharacterFreqOneForEachGene(const vector<sequenceContainer> & scVec);
	14	vector<MDOUBLE> evaluateCharacterFreqBasedOnManyGenes(const vector<sequenceContainer> & scVec);
	15
	16	void changeCountsToFreqs(vector<MDOUBLE>& charFreq);
	17	void makeSureNoZeroFreqs(vector<MDOUBLE> & charFreq);
	18
	19	//returns the number of each character in each position
	20	void getCharacterCounts(const sequenceContainer & sc, VVint& counts4pos);
	21	//returns the number of different character types in each position
	22	void getCharacterType4pos(const sequenceContainer & sc, Vint& charactersType4pos);
	23	//returns the distribution of the different character types in each position along the whole alignment
	24	void getCharacterTypeDistribution(const sequenceContainer & sc, Vint& charactersTypeDist);
	25	#endif

+113

-0

libs/phylogeny/evolObjs.args less more

	0	# $Id: evolObjs.args 5928 2009-02-25 16:30:50Z privmane $
	1	#purpose "structural EM based Phylogeny"
	2	#package "semphy"
	3	#version "1.0.b2"
	4
	5
	6	#files
	7	section "Basic Options"
	8	option "sequence" s "Sequence file name" string typestr="FILENAME" default="-" no
	9	option "tree" t "Tree file name" string typestr="FILENAME" no
	10	option "constraint" c "Constraint Tree file name" string typestr="FILENAME" no
	11	option "outputfile" o "Output file" string typestr="FILENAME" default="-" no
	12	option "treeoutputfile" T "Tree output file" string typestr="FILENAME" default="-" no
	13	option "gaps" g "Remove positions with gaps" flag off
	14	option "seed" r "Seed random number generator" long no
	15
	16
	17	# model options:
	18	section "Model Options"
	19	option "alphabet" a "Alphabet Size" int typestr="4\|20\|61\|64" default="20" no
	20	option "ratio" z "Transition/Transversion ratio" float default="2.0" no
	21	option "ACGprob" p "User input nucleotide frequencies. String separated list for A,C,G" string typestr="A,C,G" default="0.25,0.25,0.25" no
	22	option "inputRate" - "Set External globalRate" float default="1.0" no
	23
	24	section "Among Site Rate Variation (ASRV)"
	25	option "homogeneous" H "Don't use Gamma ASRV" flag off
	26	option "alpha" A "Set alpha for Gamma ASRV" float typestr="Alpha" no
	27	option "optimizeAlpha" O "Optimize alpha for Gamma ASRV" flag off
	28	option "categories" C "Number of categories to use with descrete Gamma ASRV" int default="8" no
	29	option "laguerre" - "Use Laguerre approximation of Gamma - CURRENTLY NOT IMPLIMENTED" flag off
	30	option "ssrv" - "Use a Site-Specific Rate Variation model (SSRV)" flag off
	31	option "nu" - "Set Nu for the SSRV model" float typestr="Nu" default="1.0" no
	32	option "posteriorRates" - "File with posterior distribution of the rate for each sequence site - for ASRV" string typestr="FILENAME" no
	33
	34	defgroup "Model" groupdesc="Model type"
	35
	36	groupoption "day" - "Use 'day' model" group="Model"
	37	groupoption "jtt" - "Use 'jtt' model (default)" group="Model"
	38	groupoption "rev" - "Use 'rev' model" group="Model"
	39	groupoption "wag" - "Use 'wag' model" group="Model"
	40	groupoption "cprev" - "Use 'cprev' model" group="Model"
	41	groupoption "nucjc" - "Use nucleic acid JC model" group="Model"
	42	groupoption "aaJC" - "Use amino acid JC model" group="Model"
	43	groupoption "k2p" - "Use 'k2p' model" group="Model"
	44	groupoption "hky" - "Use 'hky' model" group="Model"
	45	groupoption "tamura92" - "Use 'tamura92' model" group="Model"
	46	groupoption "gtr" - "Use 'gtr' model (general reversible model)" group="Model"
	47
	48	option "modelfile" - "Read replacement matrix from user input file" string typestr="NAME" no
	49
	50
	51	section "Log Options"
	52
	53	option "verbose" v "Log report level (verbose)" int default="1" no
	54	option "Logfile" l "Log output file name" string typestr="FILENAME" default="-" no
	55
	56
	57	## not for general ggo? ##
	58	## not for general ggo? ## section "Algorithm Options"
	59	## not for general ggo? ##
	60	## not for general ggo? ## # algorithm options
	61	## not for general ggo? ## section "Which algorithm to run"
	62	## not for general ggo? ## #defgroup "Run Options" groupdesc="Which algorithm to run"
	63	## not for general ggo? ##
	64	## not for general ggo? ## option "SEMPHY" S "Do SEMPHY step" flag off
	65	## not for general ggo? ## option "bbl" n "Only optimize branch length" flag off
	66	## not for general ggo? ## option "likelihood" L "Compute likelihood for fixed tree" flag off
	67	## not for general ggo? ## option "PerPosLike" P "Compute likelihood per position for a fixed tree" flag off
	68	## not for general ggo? ## option "NJ" J "compute NJ tree only" flag off
	69	## not for general ggo? ##
	70	## not for general ggo? ## option "rate" R "optimize rate of gene" flag off
	71	## not for general ggo? ##
	72	## not for general ggo? ##
	73	## not for general ggo? ## section "Other Algorithm Options"
	74	## not for general ggo? ## option "max-semphy-iter" M "Max number of SEM iterations" int default="100" no
	75	## not for general ggo? ## option "max-bbl-iter" b "Max number of BBL iterations" int default="1000" no
	76	## not for general ggo? ## #option "min-improv" d "Minimum improvement" float default="0.001" no
	77	## not for general ggo? ## option "gaps" g "Remove positions with gaps" flag off
	78	## not for general ggo? ## option "dont-use-NJ" N "Do not Use NJ to break stars in treeRearrange" flag on
	79	## not for general ggo? ## #option "exact" e "Compute exact counts" flag off
	80	## not for general ggo? ## #option "maxDistance" x "'infinity' distance for sequence pairs" float default="2.0" no
	81	## not for general ggo? ##
	82
	83
	84	## not for general ggo ## section "Bootstrap"
	85	## not for general ggo ## option "BPrepeats" - "Use bootstrap and set number of repeats" int no
	86	## not for general ggo ## option "BPfile" - "Use bootstrap and read the weights from a file" string typestr="FILENAME" no
	87	## not for general ggo ## option "BPconsensus" - "Use bootstrap and compute a Consensus tree" int no
	88	## not for general ggo ## option "BPonUserTree" - "Use compute support for user provided tree" flag off
	89	## not for general ggo ##
	90	## not for general ggo ##
	91	## not for general ggo ## section "Advanced @@"
	92	## not for general ggo ## option "ADVBBLEpsilinLikeToll" - "@@ BBL and BBL LL tol" float default="0.05" no
	93	## not for general ggo ## option "ADVNumOfBBLIterInBBLPlusAlpha" - "@@ Number of BBL iterations in BBL+ALPHA" int default="10" no
	94	## not for general ggo ## option "ADVNoPost" - "@@ do not use posterior" flag off
	95	## not for general ggo ## option "consurf" - "@@ version for consurf use" flag off
	96	## not for general ggo ## option "numbins" - "@@ number of bins in gamma" int default="4" no
	97	## not for general ggo ##
	98	## not for general ggo ##
	99	## not for general ggo ## #option "paramFile" f "Parameter file name" string no
	100	## not for general ggo ## #option "cin" I "Get input sequence file from cin" flag off
	101	## not for general ggo ##
	102	## not for general ggo ## # annealing:
	103	## not for general ggo ## #option "anneal" A "Do anneal step" flag off
	104	## not for general ggo ## #option "ratchet" R "Do Ratchet step" flag off
	105	## not for general ggo ## #option "start-temp" H "Starting temp" float no
	106	## not for general ggo ## #option "cooling-factor" c "Variance decay factor for anneal noise" float default="1.1" no
	107	## not for general ggo ## #option "final-temp" C "Final temperature of anneal noise" float default="0.1" no
	108	## not for general ggo ## #option "adversarial" - "Use Adversarial Re-weighting" flag off
	109	## not for general ggo ## #option "learning-rate" L "learning rate for Adversary" float default="1.0" no
	110	## not for general ggo ## #option "Orig-dumping" D "Dumping to the original weights" float default="0.5" no
	111	## not for general ggo ## #option "prev-dumping" X "Dumping to the previous weights" float default="0.5" no
	112	## not for general ggo ##

+5

-0

libs/phylogeny/evolObjs.header less more

	0	# $Id: evolObjs.header 962 2006-11-07 15:13:34Z privmane $
	1
	2	purpose "General Phylogenetic program"
	3	package "LibEvol"
	4	version "0.9"

+66

-0

libs/phylogeny/extremeValDistribution.cpp less more

	0	#include "extremeValDistribution.h"
	1	#include <cmath>
	2	using namespace std;
	3
	4
	5
	6	extremeValDistribution::extremeValDistribution()
	7	: _alpha(0), _beta(0)
	8	{
	9	}
	10
	11	extremeValDistribution::extremeValDistribution(const extremeValDistribution& other)
	12	{
	13	_alpha = other._alpha;
	14	_beta = other._beta;
	15	}
	16
	17	extremeValDistribution& extremeValDistribution::operator=(const extremeValDistribution& other)
	18	{
	19	_alpha = other._alpha;
	20	_beta = other._beta;
	21	return *this;
	22	}
	23
	24	extremeValDistribution::~extremeValDistribution()
	25	{
	26	}
	27
	28	/*fits the _alpha and _beta parameters based on a population mean and std.
	29	Based on the following arguments:
	30	1. If variable Z has a cumulative distribution F(Z) = exp(-exp((-z))
	31	Then E(Z) = EULER_CONSTANT
	32	Var(Z) = pi^2/6
	33	2. We assign Z = (X-_alpha) / _beta --> X = _beta*Z + _alpha
	34	and we get:
	35	E(X) = _betaE(Z)+_alpha = _betaEULER_CONSTANT+_alpha
	36	Var(X) = _beta^2*pi^2/6
	37	3. We can now find _alpha and _beta based on the method of moments:
	38	mean = _beta*EULER_CONSTANT+_alpha
	39	s = _beta * pi / sqrt(6)
	40	4. And solve:
	41	_beta = s * qsrt(6) / pi
	42	_alpha = mean - _beta*EULER_CONSTANT
	43	*/
	44	void extremeValDistribution::fitParametersFromMoments(MDOUBLE mean, MDOUBLE s)
	45	{
	46	_beta = s * sqrt(6.0) / PI;
	47	_alpha = mean - (_beta * EULER_CONSTANT);
	48	}
	49
	50	MDOUBLE extremeValDistribution::getCDF(MDOUBLE score) const
	51	{
	52	MDOUBLE res = exp(-exp(-(score-_alpha) / _beta));
	53	return res;
	54	}
	55
	56	//get y such that pVal = CDF(y):
	57	// pVal = exp(-exp(-(y-alpha)/beta))
	58	// ln(-ln(pVal)) = -(y-alpha)/beta
	59	// y = alpha - beta*ln(-ln(pVal))
	60	MDOUBLE extremeValDistribution::getInverseCDF(MDOUBLE pVal) const
	61	{
	62	MDOUBLE res = _alpha - _beta * log(-log(pVal));
	63	return res;
	64	}
	65

+33

-0

libs/phylogeny/extremeValDistribution.h less more

	0	#ifndef _EXTREME_VAL_DISTRIBUTION
	1	#define _EXTREME_VAL_DISTRIBUTION
	2	#include "definitions.h"
	3	#define EULER_CONSTANT 0.5772156649015328606065120900824024310421593359399235
	4	/*
	5	The extreme value distribution is used to model the largest
	6	value from a large collection of random observations from the
	7	same distribution.
	8	1. The distribution has two parameters:
	9	a location parameter alpha and a scale parameter beta.
	10	2. The cumulative distribution function (CDF) is:
	11	exp(-exp(-(x-alpha)/beta))
	12	3. Mean: E(x) = alpha + beta*EULER_CONSTANT
	13	STD(x) = beta * pi / sqrt(6)
	14	*/
	15	class extremeValDistribution
	16	{
	17	public:
	18	extremeValDistribution();
	19	extremeValDistribution(const extremeValDistribution& other);
	20	extremeValDistribution& operator=(const extremeValDistribution& other);
	21	virtual ~extremeValDistribution();
	22	//fit the alpha and beta parameters from a population mean and std
	23	void fitParametersFromMoments(MDOUBLE exp, MDOUBLE std);
	24
	25	MDOUBLE getCDF(MDOUBLE score) const;
	26	MDOUBLE getInverseCDF(MDOUBLE pVal) const;
	27	private:
	28	MDOUBLE _alpha;
	29	MDOUBLE _beta;
	30
	31	};
	32	#endif //_EXTREME_VAL_DISTRIBUTION

+145

-0

libs/phylogeny/fastStartTree.cpp less more

	0	// $Id: fastStartTree.cpp 962 2006-11-07 15:13:34Z privmane $
	1
	2	#include "definitions.h"
	3	#include "tree.h"
	4	#include "treeUtil.h"
	5	#include "fastStartTree.h"
	6	#include "bblEM.h"
	7	#include "likeDist.h"
	8	#include "likelihoodComputation.h"
	9	#include "getRandomWeights.h"
	10	#include "distanceTable.h"
	11	#include "nj.h"
	12	#include "logFile.h"
	13
	14	#include <algorithm>
	15
	16	using namespace std;
	17	using namespace likelihoodComputation;
	18
	19
	20	vector<tree> eliminateHalf(vector<tree>& tVec,
	21	sequenceContainer& orginal,
	22	stochasticProcess& sp,
	23	ostream& out,
	24	const int maxIterEM){
	25	vector<MDOUBLE> likeScore(tVec.size(),0.0);
	26	int i;
	27	for (i=0; i < tVec.size(); ++i) {
	28	bblEM bblEM1(tVec[i],orginal,sp,NULL,maxIterEM,0.01);
	29	likeScore[i] = bblEM1.getTreeLikelihood();
	30
	31	LOG(5,<<"~");
	32	}
	33
	34	vector<MDOUBLE> sortedL = likeScore;
	35	sort(sortedL.begin(),sortedL.end());
	36	MDOUBLE median = sortedL[sortedL.size()/2];
	37
	38	// printing the top ten with their scores;
	39	// int toPrint = sortedL.size()>10? 10 : sortedL.size();
	40	// MDOUBLE treshToPrint = sortedL[sortedL.size()-toPrint];
	41	// out<<"current best 10 (or less) trees: "<<endl;
	42	// for (int h=0; h < likeScore.size(); ++h) {
	43	// if (likeScore[h]>treshToPrint) {
	44	// out<<"likelihood of tree: "<<h<<" = "<<likeScore[h]<<endl;
	45	// tVec[h].output(out);
	46	// }
	47	// }
	48
	49	for (int p=0; p < sortedL.size(); ++p ){
	50	out<<"L["<<p<<"]= "<<sortedL[p]<<endl;
	51	}
	52	out<<endl;
	53
	54	vector<tree> newTreeVec;
	55	for (i=0;i < tVec.size(); ++i) {
	56	if (likeScore[i]>=median) newTreeVec.push_back(tVec[i]); // ok this is a heck to mark trees
	57	}
	58	if (newTreeVec.size() == 0 ) newTreeVec.push_back(tVec[0]); // in case for example that all have the same L
	59	return newTreeVec;
	60	}
	61
	62
	63
	64
	65
	66
	67
	68
	69
	70
	71	//------------------ get N starting different NJ trees --------------------
	72
	73	tree getBestMLTreeFromManyNJtrees(sequenceContainer & allTogether,
	74	stochasticProcess& sp,
	75	const int numOfNJtrees,
	76	const MDOUBLE tmpForStartingTreeSearch,
	77	const MDOUBLE epslionWeights,
	78	ostream& out) {
	79
	80
	81	likeDist pd1(sp,0.01);
	82	vector<tree> tVec;
	83	int treeTries = 0;
	84	while (tVec.size() < numOfNJtrees) {
	85	++treeTries;
	86	if (treeTries == 5000) break;
	87
	88	Vdouble startingTreeWeights(allTogether.seqLen(),1.0);
	89	if (treeTries>1) {// the first is the regular NJ tree
	90	getRandomWeights::randomWeightsGamma(startingTreeWeights,
	91	tmpForStartingTreeSearch);
	92	}
	93	for (int p=0; p < startingTreeWeights.size(); ++p){
	94	if (startingTreeWeights[p]<epslionWeights) startingTreeWeights[p]=0.0;
	95	}
	96	#ifdef VERBOS
	97	if (treeTries ==2){ LOG(5,<<" weights for the 25 positions"<<endl);
	98	for (int h=0; h < 25; ++h) LOG(5,<<startingTreeWeights[h]<<" ");
	99	}
	100	#endif
	101	VVdouble disTab;
	102	vector<string> vNames;
	103	giveDistanceTable(&pd1,
	104	allTogether,
	105	disTab,
	106	vNames,
	107	&startingTreeWeights);
	108	NJalg nj1;
	109	tree et = nj1.computeTree(disTab,vNames);
	110
	111	bool treeAlreadyThere = false;
	112	for (int z=0; z< tVec.size();++z) {
	113	if (sameTreeTolopogy(tVec[z],et)) treeAlreadyThere=true;
	114	}
	115	if (treeAlreadyThere == false) {
	116	tVec.push_back(et);
	117	}
	118	}
	119	LOG(5,<<"from number of tree tried: "<<treeTries<<" got: "<<numOfNJtrees<<" trees"<<endl);
	120	out<<"from number of tree tried: "<<treeTries<<" got: "<<numOfNJtrees<<" trees"<<endl;
	121
	122	int numOfTreesToPrint = tVec.size()<10?tVec.size():10;
	123	out<<"starting with: "<<tVec.size()<<" trees! "<<endl;
	124	for (int g=0; g < numOfTreesToPrint; ++g) tVec[g].output(out);
	125
	126	//------------------ chossing the ML tree from these NJ trees --------------------
	127	int maxIterEM=0;
	128	while (tVec.size() > 1) {
	129	LOG(5,<<" current size = "<<tVec.size()<<endl);
	130	tVec = eliminateHalf(tVec,allTogether,sp,out,maxIterEM);
	131	maxIterEM=1; // first round without bbl at all.
	132	}
	133	LOG(5,<<" final size = "<<tVec.size()<<endl);
	134
	135	bblEM bblEM1(tVec[0],allTogether,sp,NULL,100,0.01);
	136	MDOUBLE res = bblEM1.getTreeLikelihood();
	137
	138
	139	LOGDO(5,tVec[0].output(myLog::LogFile()));
	140	LOG(5,<<"likelihood = "<<res<<endl);
	141	tVec[0].output(out);
	142	out<<"likelihood = "<<res<<endl;
	143	return tVec[0];
	144	}

+24

-0

libs/phylogeny/fastStartTree.h less more

	0	// $Id: fastStartTree.h 962 2006-11-07 15:13:34Z privmane $
	1
	2	#ifndef ___FAST_START_TREE
	3	#define ___FAST_START_TREE
	4
	5	#include "definitions.h"
	6	#include "tree.h"
	7	#include "stochasticProcess.h"
	8	#include "sequenceContainer.h"
	9	#include <iostream>
	10
	11	using namespace std;
	12
	13
	14
	15	tree getBestMLTreeFromManyNJtrees(sequenceContainer & allTogether,
	16	stochasticProcess& sp,
	17	const int numOfNJtrees,
	18	const MDOUBLE tmpForStartingTreeSearch,
	19	const MDOUBLE epslionWeights,
	20	ostream& out);
	21
	22
	23	#endif

+74

-0

libs/phylogeny/fastaFormat.cpp less more

	0	// $Id: fastaFormat.cpp 10280 2012-02-06 09:45:26Z itaymay $
	1	#include "fastaFormat.h"
	2	#include "someUtil.h"
	3	#include "errorMsg.h"
	4	#include "ConversionUtils.h"
	5	#include <algorithm>
	6	using namespace std;
	7
	8	sequenceContainer fastaFormat::read(istream &infile, const alphabet* alph) {
	9	sequenceContainer mySeqData = readUnAligned(infile, alph);
	10	mySeqData.makeSureAllSeqAreSameLengthAndGetLen();
	11	return mySeqData;
	12	}
	13
	14
	15	sequenceContainer fastaFormat::readUnAligned(istream &infile, const alphabet* alph) {
	16	sequenceContainer mySeqData;
	17
	18	vector<string> seqFileData;
	19	putFileIntoVectorStringArray(infile,seqFileData);
	20	if (seqFileData.empty()){
	21	errorMsg::reportError("unable to open file, or file is empty in fasta format");
	22	}
	23
	24	vector<string>::const_iterator it1;
	25	int localid=0;
	26	for (it1 = seqFileData.begin(); it1!= seqFileData.end(); ) {
	27	if (it1->empty()) {++it1;continue; }// empty line continue
	28
	29	string remark;
	30	string name;
	31
	32	if ((*it1)[0] == '>') {
	33	string::const_iterator itstrtmp = (*it1).begin();
	34	itstrtmp++;
	35	while (itstrtmp != (*it1).end()) {
	36	name+= *itstrtmp;
	37	itstrtmp++;
	38	}
	39
	40	//for (string::iterator i = name.begin(); i!=(name.end()-2);++i) {
	41	// i=(i+1); // removing the ">". should be done more elegant...
	42	//}
	43	++it1;
	44	} else {
	45	LOG(0,<<"problem in line: "<<*it1<<endl);
	46	errorMsg::reportError("Error reading fasta file, error finding sequence name starting with >",1);
	47	}
	48	while (it1->empty()) it1++; // empty line continue
	49
	50	string str;
	51	while (it1!= seqFileData.end()) {
	52	if ((*it1)[0] == '>') break;
	53	str+=*it1;
	54	++it1;
	55	}
	56	// remove spaces form str;
	57	str = takeCharOutOfString(" \t", str);
	58	name = trim(name);
	59	mySeqData.add(sequence(str,name,remark,localid,alph));
	60	localid++;
	61	}
	62
	63	return mySeqData;
	64	}
	65
	66
	67	void fastaFormat::write(ostream &out, const sequenceContainer& sd) {
	68	for (sequenceContainer::constTaxaIterator it5=sd.constTaxaBegin();it5!=sd.constTaxaEnd();++it5) {
	69	out<<">"<<(it5)->name()<<endl;
	70	out<<it5->toString()<<endl;
	71	}
	72	}
	73

+35

-0

libs/phylogeny/fastaFormat.h less more

	0	// $Id: fastaFormat.h 962 2006-11-07 15:13:34Z privmane $
	1
	2	#ifndef ___FASTA_FORMAT
	3	#define ___FASTA_FORMAT
	4
	5	#include "sequenceContainer.h"
	6
	// Static helpers for reading and writing sequence data in FASTA format
	// (an example of the format is given at the end of this header).
	7	class fastaFormat{
	8	public:
	// read: parses the stream with readUnAligned() and then calls
	// makeSureAllSeqAreSameLengthAndGetLen() on the result.
	9	static sequenceContainer read(istream &infile, const alphabet* alph);
	10	//readUnAligned: the input sequences do not need to be aligned (not all sequences are the same length).
	11	static sequenceContainer readUnAligned(istream &infile, const alphabet* alph);
	// write: for every sequence in sd, emits a ">name" line followed by the sequence text.
	12	static void write(ostream &out, const sequenceContainer& sd);
	13	};
	14
	15	#endif
	16
	17	/* EXAMPLE OF FASTA FORMAT:
	18	>Langur
	19	KIFERCELARTLKKLGLDGYKGVSLANWVCLAKWESGYNTEATNYNPGDESTDYGIFQINSRYWCNNGKPGAVDACHISCSALLQNNIADAVACAKRVVSDQGIRAWVAWRNHCQNKDVSQYVKGCGV
	20	>Baboon
	21	KIFERCELARTLKRLGLDGYRGISLANWVCLAKWESDYNTQATNYNPGDQSTDYGIFQINSHYWCNDGKPGAVNACHISCNALLQDNITDAVACAKRVVSDQGIRAWVAWRNHCQNRDVSQYVQGCGV
	22	>Human
	23	KVFERCELARTLKRLGMDGYRGISLANWMCLAKWESGYNTRATNYNAGDRSTDYGIFQINSRYWCNDGKPGAVNACHLSCSALLQDNIADAVACAKRVVRDQGIRAWVAWRNRCQNRDVRQYVQGCGV
	24	>Rat
	25	KTYERCEFARTLKRNGMSGYYGVSLADWVCLAQHESNYNTQARNYDPGDQSTDYGIFQINSRYWCNDGKPRAKNACGIPCSALLQDDITQAIQCAKRVVRDQGIRAWVAWQRHCKNRDLSGYIRNCGV
	26	>Cow
	27	KVFERCELARTLKKLGLDGYKGVSLANWLCLTKWESSYNTKATNYNPSSESTDYGIFQINSKWWCNDGKPNAVDGCHVSCSELMENDIAKAVACAKKIVSEQGITAWVAWKSHCRDHDVSSYVEGCTL
	28	>Horse
	29	KVFSKCELAHKLKAQEMDGFGGYSLANWVCMAEYESNFNTRAFNGKNANGSSDYGLFQLNNKWWCKDNKRSSSNACNIMCSKLLDENIDDDISCAKRVVRDKGMSAWKAWVKHCKDKDLSEYLASCNL
	30
	31
	32	*/
	33
	34

+81

-0

libs/phylogeny/findRateOfGene.cpp less more

	0	// $Id: findRateOfGene.cpp 962 2006-11-07 15:13:34Z privmane $
	1
	2	#include "definitions.h"
	3	#include "findRateOfGene.h"
	4	#include "computeUpAlg.h"
	5
	6	//#define VERBOS
	7
	8	class findRateOfGene{
	9	public:
	10	explicit findRateOfGene(const tree &t,
	11	const sequenceContainer& sc,
	12	stochasticProcess& sp,
	13	const Vdouble * weights): _t(t), _sc(sc),
	14	_sp(sp),_weights(weights){};
	15	private:
	16	const tree& _t;
	17	const sequenceContainer& _sc;
	18	stochasticProcess& _sp;
	19	const Vdouble * _weights;
	20	public:
	21	MDOUBLE operator() (const MDOUBLE fac) {
	22	#ifdef VERBOS
	23	LOG(5,<<"factor = "<<fac<<endl);
	24	#endif
	25	_sp.setGlobalRate(fac);
	26	MDOUBLE tmp = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(_t,_sc,_sp,_weights);
	27	#ifdef VERBOS
	28	LOG(5,<<"likelihood = "<<tmp<<endl);
	29	#endif
	30	return -tmp;
	31	}
	32	};
	33
	34	MDOUBLE findTheBestFactorFor(const tree &t,
	35	const sequenceContainer& sc,
	36	stochasticProcess& sp,
	37	const Vdouble * weights,
	38	MDOUBLE & logLresults) {
	39	#ifdef VERBOS
	40	LOG(5,<<"xxx in funtion findTheNestFactorFor xxxxxxxxx"<<endl);
	41	LOG(5,<<"xxx b4 optimization xxxxxxxxx"<<endl);
	42	MDOUBLE myL = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(t,sc,sp);
	43	LOG(5,<<" likelihod is: "<<myL<<endl);
	44	LOG(5,<<" global rate is: "<<sp.getGlobalRate()<<endl);
	45	LOG(5,<<"\n xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx \n");
	46	#endif
	47
	48	const MDOUBLE ax=0,bx=1.0,cx=4.0,tol=0.01f;
	49	MDOUBLE res=-1.0;
	50	logLresults =-brent(ax,bx,cx,
	51	findRateOfGene(t,sc,sp,weights),
	52	tol,
	53	&res);
	54	#ifdef VERBOS
	55	LOG(5,<<"rate of gene = "<<res<<endl);
	56	LOG(5,<<"xxx in funtion findTheNestFactorFor xxxxxxxxx"<<endl);
	57	LOG(5,<<"xxx after optimization xxxxxxxxx"<<endl);
	58	myL = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(t,sc,sp);
	59	LOG(5,<<" likelihod is: "<<myL<<"\n xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx \n");
	60	#endif
	61	sp.setGlobalRate(res);
	62	return res;}
	63
	64	void makeAverageRateEqOne(tree& et,vector<stochasticProcess> & spVec){
	65	MDOUBLE sumGlobalRates=0.0;
	66	for (int k=0; k < spVec.size(); ++k) {
	67	sumGlobalRates+=spVec[k].getGlobalRate();
	68	}
	69	for (int j=0; j < spVec.size(); ++j) {
	70	MDOUBLE newGlobalRate = spVec[j].getGlobalRate();
	71	newGlobalRate*=(spVec.size()/sumGlobalRates);
	72	spVec[j].setGlobalRate(newGlobalRate);
	73
	74	}
	75	et.multipleAllBranchesByFactor(sumGlobalRates/spVec.size());
	76	}
	77
	78
	79
	80

+24

-0

libs/phylogeny/findRateOfGene.h less more

	0	// $Id: findRateOfGene.h 962 2006-11-07 15:13:34Z privmane $
	1
	2	#ifndef ____FIND_RATE_OF_GENE
	3	#define ____FIND_RATE_OF_GENE
	4
	5
	6	#include "numRec.h"
	7	#include "errorMsg.h"
	8	#include "likelihoodComputation.h"
	9	#include "tree.h"
	10	#include "sequenceContainer.h"
	11	#include "stochasticProcess.h"
	12	#include "suffStatComponent.h"
	13	#include "definitions.h"
	14
	15	MDOUBLE findTheBestFactorFor(const tree &t,
	16	const sequenceContainer& sc,
	17	stochasticProcess& sp,
	18	const Vdouble * weights,
	19	MDOUBLE & logLresults);
	20
	21	void makeAverageRateEqOne(tree& et,vector<stochasticProcess> & spVec);
	22
	23	#endif

+24

-0

libs/phylogeny/fromCountTableComponentToDistance.cpp less more

	0	// $Id: fromCountTableComponentToDistance.cpp 9582 2011-06-21 11:31:21Z cohenofi $
	1
	2	#include "fromCountTableComponentToDistance.h"
	3	#include "likeDist.h"
	4	#include <cassert>
	5
	6	fromCountTableComponentToDistance::fromCountTableComponentToDistance(
	7	const countTableComponentGam& ctc,
	8	const stochasticProcess &sp,
	9	const MDOUBLE toll,
	10	const MDOUBLE brLenIntialGuess,
	11	unObservableData* unObservableData_p) : _sp(sp), _ctc(ctc),_unObservableData_p(unObservableData_p) {
	12	_distance = brLenIntialGuess ;//0.03;
	13	_toll = toll;
	14	}
	15
	16	void fromCountTableComponentToDistance::computeDistance() {
	17	MDOUBLE maxPairwiseDistance = 10.0; // The default is 5.0
	18	MDOUBLE minPairwiseDistance = 0.0000001; // The default
	19	likeDist likeDist1(_sp,_toll,maxPairwiseDistance,minPairwiseDistance,_unObservableData_p);
	20	MDOUBLE initGuess = _distance;
	21	_distance = likeDist1.giveDistance(_ctc,_likeDistance,initGuess);
	22	assert(_distance>=0);
	23	}

+37

-0

libs/phylogeny/fromCountTableComponentToDistance.h less more

	0	// $Id: fromCountTableComponentToDistance.h 4742 2008-08-19 17:40:56Z cohenofi $
	1
	2	#ifndef ___FROM_COUNT_TABLE_COMPONENT_TO_DISTANCE
	3	#define ___FROM_COUNT_TABLE_COMPONENT_TO_DISTANCE
	4
	5	#include "definitions.h"
	6	#include "countTableComponent.h"
	7	#include "stochasticProcess.h"
	8	#include "unObservableData.h"
	9
	10	static const MDOUBLE startingGuessForTreeBrLen = 0.029;
	11
	// Estimates a distance from a count table (countTableComponentGam) and a
	// stochastic process. Both are stored as references, so they must outlive
	// this object.
	12	class fromCountTableComponentToDistance {
	13
	14	public:
	15	explicit fromCountTableComponentToDistance(
	16	const countTableComponentGam& ctc,
	17	const stochasticProcess &sp,
	18	const MDOUBLE toll,
	19	const MDOUBLE brLenIntialGuess, // starting distance; startingGuessForTreeBrLen is the intended value
	20	unObservableData* unObservableData_p = NULL); // a class used for presence/absence data
	21
	22	void computeDistance();// returns nothing: updates the stored distance and the value read by getLikeDistance()
	23	MDOUBLE getDistance() { return _distance;} // the initial guess until computeDistance() has run, then the estimate
	24	MDOUBLE getLikeDistance() { return _likeDistance;} // value filled in by giveDistance(), presumably the likelihood of the estimate - TODO confirm; not initialized before computeDistance()
	25	private:
	26	const stochasticProcess & _sp;
	27	const countTableComponentGam& _ctc;
	28	MDOUBLE _toll;
	29	MDOUBLE _distance;
	30	MDOUBLE _likeDistance;
	31	unObservableData* _unObservableData_p;
	32	int alphabetSize() {return _ctc.alphabetSize();}
	33	};
	34
	35	#endif
	36

+22

-0

libs/phylogeny/fromCountTableComponentToDistance2Codon.cpp less more

	0	// $Id: fromCountTableComponentToDistance2Codon.cpp 950 2006-10-19 12:12:34Z eyalprivman $
	1
	2	#include "fromCountTableComponentToDistance2Codon.h"
	3	#include "likeDist2Codon.h"
	4	#include "likeDist.h"
	5	#include <cassert>
	6
	7	fromCountTableComponentToDistance2Codon::fromCountTableComponentToDistance2Codon(
	8	const countTableComponentGam& ctc,
	9	const vector<stochasticProcess> &spVec,
	10	const MDOUBLE toll,
	11	const MDOUBLE brLenIntialGuess ) : _spVec(spVec), _ctc(ctc) {
	12	_distance =brLenIntialGuess ;//0.03;
	13	_toll = toll;
	14	}
	15
	16	void fromCountTableComponentToDistance2Codon::computeDistance() {
	17	likeDist2Codon likeDist1(_spVec,_toll);
	18	MDOUBLE initGuess = _distance;
	19	_distance = likeDist1.giveDistance(_ctc,_likeDistance,initGuess);
	20	assert(_distance>=0);
	21	}

+34

-0

libs/phylogeny/fromCountTableComponentToDistance2Codon.h less more

	0	// $Id: fromCountTableComponentToDistance2Codon.h 950 2006-10-19 12:12:34Z eyalprivman $
	1
	2	#ifndef ___FROM_COUNT_TABLE_COMPONENT_TO_DISTANCE_2_CODON
	3	#define ___FROM_COUNT_TABLE_COMPONENT_TO_DISTANCE_2_CODON
	4
	5	#include "definitions.h"
	6	#include "countTableComponent.h"
	7	#include "stochasticProcess.h"
	8
	9	static const MDOUBLE startingGuessForTreeBrLen = 0.029;
	10
	// Estimates a distance from a count table (countTableComponentGam) and a
	// vector of stochastic processes. Both are stored as references, so they
	// must outlive this object.
	11	class fromCountTableComponentToDistance2Codon {
	12
	13	public:
	14	explicit fromCountTableComponentToDistance2Codon(
	15	const countTableComponentGam& ctc,
	16	const vector<stochasticProcess> &spVec,
	17	const MDOUBLE toll,
	18	const MDOUBLE brLenIntialGuess);// starting distance; startingGuessForTreeBrLen is the intended value
	19
	20	void computeDistance();// returns nothing: updates the stored distance and the value read by getLikeDistance()
	21	MDOUBLE getDistance() { return _distance;} // the initial guess until computeDistance() has run, then the estimate
	22	MDOUBLE getLikeDistance() { return _likeDistance;} // value filled in by giveDistance(), presumably the likelihood of the estimate - TODO confirm; not initialized before computeDistance()
	23	private:
	24	const vector<stochasticProcess> & _spVec;
	25	const countTableComponentGam& _ctc;
	26	MDOUBLE _toll;
	27	MDOUBLE _distance;
	28	MDOUBLE _likeDistance;
	29	int alphabetSize() {return _ctc.alphabetSize();}
	30	};
	31
	32	#endif
	33

+22

-0

libs/phylogeny/fromCountTableComponentToDistance2USSRV.cpp less more

	0	// $Id: fromCountTableComponentToDistance2USSRV.cpp 962 2006-11-07 15:13:34Z privmane $
	1
	2	#include "fromCountTableComponentToDistance2USSRV.h"
	3	#include "likeDist.h"
	4	#include <cassert>
	5
	6	fromCountTableComponentToDistance2USSRV::fromCountTableComponentToDistance2USSRV(
	7	const countTableComponentGam& ctcBase,
	8	const countTableComponentHom& ctcSSRV,
	9	const ussrvModel &model,
	10	MDOUBLE toll,
	11	MDOUBLE brLenIntialGuess ) : _model(model), _ctcBase(ctcBase), _ctcSSRV(ctcSSRV) {
	12	_distance = brLenIntialGuess ;//0.03;
	13	_toll = toll;
	14	}
	15
	16	void fromCountTableComponentToDistance2USSRV::computeDistance() {
	17	likeDist2USSRV likeDist1(_model,_toll);
	18	MDOUBLE initGuess = _distance;
	19	_distance = likeDist1.giveDistance(_ctcBase,_ctcSSRV,_likeDistance,initGuess);
	20	assert(_distance>=0);
	21	}

+39

-0

libs/phylogeny/fromCountTableComponentToDistance2USSRV.h less more

	0	// $Id: fromCountTableComponentToDistance2USSRV.h 962 2006-11-07 15:13:34Z privmane $
	1
	2	#ifndef ___FROM_COUNT_TABLE_COMPONENT_TO_DISTANCE_2_USSRV
	3	#define ___FROM_COUNT_TABLE_COMPONENT_TO_DISTANCE_2_USSRV
	4
	5	#include "definitions.h"
	6	#include "countTableComponent.h"
	7	#include "stochasticProcess.h"
	8	#include "ussrvModel.h"
	9	#include "likeDist2USSRV.h"
	10
	11	static const MDOUBLE startingGuessForTreeBrLen = 0.029;
	12
	// Estimates a distance from two count tables (a countTableComponentGam and
	// a countTableComponentHom) under a ussrvModel. All three are stored as
	// references, so they must outlive this object.
	13	class fromCountTableComponentToDistance2USSRV {
	14
	15	public:
	16	explicit fromCountTableComponentToDistance2USSRV(
	17	const countTableComponentGam& ctcBase,
	18	const countTableComponentHom& ctcSSRV,
	19	const ussrvModel& model,
	20	MDOUBLE toll,
	21	MDOUBLE brLenIntialGuess);// starting distance; startingGuessForTreeBrLen is the intended value
	22
	23	void computeDistance();// returns nothing: updates the stored distance and the value read by getLikeDistance()
	24	MDOUBLE getDistance() { return _distance;} // the initial guess until computeDistance() has run, then the estimate
	25	MDOUBLE getLikeDistance() { return _likeDistance;} // value filled in by giveDistance(), presumably the likelihood of the estimate - TODO confirm; not initialized before computeDistance()
	26
	27	private:
	28	const ussrvModel & _model;
	29	const countTableComponentGam& _ctcBase;
	30	const countTableComponentHom& _ctcSSRV;
	31	MDOUBLE _toll;
	32	MDOUBLE _distance;
	33	MDOUBLE _likeDistance;
	34	// int alphabetSize() {return _ctc.alphabetSize();}
	35	};
	36
	37	#endif //___FROM_COUNT_TABLE_COMPONENT_TO_DISTANCE_2_USSRV
	38

+18

-0

libs/phylogeny/fromCountTableComponentToDistanceProp.cpp less more

	0	// $Id: fromCountTableComponentToDistanceProp.cpp 962 2006-11-07 15:13:34Z privmane $
	1
	2	#include "fromCountTableComponentToDistanceProp.h"
	3	#include "likeDistProp.h"
	4
	5	fromCountTableComponentToDistanceProp::fromCountTableComponentToDistanceProp(
	6	const vector<countTableComponentGam>& ctc,
	7	const vector<stochasticProcess> &sp,
	8	const MDOUBLE toll,
	9	const MDOUBLE brLenIntialGuess ) : _sp(sp), _ctc(ctc) {
	10	_distance =brLenIntialGuess;
	11	_toll = toll;
	12	}
	13
	14	void fromCountTableComponentToDistanceProp::computeDistance() {
	15	likeDistProp likeDist1(alphabetSize(),_sp,_toll);
	16	_distance = likeDist1.giveDistance(_ctc,_likeDistance);
	17	}

+33

-0

libs/phylogeny/fromCountTableComponentToDistanceProp.h less more

	0	// $Id: fromCountTableComponentToDistanceProp.h 962 2006-11-07 15:13:34Z privmane $
	1
	2	#ifndef ___FROM_COUNT_TABLE_COMPONENT_TO_DISTANCE_PROP
	3	#define ___FROM_COUNT_TABLE_COMPONENT_TO_DISTANCE_PROP
	4
	5	#include "definitions.h"
	6	#include "countTableComponent.h"
	7	#include "stochasticProcess.h"
	8
	9
	// Estimates a distance from a vector of count tables and a vector of
	// stochastic processes. Both are stored as references, so they must
	// outlive this object.
	10	class fromCountTableComponentToDistanceProp {
	11
	12	public:
	13	explicit fromCountTableComponentToDistanceProp(
	14	const vector<countTableComponentGam>& ctc,
	15	const vector<stochasticProcess> &sp,
	16	const MDOUBLE toll,
	17	const MDOUBLE brLenIntialGuess = 0.029);// initial distance; computeDistance() does not pass it on as a starting guess
	18
	19	void computeDistance();// returns nothing: updates the stored distance and the value read by getLikeDistance()
	20	MDOUBLE getDistance() { return _distance;} // the initial guess until computeDistance() has run, then the estimate
	21	MDOUBLE getLikeDistance() { return _likeDistance;} // value filled in by giveDistance(), presumably the likelihood of the estimate - TODO confirm; not initialized before computeDistance()
	22	private:
	23	const vector<stochasticProcess> & _sp;
	24	const vector<countTableComponentGam>& _ctc;
	25	MDOUBLE _toll;
	26	MDOUBLE _distance;
	27	MDOUBLE _likeDistance;
	// alphabet size of the first count table, or 0 when there are none
	28	int alphabetSize() {return (_ctc.empty()?0:_ctc[0].alphabetSize());}
	29	};
	30
	31	#endif
	32

+24

-0

libs/phylogeny/fromCountTableComponentToDistancePropEB.cpp less more

	0	// $Id: fromCountTableComponentToDistanceProp.cpp 962 2006-11-07 15:13:34Z privmane $
	1
	2	#include "fromCountTableComponentToDistancePropEB.h"
	3	#include "likeDistPropEB.h"
	4
	5	fromCountTableComponentToDistancePropEB::fromCountTableComponentToDistancePropEB(
	6	const vector< vector<countTableComponentGamProportional> >& ctc,
	7	const int nodeID,
	8	multipleStochasticProcess *msp,
	9	const gammaDistribution* pProportionDist,
	10	const MDOUBLE toll,
	11	const MDOUBLE brLenIntialGuess ) : _msp(msp), _ctc(ctc), _nodeID(nodeID), _pProportionDist(pProportionDist){
	12	_distance =brLenIntialGuess;
	13	_toll = toll;
	14	}
	15
	16	void fromCountTableComponentToDistancePropEB::computeDistance() {
	17	MDOUBLE maxPairwiseDistance = 10.0; // The default
	18	MDOUBLE minPairwiseDistance = 0.0000001; // The default
	19	likeDistPropEB likeDist1(_msp,_pProportionDist,_toll,maxPairwiseDistance,minPairwiseDistance);
	20	MDOUBLE initGuess = _distance;
	21	_distance = likeDist1.giveDistance(_ctc,_nodeID,_likeDistance,initGuess);
	22	assert(_distance>=0);
	23	}

+37

-0

libs/phylogeny/fromCountTableComponentToDistancePropEB.h less more

	0	// $Id: fromCountTableComponentToDistanceProp.h 962 2006-11-07 15:13:34Z privmane $
	1
	2	#ifndef ___FROM_COUNT_TABLE_COMPONENT_TO_DISTANCE_PROP_EB
	3	#define ___FROM_COUNT_TABLE_COMPONENT_TO_DISTANCE_PROP_EB
	4
	5	#include "definitions.h"
	6	#include "countTableComponent.h"
	7	#include "multipleStochasticProcess.h"
	8	#include "gammaDistribution.h"
	9
	// Estimates a distance for one node (nodeID) from a two-level vector of
	// countTableComponentGamProportional tables. The tables are stored by
	// reference and msp / pProportionDist as raw pointers, so all of them must
	// outlive this object.
	10	class fromCountTableComponentToDistancePropEB {
	11
	12	public:
	13	explicit fromCountTableComponentToDistancePropEB(
	14	const vector< vector<countTableComponentGamProportional> >& ctc,
	15	const int nodeID,
	16	multipleStochasticProcess* msp,
	17	const gammaDistribution* pProportionDist,
	18	const MDOUBLE toll,
	19	const MDOUBLE brLenIntialGuess = 0.029);// starting distance for computeDistance()
	20
	21	void computeDistance();// returns nothing: updates the stored distance and the value read by getLikeDistance()
	22	MDOUBLE getDistance() { return _distance;} // the initial guess until computeDistance() has run, then the estimate
	23	MDOUBLE getLikeDistance() { return _likeDistance;} // value filled in by giveDistance(), presumably the likelihood of the estimate - TODO confirm; not initialized before computeDistance()
	24	private:
	25	multipleStochasticProcess * _msp;
	26	const vector< vector<countTableComponentGamProportional> >& _ctc;
	27	const gammaDistribution* _pProportionDist;
	28	const int _nodeID;
	29	MDOUBLE _toll;
	30	MDOUBLE _distance;
	31	MDOUBLE _likeDistance;
	// 0 when _ctc is empty; otherwise reads _ctc[0][_nodeID] without checking that _ctc[0] has that many entries
	32	int alphabetSize() {return (_ctc.empty()?0:_ctc[0][_nodeID].alphabetSize());}
	33	};
	34
	35	#endif
	36

+27

-0

libs/phylogeny/fromCountTableComponentToDistancefixRoot.cpp less more

	0	// $Id: fromCountTableComponentToDistance.cpp 4471 2008-07-17 15:38:50Z cohenofi $
	1
	2	#include "fromCountTableComponentToDistancefixRoot.h"
	3	#include "likeDistfixRoot.h"
	4	#include <cassert>
	5
	6	fromCountTableComponentToDistancefixRoot::fromCountTableComponentToDistancefixRoot(
	7	const vector<countTableComponentGam>& ctc,
	8	const stochasticProcess &sp,
	9	const MDOUBLE toll,
	10	const MDOUBLE brLenIntialGuess,
	11	unObservableData* unObservableData_p)
	12	: _sp(sp), _ctc(ctc) {
	13	_distance =brLenIntialGuess ;//0.03;
	14	_toll = toll;
	15	_unObservableData_p = unObservableData_p;
	16
	17	}
	18
	19	void fromCountTableComponentToDistancefixRoot::computeDistance() {
	20	MDOUBLE maxPairwiseDistance = 5.0; // The default
	21	MDOUBLE minPairwiseDistance = 0.0000001; // The default
	22	likeDistfixRoot likeDist1(_sp,_toll,maxPairwiseDistance,minPairwiseDistance,_unObservableData_p);
	23	MDOUBLE initGuess = _distance;
	24	_distance = likeDist1.giveDistance(_ctc,_likeDistance,initGuess); // each ctc is per node, and include all letterAtRoot
	25	assert(_distance>=0);
	26	}

+39

-0

libs/phylogeny/fromCountTableComponentToDistancefixRoot.h less more

	0	// $Id: fromCountTableComponentToDistance.h 4471 2008-07-17 15:38:50Z cohenofi $
	1
	2	#ifndef ___FROM_COUNT_TABLE_COMPONENT_TO_DISTANCE__FIX_ROOT
	3	#define ___FROM_COUNT_TABLE_COMPONENT_TO_DISTANCE__FIX_ROOT
	4
	5	#include "definitions.h"
	6	#include "countTableComponent.h"
	7	#include "stochasticProcess.h"
	8	#include "unObservableData.h"
	9
	10	static const MDOUBLE startingGuessForTreeBrLen = 0.029;
	11
	// Estimates a distance from a vector of count tables (indexed by the
	// letter at the root, per the member comment below) and a stochastic
	// process. Both are stored as references, so they must outlive this object.
	12	class fromCountTableComponentToDistancefixRoot {
	13
	14	public:
	15	explicit fromCountTableComponentToDistancefixRoot(
	16	const vector<countTableComponentGam>& ctc,
	17	const stochasticProcess &sp,
	18	const MDOUBLE toll,
	19	const MDOUBLE brLenIntialGuess, // starting distance; startingGuessForTreeBrLen is the intended value
	20	unObservableData* unObservableData_p);
	21
	22	void computeDistance();// returns nothing: updates the stored distance and the value read by getLikeDistance()
	23	MDOUBLE getDistance() { return _distance;} // the initial guess until computeDistance() has run, then the estimate
	24	MDOUBLE getLikeDistance() { return _likeDistance;} // value filled in by giveDistance(), presumably the likelihood of the estimate - TODO confirm; not initialized before computeDistance()
	25	private:
	26	const stochasticProcess & _sp;
	27	const vector<countTableComponentGam>& _ctc; //_ctc[letterAtRoot][rate][alph][alph]
	28	MDOUBLE _toll;
	29	MDOUBLE _distance;
	30	MDOUBLE _likeDistance;
	31	unObservableData* _unObservableData_p;
	32
	33	// int alphabetSize() {return _ctc.alphabetSize();}
	// NOTE(review): reads _ctc[0] without checking that _ctc is non-empty
	34	int alphabetSize() {return _ctc[0].alphabetSize();}
	35	};
	36
	37	#endif
	38

+555

-0

libs/phylogeny/fromInstructionFile.cpp less more

	0	// $Id: fromInstructionFile.cpp 962 2006-11-07 15:13:34Z privmane $
	1
	2	#include "definitions.h"
	3	#include "fromInstructionFile.h"
	4	#include "treeUtil.h"
	5	#include "nucleotide.h"
	6	#include "amino.h"
	7	#include "uniDistribution.h"
	8	#include "gammaDistribution.h"
	9	#include "readDatMatrix.h"
	10	#include "aaJC.h"
	11	#include "nucJC.h"
	12	#include "hky.h"
	13	#include "trivialAccelerator.h"
	14	#include "chebyshevAccelerator.h"
	15	#include "phylipFormat.h"
	16	#include "maseFormat.h"
	17	#include "fastaFormat.h"
	18	#include "clustalFormat.h"
	19	#include "molphyFormat.h"
	20	#include "datMatrixHolder.h"
	21	#include "someUtil.h"
	22
	23	#include <iostream>
	24	#include <fstream>
	25	#include <memory>
	26	#include <iterator>
	27	#include <cstdio>
	28	using namespace std;
	29
	30	//#define VERBOS
	31
	32	void fromInstructionFile::readInstructionFile(const string& str){
	33	ifstream f;
	34	f.open(str.c_str());
	35	if (f==NULL) {
	36	string tmp = "Unable to open the instraction file : \""+str+"\"";
	37	errorMsg::reportError(tmp);
	38	}
	39	string key, value;
	40	while (!f.eof()){
	41	f >> key;
	42	if (!key.empty()){
	43	toLower(key);// put the key in lower case.
	44	getline(f,value);
	45	value.erase(0,value.find_first_not_of(" \t")); // clear leading white space
	46	_lines[key]=value;
	47	}
	48	}
	49	f.close();
	50	}
	51
	52	fromInstructionFile::fromInstructionFile(const string& str):_maxNumOfFiles(1000){
	53	readInstructionFile(str);
	54	}
	55
	56	// THIS IS NOT WORKING ON SOME OLD VERSIONS OF g++
	57	//string I2A(const int & v)
	58	//{
	59	// stringstream s("");
	60	// s<<v;
	61	// return(s.str());
	62	//}
	63	//
	64	//string F2A(const float & v)
	65	//{
	66	// stringstream s("");
	67	// s<<v;
	68	// return(s.str());
	69	//}
	70
	// Returns the decimal text of v, formatted with "%d".
	string I2A(const int & v)
	{
		char digits[100];
		sprintf(digits, "%d", v);
		return string(digits);
	}
	77
	// Returns the text of v, formatted with "%f" (six digits after the point).
	string F2A(const float & v)
	{
		char text[100];
		sprintf(text, "%f", v);
		return string(text);
	}
	84
	85
	86
	87
	88	bool fromInstructionFile::doesWordExistInLines(const string& key) const{
	89	return (_lines.count(key)>0);
	90	}
	91
	92	const string & fromInstructionFile::searchStringInLines(const string& key) const
	93	{
	94	#ifdef VERBOS
	95	map<string, string>::const_iterator pos;
	96	pos = _lines.begin();
	97	for (; pos != _lines.end(); ++pos) {
	98	cout << "key: \"" << pos->first << "\" "
	99	<< "value: " << pos->second << endl;
	100	}
	101	#endif
	102
	103
	104
	105	static const string emptystr("");
	106	if (_lines.count(key) > 0)
	107	return(_lines.find(key)->second);
	108	else
	109	return(emptystr);
	110	}
	111
	112	const string& fromInstructionFile::searchStringInLines(const string& key, const int index) const
	113	{
	114	static const string emptystr("");
	115
	116	string realKey(key+int2string(index));
	117
	118	if (_lines.count(realKey) > 0)
	119	return(_lines.find(realKey)->second);
	120	else
	121	return(emptystr);
	122	}
	123
	124	void fromInstructionFile::setLogFile() {
	125	string logfilename(searchStringInLines("logfile"));
	126	if (logfilename == "") logfilename = "-";
	127
	128	if (logfilename == "-") {
	129	myLog::setLogOstream(&cout);
	130	}
	131	else{
	132	ofstream* outLF = new ofstream(logfilename.c_str());
	133	if (!outLF) {
	134	errorMsg::reportError("unable to open file for reading");
	135	}
	136	myLog::setLogOstream(outLF);
	137	}
	138	string loglvl(searchStringInLines("loglvl"));
	139	if (loglvl=="") myLog::setLogLvl(3); // default value
	140	else myLog::setLogLvl(atoi(loglvl.c_str()));
	141	LOG(3,<<"START OF LOG FILE\n\n");
	142	}
	143
	144	bool fromInstructionFile::getIntValueConnectedWithWord(const string& wordToSearch,
	145	int & val){
	146	string p(searchStringInLines(wordToSearch));
	147	if (p == "") {
	148	return false;
	149	}
	150	val=atoi(p.c_str());
	151	return true;
	152	}
	153
	154	string fromInstructionFile::getOutFile() {
	155	string outfilename(searchStringInLines("outfile"));
	156	if (outfilename == "") outfilename = "-";
	157	return outfilename;
	158	}
	159
	160	void fromInstructionFile::getAlphabets(vector<alphabet* >& _alphabets) {
	161	if (_alphabets.size() !=0) {errorMsg::reportError("error in fromInstructionFile::getAlphabetSize");}
	162	for (int i=1; i < _maxNumOfFiles; ++i ) {
	163	string p(searchStringInLines("alphabet",i));
	164	if (p == "") return;
	165	int alphRes = atoi(p.c_str());
	166	if (alphRes == 4) {
	167	alphabet* alp = new nucleotide;
	168	_alphabets.push_back(alp);
	169	}
	170	else if (alphRes == 20) {
	171	alphabet* alp = new amino;
	172	_alphabets.push_back(alp);
	173	}
	174	else errorMsg::reportError("No relaven number after the word alphabet in the instruction file.");
	175	}
	176	for (size_t z=1; z< _alphabets.size(); ++z) {
	177	if (_alphabets[z]!= _alphabets[0]) {
	178	errorMsg::reportError("currently all seq. must be of the same alphabet size");
	179	}
	180	}
	181	}
	182
	183	alphabet* fromInstructionFile::getOneAlphabet( ) {
	184	alphabet* _alphabet = NULL;
	185	int alphRes;
	186
	187	bool ok = getIntValueConnectedWithWord("alphabet",alphRes);
	188	if (!ok) {
	189	ok = getIntValueConnectedWithWord("alphabet1",alphRes);
	190
	191	if (!ok) errorMsg::reportError("didn't find alphabet size in instruction file");
	192	}if (ok==true) {
	193	if (alphRes == 4) {
	194	_alphabet = new nucleotide;
	195	}
	196	else if (alphRes == 20) {
	197	_alphabet = new amino;
	198	}
	199	else errorMsg::reportError("No number after the word alphabet in the instruction file.");
	200	}
	201	return _alphabet;
	202	}
	203
	204	void fromInstructionFile::getOneStartingStochasticProcess(stochasticProcess& sp, Vdouble * freqs){
	205	bool useGamma = doesWordExistInLines("gamma");
	206	distribution *dist = NULL;
	207	if (!useGamma) dist = new uniDistribution;
	208	else dist = new gammaDistribution(1,4);
	209
	210	replacementModel *probMod=NULL;
	211	pijAccelerator *pijAcc=NULL;
	212
	213	string wordUse = "model";
	214	bool usemodel1 = doesWordExistInLines("model1");
	215	if (usemodel1 == true) wordUse="model1";
	216
	217	string modelName(searchStringInLines(wordUse));// we can use model or model1
	218	if (modelName == "") {
	219	errorMsg::reportError("could not find model name in instruction file");
	220	}
	221
	222	if (strcmp(modelName.c_str(),"day")==0) {
	223	(freqs==NULL)? probMod=new pupAll(datMatrixHolder::dayhoff) : probMod=new pupAll(datMatrixHolder::dayhoff,*freqs);
	224	pijAcc = new chebyshevAccelerator(probMod);
	225	}
	226	else if (strcmp(modelName.c_str(),"jtt")==0) {
	227	(freqs==NULL)? probMod=new pupAll(datMatrixHolder::jones):probMod=new pupAll(datMatrixHolder::jones,*freqs) ;
	228	pijAcc =new chebyshevAccelerator(probMod);
	229	}
	230	else if (strcmp(modelName.c_str(),"rev")==0) {
	231	(freqs==NULL)? probMod=new pupAll(datMatrixHolder::mtREV24) : probMod=new pupAll(datMatrixHolder::mtREV24,*freqs);
	232	pijAcc = new chebyshevAccelerator(probMod);
	233	}
	234	else if (strcmp(modelName.c_str(),"wag")==0) {
	235	(freqs==NULL)? probMod=new pupAll(datMatrixHolder::wag) : probMod=new pupAll(datMatrixHolder::wag, *freqs);
	236	pijAcc = new chebyshevAccelerator(probMod);
	237	}
	238	else if (strcmp(modelName.c_str(),"cprev")==0) {
	239	(freqs==NULL)? probMod=new pupAll(datMatrixHolder::cpREV45) : probMod=new pupAll(datMatrixHolder::cpREV45, *freqs);
	240	pijAcc = new chebyshevAccelerator(probMod);
	241	}
	242	else if (strcmp(modelName.c_str(),"nucjc")==0) {
	243	probMod=new nucJC; pijAcc = new trivialAccelerator(probMod);
	244	}
	245	else if (strcmp(modelName.c_str(),"aaJC")==0) {
	246	probMod=new aaJC; pijAcc = new trivialAccelerator(probMod);
	247	}
	248	else if (modelName=="hky"\|\|modelName=="k2p") {
	249	MDOUBLE ratio (atof(searchStringInLines("ratio").c_str())); // get alpha
	250	MDOUBLE Ap(0.25), Cp(0.25), Gp(0.25), Tp(0.25);
	251	sscanf(searchStringInLines("ACGprob").c_str(),"%lf,%lf,%lf", &Ap, &Cp, &Gp);
	252	Tp=1.0-(Ap+Cp+Gp);
	253	probMod=new hky(Ap,Cp,Gp,Tp,ratio); pijAcc = new trivialAccelerator(probMod);
	254	}
	255	else {
	256	errorMsg::reportError("This replacement model is not yet available");
	257	}
	258
	259	stochasticProcess s1s(dist, pijAcc);
	260	if (probMod) delete probMod;
	261	if (pijAcc) delete pijAcc;
	262	if (dist) delete dist;
	263	sp = s1s;
	264	}
	265
	266	void fromInstructionFile::getStartingStochasticProcess(vector<stochasticProcess>& spPtrVec, VVdouble* freqs) {
	267	if (spPtrVec.size() !=0) {errorMsg::reportError("error in fromInstructionFile::getStartingSequenceData");}
	268	bool useGamma = doesWordExistInLines("gamma");
	269	for (int i=0; i < _maxNumOfFiles; ++i) {
	270	Vdouble* freq_i = (freqs==NULL) ? NULL: &((*freqs)[i]);
	271
	272	distribution *dist = NULL;
	273	if (!useGamma) dist = new uniDistribution;
	274	else dist = new gammaDistribution(1,4);
	275
	276
	277	replacementModel *probMod=NULL;
	278	pijAccelerator *pijAcc=NULL;
	279	string model(searchStringInLines("model",i+1));
	280	if (model == "") return;
	281	if (model=="day") {
	282	if (freq_i == NULL) {
	283	probMod=new pupAll(datMatrixHolder::dayhoff);//pijAcc = new chebyshevAccelerator(probMod);
	284	} else {
	285	probMod=new pupAll(datMatrixHolder::dayhoff,*freq_i);//pijAcc = new chebyshevAccelerator(probMod);
	286	}
	287	pijAcc = new trivialAccelerator(probMod);
	288	}
	289	else if (model=="jtt") {
	290	if (freq_i == NULL) {
	291	probMod=new pupAll(datMatrixHolder::jones) ; //pijAcc =new chebyshevAccelerator(probMod);
	292	}
	293	else {
	294	probMod=new pupAll(datMatrixHolder::jones,*freq_i) ; //pijAcc =new chebyshevAccelerator(probMod);
	295	}
	296	pijAcc = new trivialAccelerator(probMod);
	297	}
	298	else if (model=="rev") {
	299	if (freq_i == NULL) {
	300	probMod=new pupAll(datMatrixHolder::mtREV24);//pijAcc = new chebyshevAccelerator(probMod);
	301	} else {
	302	probMod=new pupAll(datMatrixHolder::mtREV24,*freq_i);//pijAcc = new chebyshevAccelerator(probMod);
	303	}
	304	pijAcc = new trivialAccelerator(probMod);
	305	} else if (model=="wag") {
	306	if (freq_i == NULL) {
	307	probMod=new pupAll(datMatrixHolder::wag);//pijAcc = new chebyshevAccelerator(probMod);
	308	} else {
	309	probMod=new pupAll(datMatrixHolder::wag,*freq_i);//pijAcc = new chebyshevAccelerator(probMod);
	310	}
	311	pijAcc = new trivialAccelerator(probMod);
	312	} else if (model=="cprev") {
	313	if (freq_i == NULL) {
	314	probMod=new pupAll(datMatrixHolder::cpREV45);//pijAcc = new chebyshevAccelerator(probMod);
	315	} else {
	316	probMod=new pupAll(datMatrixHolder::cpREV45,*freq_i);//pijAcc = new chebyshevAccelerator(probMod);
	317	}
	318	pijAcc = new trivialAccelerator(probMod);
	319	}
	320	else if (model == "nucjc") {
	321	probMod=new nucJC; pijAcc = new trivialAccelerator(probMod);
	322	}
	323	else if (model == "aaJC") {
	324	probMod=new aaJC; pijAcc = new trivialAccelerator(probMod);
	325	}
	326	else {errorMsg::reportError("This replacement model is not yet available");
	327	}
	328
	329	stochasticProcess s1s(dist, pijAcc);
	330	spPtrVec.push_back(s1s);
	331	if (probMod) delete probMod;
	332	if (pijAcc) delete pijAcc;
	333	if (dist) delete dist;
	334	}
	335	}
	336
	337	bool fromInstructionFile::getStartingEvolTrees(vector<tree>& vtree,vector<char>& constraintsOfT0){
	338	if (vtree.size() !=0) {
	339	errorMsg::reportError("error in fromInstructionFile::getStartingEvolTrees");
	340	}
	341	string oneTreeFileName(searchStringInLines("treefile"));
	342	if (oneTreeFileName =="" ) {
	343	errorMsg::reportError("The tree file name must be given in the instruction file");
	344	}
	345	getStartingTreeVecFromFile(oneTreeFileName,vtree,constraintsOfT0);
	346	for (size_t k=0;k<vtree.size();++k) {
	347	if (!vtree[k].withBranchLength()) vtree[k].createFlatLengthMatrix(0.05);
	348	}
	349	return true;
	350	}
	351
	352
	353	bool fromInstructionFile::getStartingEvolTrees(vector<tree>& vtree){
	354	if (vtree.size() !=0) {errorMsg::reportError("error in fromInstructionFile::getStartingEvolTrees");}
	355	// for (int i=1; i < _maxNumOfFiles; ++i ) {
	356	// auto_ptr<string> treeFileName(searchStringInFile("treefile",i,_instructionFile));
	357	// if ((treeFileName.get() == NULL) && (i==1)) {
	358	string oneTreeFileName(searchStringInLines("treefile"));
	359	if (oneTreeFileName=="" ) {
	360	errorMsg::reportError("The tree file name must be given in the instruction file");
	361	}
	362	vtree = getStartingTreeVecFromFile(oneTreeFileName);
	363	//tree tmpT(*oneTreeFileName);
	364	//vtree.push_back(tmpT);
	365	for (size_t k=0;k<vtree.size();++k) {
	366	if (!vtree[k].withBranchLength())
	367	vtree[k].createFlatLengthMatrix(0.05);
	368	}
	369	return true;
	370	// }
	371	// if (treeFileName.get() == NULL) return true;// found some trees
	372	// tree t1(*treeFileName);
	373	// if (!t1.WithBranchLength()) t1.create_flat_length_matrix(0.05);
	374	// vtree.push_back(t1);
	375	// }
	376	// errorMsg::reportError("error in function fromInstructionFile::getStartingEvolTrees");
	377	// return false;
	378	}
	379
	380	void fromInstructionFile::getStartingSequenceData(vector<sequenceContainer>& sdPtrVec,
	381	const vector<alphabet* >& _alphabets){
	382	if (sdPtrVec.size() !=0) {errorMsg::reportError("error in fromInstructionFile::getStartingSequenceData");}
	383	for (int i=1; i <= _maxNumOfFiles; ++i ) {
	384	string sequenceFileName(searchStringInLines("seqfile",i));
	385	if ((sequenceFileName == "") && (i==1)) sequenceFileName="-";
	386	else if (sequenceFileName == "") return;
	387
	388	istream* inPtr;
	389	if (sequenceFileName == "-") {
	390	LOG(5,<<"in this option, the sequences are inputed from cin\n...");
	391	inPtr = &cin;
	392	}else{
	393	inPtr = new ifstream(sequenceFileName.c_str());
	394	}
	395	istream& in = *inPtr;
	396	sequenceContainer original;
	397
	398	string sequenceFileFormat(searchStringInLines("format",i));
	399	if ((sequenceFileFormat == "") && (i>1)) {// it is probably the format of number 1.
	400	string sequenceFileFormatOf1(searchStringInLines("format",1));
	401	sequenceFileFormat = sequenceFileFormatOf1;
	402	}
	403	alphabet* currentAlphabet = NULL;
	404	if ((_alphabets.size() == 1) && (i > 1)) currentAlphabet = _alphabets[0];
	405	else {
	406	currentAlphabet = _alphabets[i-1];
	407	}
	408	if (sequenceFileFormat== "mase") original= maseFormat:: read(in,currentAlphabet);
	409	else if (sequenceFileFormat=="molphy") original= molphyFormat:: read(in,currentAlphabet);
	410	else if (sequenceFileFormat=="clustal") original= clustalFormat::read(in,currentAlphabet);
	411	else if (sequenceFileFormat=="fasta") original= fastaFormat:: read(in,currentAlphabet);
	412	else if (sequenceFileFormat=="phylip") original= phylipFormat:: read(in,currentAlphabet);
	413	else errorMsg::reportError(" format not implemented yet in this version... ");
	414
	415	// if (original == NULL) errorMsg::reportError(" unable to find/open input sequence file");
	416
	417	if (doesWordExistInLines("removeGapPositions")) {
	418	// vector<int> parCol;
	419	// original.getParticiantColVecAccordingToGapCols(parCol);
	420	// sequenceData _sd(*original,parCol);
	421	// sdPtrVec.push_back(_sd);
	422	// delete original;
	423	errorMsg::reportError("remove gap position is not implemented yet");
	424	} //else if (doesWordExistInLines("gapsToMissingData")) {
	425	//LOG(5,<<"gaps are changed to missing data..."<<endl);
	426	original.changeGaps2MissingData();
	427	sdPtrVec.push_back(original);
	428	//}
	429	}
	430
	431	}
	432
	433	tree* fromInstructionFile::getOneStartingEvolTree(vector<char>* constraintsOfT0) {
	434	tree* _tree = NULL;
	435
	436	string wordUse = "treefile";
	437	bool usetreefile1 = doesWordExistInLines("treefile1");
	438	if (usetreefile1 == true) wordUse="treefile1";
	439
	440	string treeFileName(searchStringInLines(wordUse)); // either treefile or treefile1 is OK.
	441	if (treeFileName=="" ) {
	442	_tree = NULL;
	443	constraintsOfT0 = NULL;
	444	return _tree;
	445	}
	446
	447	vector<char> constraints;
	448	_tree = new tree(treeFileName,constraints);
	449	constraintsOfT0 = new vector<char>(constraints);
	450	return _tree;
	451	}
	452
	453	void fromInstructionFile::getOneStartingSequenceData(sequenceContainer& sd,
	454	const alphabet* _alphabets) {
	455	ifstream ins;
	456	istream* inPtr = NULL;
	457
	458	string wordUse = "seqfile";
	459	bool useseqfile1 = doesWordExistInLines("seqfile1");
	460	if (useseqfile1 == true) wordUse="seqfile1";
	461
	462	string sequenceFileName(searchStringInLines(wordUse)); // so it can be used with both seqfile and seqfile1
	463	if (sequenceFileName == "") sequenceFileName="-";
	464	if (sequenceFileName == "-") {
	465	inPtr = &cin;
	466	}
	467	else{
	468	ins.open(sequenceFileName.c_str());
	469	if (! ins.is_open())
	470	errorMsg::reportError("can not open sequace file");
	471	inPtr = &ins;
	472	}
	473
	474	istream& in = *inPtr;
	475	sequenceContainer original;
	476
	477	wordUse = "format";
	478	bool useFormat1 = doesWordExistInLines("format1");
	479	if (useFormat1 == true) wordUse="format1";
	480
	481	string sequenceFileFormat(searchStringInLines(wordUse));
	482	if (sequenceFileFormat == "") {
	483	sequenceFileFormat = "fasta"; // default
	484	}
	485
	486	if (sequenceFileFormat == "mase") original= maseFormat::read(in,_alphabets);
	487	else if (sequenceFileFormat == "molphy") original= molphyFormat::read(in,_alphabets);
	488	else if (sequenceFileFormat == "clustal") original= clustalFormat::read(in,_alphabets);
	489	else if (sequenceFileFormat == "fasta") original= fastaFormat::read(in,_alphabets);
	490	else if (sequenceFileFormat == "phylip") original= phylipFormat::read(in,_alphabets);
	491	else errorMsg::reportError(" format not implemented yet in this version... ");
	492
	493	if (doesWordExistInLines("removeGapPositions")) {
	494	errorMsg::reportError("remove gap position is not implemented yet");
	495	}
	496	//LOG(5,<<"gaps are changed to missing data..."<<endl);
	497	original.changeGaps2MissingData();
	498	sd = original;
	499	}
	500
	501	void fromInstructionFile::getStartingGammaParameters(vector<stochasticProcess>& spPtrVec) {
	502	for (size_t i=0; i < spPtrVec.size(); ++i) {
	503	string alphaParam(searchStringInLines("alpha",i+1));
	504	if ((alphaParam == "") && (i==0)) {
	505	getStartingGammaParameter(spPtrVec);
	506	return;
	507	}
	508	if (alphaParam == "") {
	509	MDOUBLE alpha = atof(alphaParam.c_str());
	510	(static_cast<gammaDistribution*>(spPtrVec[i].distr()))->setAlpha(alpha);
	511	}
	512	}
	513	}
	514
	515	void fromInstructionFile::getOneStartingGammaParameter(stochasticProcess& sp) {
	516	MDOUBLE alpha = 0;
	517	string alphaParam0(searchStringInLines("alpha",0));
	518	if (alphaParam0 != "") {
	519	alpha = atof(alphaParam0.c_str());
	520	} else {
	521	string alphaParam1(searchStringInLines("alpha",1));
	522	if (alphaParam1 != "") {
	523	alpha = atof(alphaParam1.c_str());
	524	} else {
	525	string alphaParam2(searchStringInLines("alpha"));
	526	if (alphaParam2 != "") {
	527	alpha = atof(alphaParam2.c_str());
	528	} else { // no alpha parameter given,
	529	return;
	530	}
	531	}
	532	}
	533	(static_cast<gammaDistribution*>(sp.distr()))->setAlpha(alpha);
	534	}
	535
	536	void fromInstructionFile::getStartingGammaParameter(vector<stochasticProcess>& spPtrVec) {
	537	string alphaParam(searchStringInLines("alpha"));
	538	for (size_t i=0; i < spPtrVec.size(); ++i) {
	539	if (alphaParam != "") {
	540	MDOUBLE alpha = atof(alphaParam.c_str());
	541	(static_cast<gammaDistribution*>(spPtrVec[i].distr()))->setAlpha(alpha);
	542	}
	543	}
	544	}
	545
	546	void fromInstructionFile::getStartingGlobalRates(vector<stochasticProcess>& spPtrVec) {
	547	for (size_t i=0; i < spPtrVec.size(); ++i) {
	548	string rate(searchStringInLines("rate",i+1));
	549	if (rate != "") {
	550	MDOUBLE grate = atof(rate.c_str());
	551	spPtrVec[i].setGlobalRate(grate);
	552	}
	553	}
	554	}

+60

-0

libs/phylogeny/fromInstructionFile.h less more

	0	// $Id: fromInstructionFile.h 962 2006-11-07 15:13:34Z privmane $
	1
	2	#ifndef ____FROM_INSTRUCTION__FILE
	3	#define ____FROM_INSTRUCTION__FILE
	4
	5	#include "definitions.h"
	6	#include "tree.h"
	7	#include "stochasticProcess.h"
	8	#include "alphabet.h"
	9	#include "sequenceContainer.h"
	10	#include "someUtil.h"
	11
	12	#include <string>
	13	#include <iostream>
	14	#include <vector>
	15	#include <map>
	16	using namespace std;
	17
	18
	19
	20	class fromInstructionFile {
	21	public:
	22	explicit fromInstructionFile(const string& instructionFileName);
	23	void readInstructionFile(const string& str);
	24	const string&searchStringInLines(const string& key) const;
	25	bool doesWordExistInLines(const string& key) const;
	26	const string& searchStringInLines(const string& key, const int index) const;
	27	bool getIntValueConnectedWithWord(const string& wordToSearch, int & res);
	28
	29
	30
	31	void setLogFile();
	32	void getStartingStochasticProcess(vector<stochasticProcess>& spPtrVec,VVdouble* freqs=NULL);
	33	void getOneStartingStochasticProcess(stochasticProcess& sp, Vdouble * freqs = NULL);
	34	void getOneStartingGammaParameter(stochasticProcess& sp);
	35	bool getStartingEvolTrees(vector<tree>& vtree);// true if thelist tree1 file1, tree2 file2 is found.
	36	bool getStartingEvolTrees(vector<tree>& vtree, vector<char>& constraintsOfT0);// true if thelist tree1 file1, tree2 file2 is found.
	37	tree* getOneStartingEvolTree(vector<char>* constraintsOfT0);// ALOCATE NEW TREE AND NEW CONSTRAINT VECTOR.
	38	void getStartingSequenceData(vector<sequenceContainer>& sdPtrVec,
	39	const vector<alphabet* >& _alphabets);
	40	void getOneStartingSequenceData(sequenceContainer& sdPtrVec,
	41	const alphabet* _alphabets);
	42	void getAlphabets(vector<alphabet* >& _alphabets);// alocate with new
	43	// have to be deleted by the users!
	44	alphabet* getOneAlphabet();
	45	bool useGamma() {
	46	return doesWordExistInLines("gamma");
	47	}
	48	void getStartingGammaParameters(vector<stochasticProcess>& spPtrVec);
	49	void getStartingGlobalRates(vector<stochasticProcess>& spPtrVec);
	50	string getOutFile();
	51	protected:
	52
	53	map<string, string> _lines;
	54	const int _maxNumOfFiles;// = 1000;
	55	void getStartingGammaParameter(vector<stochasticProcess>& spPtrVec);
	56	// tree getStartingEvolTree();
	57
	58	};
	59	#endif

+303

-0

libs/phylogeny/fromQtoPt.cpp less more

	0	// $Id: fromQtoPt.cpp 5788 2009-01-19 22:24:16Z rubi $
	1
	2	#include "definitions.h"
	3	#include "fromQtoPt.h"
	4	#include "errorMsg.h"
	5	#include "numRec.h"
	6	#include "matrixUtils.h"
	7	#include <iostream>
	8	using namespace std;
	9	#include <cassert>
	10
	11	//#define VERBOS
	12
	13
	14
	15
	16	void q2pt::fillFromRateMatrix(const vector<MDOUBLE>& freq,
	17	const VVdouble & qMatrix) {
	18	// we first decompose Q to (F^0.5) M (F^-0.5)
	19	// F is a diagonal matrix of the frequencies
	20	// M is the symetrical matrix representation of Q.
	21
	22	VVdouble q_sym;
	23	const int matrix_size = qMatrix.size();
	24	q_sym.resize(matrix_size);
	25	int k=0;
	26	for (k=0; k < q_sym.size(); ++k) q_sym[k].resize(matrix_size);
	27	calc_symmetric_q(qMatrix,q_sym,freq);
	28	// now we have to find the eigen-vector decomposition of the q_sym.
	29	VVdouble v; // v is the eigen vectors of the symetrical matrix.
	30	v.resize(matrix_size);
	31	for (k=0; k < qMatrix.size(); ++k) v[k].resize(matrix_size);
	32	Vdouble eigenValues(matrix_size);
	33
	34	// symmetric_1pam = [v] [eigenValues] [transpose(v)]
	35	//MyJacobi(q_sym,v, eigenValues); // notice that inv([v]) = [v] transpose;
	36
	37
	38	/////i changed
	39	computeEigenSystem(q_sym,v,eigenValues);
	40
	41	////
	42	//#ifdef VERBOS
	43	// LOG(5,<<"The eigen-vector matrix of the decomposition of the symetric matrix\n");
	44	// for (int k1=0; k1 < v.size(); ++k1) {
	45	// for (int k2=0; k2<v[k1].size(); ++k2) {
	46	// LOG(5,<<v[k1][k2]<<" ");
	47	// }
	48	// LOG(5,<<endl);
	49	// }
	50	//#endif
	51
	52
	53	VVdouble left_eig_of_pam; // v is the eigen vectors of the symetrical matrix.
	54	left_eig_of_pam.resize(matrix_size);
	55	for (k=0; k < left_eig_of_pam.size(); ++k) left_eig_of_pam[k].resize(matrix_size);
	56	VVdouble right_eig_of_pam; // v is the eigen vectors of the symetrical matrix.
	57	right_eig_of_pam.resize(matrix_size);
	58	for (k=0; k < right_eig_of_pam.size(); ++k) right_eig_of_pam[k].resize(matrix_size);
	59
	60	calc_left_and_right_eig_of_pam(left_eig_of_pam,right_eig_of_pam,v,freq);
	61
	62	_leftEigen=left_eig_of_pam;
	63	_rightEigen=right_eig_of_pam;
	64	_eigenVector=eigenValues;
	65	Vdouble _freq=freq;
	66	// printing a pij(1);
	67	//MDOUBLE t = 1;
	68	//string fileName = "D://My Documents//adid//nimrod//inputs//inputs//aligned tce//aligned tce//P.F//P.F. vs P.F//eigenValues1.txt";
	69	// ofstream out(fileName.c_str());
	70	// for (int i=0;i<eigenValues.size();i++)
	71	// out<<eigenValues[i] <<" ";
	72	// out<<endl;
	73	//for (int aa1=0; aa1 < eigenValues.size(); ++aa1) {
	74	// for (int aa2=0; aa2 < eigenValues.size(); ++aa2) {
	75	/// MDOUBLE sum=0;
	76	// for (int k=0 ; k<eigenValues.size() ; ++k) {
	77	// sum+=( left_eig_of_pam[aa1][k]right_eig_of_pam[k][aa2]exp(eigenValues[k]*t) );
	78	// }
	79	// LOG(5,<<sum<<" ");
	80	// }
	81	// LOG(5,<<endl);
	82	// }
	83	}
	84
	85	void q2pt::fillFrom1PAMMatrix(const vector<MDOUBLE>& freq,const VVdouble & onePam)
	86	{
	87	fillFromRateMatrix(freq,onePam);
	88	for (int i=0; i < _eigenVector.size(); ++i) {
	89	assert(_eigenVector[i]>0);
	90	_eigenVector[i] = log(_eigenVector[i])* 100;
	91	}
	92	}
	93
	94	bool q2pt::currectFloatingPointProblems(MDOUBLE& sum) const {
	95	if ((sum * (sum+err_allow_for_pijt_function))<0) sum=0;
	96	if (((sum-1) * (sum-1.0-err_allow_for_pijt_function))<0) sum=1;
	97	if (!((sum<=1) && (sum>=0)))
	98	return false;
	99	return true;
	100	}
	101
	102	// Pij(t) = Sigma[k]{ [V]ik * [V^-1]kj * e^(Lamda_k*t) }
	103	const MDOUBLE q2pt::Pij_t(const int i, const int j, const MDOUBLE t) const {
	104	if (t<0) errorMsg::reportError("negative length in routine Pij_t");
	105	// if ((_freq[i] == 0.0) \|\| (_freq[j] == 0.0)) return 0.0;
	106	MDOUBLE sum=0;
	107	for (int k=0 ; k<_eigenVector.size() ; ++k) {
	108	sum+=( _leftEigen[i][k]_rightEigen[k][j]exp(_eigenVector[k]*t) );
	109	}
	110	if (currectFloatingPointProblems(sum)) return sum;
	111	// LOG(1,<<"err Pij_t i="<<i<<" j= "<<j<<" dis= "<<t<<" res= "<<sum<<endl);//sum is not in [0,1]
	112	errorMsg::reportError("q2pt::Pij_t error in function pijt... ");return 0;
	113	}
	114
	115	const MDOUBLE q2pt::dPij_dt(const int i,const int j, const MDOUBLE t) const {
	116	MDOUBLE sum=0;
	117	for (int k=0 ; k<_eigenVector.size() ; ++k) {
	118	sum+=( _leftEigen[i][k]_rightEigen[k][j]exp(_eigenVector[k]t)_eigenVector[k]);
	119	}
	120	return sum;
	121	}
	122
	123
	124	const MDOUBLE q2pt::d2Pij_dt2(const int i,const int j, const MDOUBLE t) const {
	125	MDOUBLE sum=0;;
	126	for (int k=0 ; k<_eigenVector.size() ; ++k) {
	127	sum+=( _leftEigen[i][k]_rightEigen[k][j]exp(_eigenVector[k]t)_eigenVector[k]*_eigenVector[k]);
	128	}
	129	return sum;
	130	}
	131
	132	void q2pt::calc_symmetric_q(const VVdouble &q_matrix,
	133	VVdouble &symmetric_q,
	134	const Vdouble & freq)
	135	//----------------------------------------------------------------------------------
	136	//input: symmetric_1pam matrix is the output, pam1 is the input
	137	//output: non
	138	//doing: procedures to find eigen values work on symetrical matrices.
	139	// dayhoff 1 pam in a new basis is symetrical
	140	// the transformation is
	141	//
	142	// (1) [symmetric_1pam] = [sqrt(pi)] * [pam1] * [1/sqrt(pi)]
	143	//
	144	// [] for matrix. [sqrt(pi)] is a diagonal matrix were a[i][i] is the root of freq[i]
	145	//reference: JME (1997) 45:696-703 Estimation of reversible substitution matrices from
	146	// multiple pairs of sequences. Lars Arvestad and William J. Bruno.
	147	//----------------------------------------------------------------------------------
	148	{
	149	int i,j;
	150	for (i=0; i<q_matrix.size(); ++i) {
	151	for (j=0; j<q_matrix.size(); ++j) {
	152	if (q_matrix[i][j] != 0.0) {
	153	symmetric_q[i][j] = q_matrix[i][j]*sqrt(freq[i])/sqrt(freq[j]);
	154	}
	155	}
	156	}
	157	/*check OZ
	158	LOG(5,<<"sim matrix"<<endl);
	159	for (i=0;i<symmetric_q.size();++i) {
	160	for (j=0; j<symmetric_q.size(); ++j) {
	161	//LOG(5,<<symmetric_q[i][j]<<" ");
	162	LOG(5,<< setprecision(3) << setw(5) << symmetric_q[i][j]<<'\t');
	163
	164	}
	165	LOG(5,<<endl);
	166	} */
	167
	168	}
	169
	170	void q2pt::calc_left_and_right_eig_of_pam(
	171	VVdouble &left_eig_of_pam,
	172	VVdouble &right_eig_of_pam,
	173	const VVdouble &v,
	174	const Vdouble& freq) {
	175	//----------------------------------------------------------------------------------
	176	//input: left_eig_of_pam, right_eig_of_pam they will be the eigenvectors of pam1;
	177	// freq is the vector of amino acid frequencies of the model.
	178	// v is the eigen vector matrix of the symetrical matrix
	179	//output: non
	180	//doing: now [SYM] = [SqrtFreq] * [pam1] * inv([SqrtFreq])
	181	// so [pam1] = inv([SqrtFreq]) * [SYM] * [SqrtFreq]
	182	// SYM = [V] * [D] * transp([V])
	183	// hence [pam1] = {inv([SqrtFreq]) * [V]} * [D] * {transp([V]) * [SqrtFreq]}
	184	// {inv([SqrtFreq]) * [V]} is left_eig_of_pam, and the above one ^ is right.
	185	//----------------------------------------------------------------------------------
	186	int i,j;
	187	for (i=0;i<v.size();++i) {
	188	for (j=0;j<v.size();++j)
	189	{
	190	if ((freq[i] != 0.0) &&(freq[j] != 0.0)) {
	191	left_eig_of_pam[i][j] = (1/sqrt(freq[i]))* v[i][j];
	192	right_eig_of_pam[i][j]= sqrt(freq[j]) * v[j][i];
	193	}
	194	}
	195	}
	196
	197	// LOG(5,<<"left_eig_of_pam"<<endl);
	198	// for (i=0;i<4;++i) {
	199	// for (j=0; j<4; ++j) {
	200	// LOG(5,<<left_eig_of_pam[i][j]<<" ");
	201	// LOG(5,<<pam1[i][i]<<" ");
	202	// }
	203	// LOG(5,<<endl);
	204	// }
	205	//
	206	// LOG(5,<<"right eig_of_pam"<<endl);
	207	// for (i=0;i<4;++i) {
	208	// for (j=0; j<4; ++j) {
	209	// LOG(5,<<right_eig_of_pam[i][j]<<" ");
	210	// LOG(5,<<pam1[i][i]<<" ");
	211	// }
	212	// LOG(5,<<endl);
	213	// }
	214	//
	215	// LOG(5,<<"press anykey"<<endl);
	216	// char lll;
	217	// cin>>lll;
	218
	219
	220	}
	221
	222	VVdouble get1PamFromCountMatrix(const vector<MDOUBLE>& freq,
	223	const VVdouble & sub_matrix){
	224	//----------------------------------------------------------------------------------
	225	//input: pam1 : a pointer to the matrix where pam1 will be.
	226	// sub_matrix: the substitution matrix
	227	// freq vector: the amino acid's frequenceis.
	228	//output: non
	229	//doing: fill in 1 pam from sub matrix and freq vector
	230	//calculation: sub_matrix[a][b] is the substitution matrix, between a and b
	231	// (sub_matrix[a][b]=sub_matrix[b][a])
	232	// we use f[a][b] insted of sub_matrix[a][b] to be the same as the book
	233	//(reference) "introduction to computational molecular biology by setubal and meidanis pg 80;
	234	// let f[a] be sigma f[a][b] on all b (we made f[a][a] = 0;)
	235	// i.e. f[a] is the number of mutation from a observed
	236	// let f be sigma f[a] on all a; (=the total mutations*2)
	237	// now, the mutaibility of a is defined as
	238	//
	239	// (1) m[a] = f[a] / (100ffreq[a])
	240	//
	241	// 100*f is a scaling factor for 1 pam.
	242	// then pam1[a][b] will be pr(a->b/a changed) * pr(a changed)
	243	//
	244	// (2) pam1[a][b] = (f[a][b]/f[a])*m[a]
	245	//
	246	// (3) f[a][a] = 1-m[a] (easy to show)
	247	//
	248	// notice that sigma 1pam[a][b] over all b is 1 and that
	249	// sigma freq[a]*1pam[a][a] over all a is 0.99
	250	//----------------------------------------------------------------------------------
	251	const int _alphabetSize=sub_matrix.size();
	252	VVdouble pam1;
	253	pam1.resize(_alphabetSize);
	254	for (int z=0; z < _alphabetSize; ++z) {
	255	pam1[z].resize(_alphabetSize,0);
	256	}
	257
	258	int i,j;//indices
	259	MDOUBLE total=0; // i.e.f in the above explanation
	260	for (i=0;i<_alphabetSize;++i) {
	261	for (j=0; j<_alphabetSize; ++j){
	262	total+=sub_matrix[i][j];
	263	}
	264	}
	265
	266	MDOUBLE tmsum;
	267	for (i=0;i<_alphabetSize;++i) {
	268	tmsum = 0.0;
	269	for (j=i+1; j<_alphabetSize; ++j){
	270	if ((freq[i] == 0.0) \|\| (freq[j] == 0.0)) {
	271	pam1[i][j] = 0.0;pam1[j][i] = 0.0;
	272	} else {
	273	pam1[i][j] = sub_matrix[i][j]/(100.0totalfreq[i]);
	274	pam1[j][i] = sub_matrix[i][j]/(100.0totalfreq[j]);
	275	}
	276	}
	277	}
	278
	279	for (i=0;i<_alphabetSize;++i) {
	280	tmsum = 0.0;
	281	for (j=0;j<_alphabetSize;++j) {
	282	if (j!=i) tmsum += pam1[i][j];
	283	}
	284
	285	if (freq[i] != 0.0) {
	286	pam1[i][i]=1.0-tmsum;
	287	}
	288	}
	289
	290	#ifdef VERBOS
	291	LOG(5,<<" priting the 44 top-left corner of the 1pam matrix 10^6 "<<endl);
	292	for (int a=0; a < 4; ++a) {
	293	for (int b=0; b < 4; ++b) {
	294	LOG(5,<<pam1[a][b]*1000000.0<<" ");
	295	}
	296	LOG(5,<<endl);
	297	}
	298	#endif
	299	return pam1;
	300
	301	}
	302

+67

-0

libs/phylogeny/fromQtoPt.h less more

	0	// $Id: fromQtoPt.h 962 2006-11-07 15:13:34Z privmane $
	1
	2	#ifndef ___FROM_Q_TO_PT
	3	#define ___FROM_Q_TO_PT
	4
	5	#include "replacementModel.h"
	6	#include <cmath>
	7	#include <iomanip>
	8
	9	int MyJacobi(VVdouble &Insym, VVdouble &RightEigenV, Vdouble &EigenValues);// num rec
	10
	11	VVdouble get1PamFromCountMatrix(const vector<MDOUBLE>& freq,
	12	const VVdouble & sub_matrix);
	13
	14	class q2pt : public replacementModel {
	15	public:
	16	void fillFromRateMatrix(const vector<MDOUBLE>& freq,
	17	const VVdouble & qMatrix);
	18	void fillFrom1PAMMatrix(const vector<MDOUBLE>& freq,
	19	const VVdouble & onePam);
	20
	21
	22	explicit q2pt(): err_allow_for_pijt_function(1e-4){}
	23
	24	// @@@@ I'm not sure why I had to implement this operator=, but it doesn't work without it.
	25	q2pt& operator=(const q2pt &other) {
	26	_freq = other._freq;
	27	_leftEigen = other._leftEigen;
	28	_rightEigen = other._rightEigen;
	29	_eigenVector = other._eigenVector;
	30	return (*this);
	31	}
	32
	33	virtual replacementModel* clone() const { return new q2pt(*this); }
	34	// virtual nucJC* clone() const { return new nucJC(*this); } // see note down:
	35
	36	const int alphabetSize() const {return _freq.size();}
	37
	38
	39	const MDOUBLE Pij_t(const int i,const int j, const MDOUBLE d) const;
	40	const MDOUBLE dPij_dt(const int i,const int j, const MDOUBLE d) const;
	41	const MDOUBLE freq(const int i) const {return _freq[i];};
	42	const MDOUBLE d2Pij_dt2(const int i,const int j, const MDOUBLE d) const;
	43	const MDOUBLE err_allow_for_pijt_function; //1e-4
	44
	45	VVdouble getLeftEigen() const {return _leftEigen;} ;
	46	VVdouble getRightEigen() const {return _rightEigen;};
	47	Vdouble getEigenVec() const {return _eigenVector;};
	48
	49	private:
	50	Vdouble _freq;
	51	VVdouble _leftEigen;
	52	VVdouble _rightEigen;
	53	Vdouble _eigenVector;
	54	bool currectFloatingPointProblems(MDOUBLE& sum) const;
	55
	56	public: // to become private:
	57	void calc_symmetric_q(const VVdouble &q_matrix,VVdouble &symmetric_q,const Vdouble & freq);
	58	void calc_left_and_right_eig_of_pam(
	59	VVdouble &left_eig_of_pam,
	60	VVdouble &right_eig_of_pam,
	61	const VVdouble &v,
	62	const Vdouble& freq);
	63	};
	64
	65	#endif
	66

+70

-0

libs/phylogeny/gainLossAlphabet.cpp less more

	0	#include "gainLossAlphabet.h"
	1
	2	gainLossAlphabet::gainLossAlphabet() {}
	3
	4	int gainLossAlphabet::fromChar(const char s) const{
	5	switch (s) {
	6	case '0': return 0; break;
	7	case '1': return 1; break;
	8	case '2': return 1; break; // added to read seq with paralogs
	9	case '3': return 1; break; // added to read seq with paralogs
	10	case '4': return 1; break; // added to read seq with paralogs
	11	case '5': return 1; break; // added to read seq with paralogs
	12	case '6': return 1; break; // added to read seq with paralogs
	13	case '7': return 1; break; // added to read seq with paralogs
	14	case '8': return 1; break; // added to read seq with paralogs
	15	case '9': return 1; break; // added to read seq with paralogs
	16	case '-' : case'_' : return -2; break;
	17	case '?' : case'*' : return -2; break;
	18	case 'x' : case'X' : return -2; break;
	19	default:
	20	vector<string> err;
	21	err.push_back(" The gainLoss sequences contained the character: ");
	22	err[0]+=s;
	23	err.push_back(" gainLoss was not one of the following: ");
	24	err.push_back(" 0, 1, or for unknown '?'/'-'");
	25	errorMsg::reportError(err);
	26	}// end of switch
	27	return -99; // never suppose to be here.
	28	}// end of function
	29
	30	vector<int> gainLossAlphabet::fromString(const string &str) const {
	31	vector<int> vec;
	32	for (unsigned int i=0;i<str.size();i++)
	33	vec.push_back(fromChar(str[i]));
	34	return vec;
	35	}
	36
	37	string gainLossAlphabet::fromInt(const int in_id) const{
	38	char res = 0;
	39	switch (in_id) {
	40	case 0 : res = '0' ; break;
	41	case 1 : res = '1' ; break;
	42	case -2 : res = '-'; break;
	43	default:
	44	vector<string> err;
	45	err.push_back("unable to print gainLoss_id. gainLossl_id was not one of the following: ");
	46	err.push_back("0,1");
	47	errorMsg::reportError(err);
	48	}//end of switch
	49	string vRes;
	50	vRes.append(1,res);
	51	return vRes;
	52	}// end of function
	53
	54	// There are no relations here.
	55	int gainLossAlphabet::relations(const int charInSeq, const int charToCheck) const{
	56	if (charInSeq == charToCheck)
	57	return 1;
	58	if(charInSeq == -1 \|\| charInSeq == -2)
	59	return 1 ;// missing data
	60	return 0;
	61	}
	62
	63	int gainLossAlphabet::fromChar(const string& str, const int pos) const{
	64	return fromChar(str[pos]);
	65	}
	66
	67
	68
	69

+25

-0

libs/phylogeny/gainLossAlphabet.h less more

	0	#ifndef ___GAIN_LOSS_ALPH
	1	#define ___GAIN_LOSS_ALPH
	2
	3	#include "alphabet.h"
	4	#include "errorMsg.h"
	5
	6	class gainLossAlphabet : public alphabet {
	7	public:
	8	explicit gainLossAlphabet();
	9	virtual ~gainLossAlphabet() {}
	10	virtual alphabet* clone() const { return new gainLossAlphabet(*this); }
	11	int unknown() const {return -2;}
	12	int gap() const {errorMsg::reportError("The method indel::gap() is used"); return -1;} // What is it for ? I don't need this !!!
	13	int size() const {return 2;} // presence or absence only
	14	int stringSize() const {return 1;} // one letter code.
	15	int relations(const int charInSeq, const int charToCheck) const;
	16	int fromChar(const string& str, const int pos) const;
	17	int fromChar(const char s) const;
	18	string fromInt(const int in_id) const;
	19	vector<int> fromString(const string& str) const;
	20	bool isSpecific(const int id) const {return (id>=0 && id < size());}
	21
	22	};
	23
	24	#endif

+36

-0

libs/phylogeny/gammaDistribution.cpp less more

	0	// $Id: gammaDistribution.cpp 2862 2007-11-27 10:59:03Z itaymay $
	1
	2	#include "definitions.h"
	3	#include "gammaDistribution.h"
	4	#include "gammaUtilities.h"
	5	#include "logFile.h"
	6	#include <cmath>
	7
	8
	9	gammaDistribution::gammaDistribution(MDOUBLE alpha,int in_number_of_categories) :
	10	generalGammaDistribution(alpha,alpha,in_number_of_categories) {}
	11
	12	gammaDistribution::gammaDistribution(const gammaDistribution& other) :
	13	generalGammaDistribution(other) {}
	14
	15	void gammaDistribution::setAlpha(MDOUBLE in_alpha) {
	16	if (in_alpha == _alpha) return;
	17	setGammaParameters( categories(), in_alpha);
	18	}
	19
	20	//this function builds the gamma distribution
	21	void gammaDistribution::setGammaParameters(int in_number_of_categories, MDOUBLE in_alpha) {
	22	generalGammaDistribution::setGammaParameters(in_number_of_categories,in_alpha,in_alpha);
	23	}
	24
	25	void gammaDistribution::change_number_of_categories(int in_number_of_categories) {
	26	if (in_number_of_categories == categories())
	27	return;
	28	setGammaParameters( in_number_of_categories, _alpha, _alpha);
	29	}
	30
	31	void gammaDistribution::setGammaParameters(int numOfCategories ,MDOUBLE alpha, MDOUBLE beta) {
	32	if (alpha!=beta)
	33	errorMsg::reportError("gammaDistribution::setGammaParameters : can not set beta because alpha must be equal to beta");
	34	generalGammaDistribution::setGammaParameters(numOfCategories,alpha,beta);
	35	}

+33

-0

libs/phylogeny/gammaDistribution.h less more

	0	// $Id: gammaDistribution.h 2862 2007-11-27 10:59:03Z itaymay $
	1
	2	#ifndef ___GAMMA_DIST
	3	#define ___GAMMA_DIST
	4	/************************************************************
	5	This distribution can take several forms depending on its free parameter alpha
	6	(beta is assumed to be equal to alpha). For an extensive explanation of this distribution
	7	see http://mathworld.wolfram.com/GammaDistribution.html.
	8	please note that the borders of the categories are defined according to calculation of
	9	the gamma integral, according to numerical recipes in gammaUtilities
	10	_globalRate represents the rate for two joint genes.
	11	************************************************************/
	12	#include "definitions.h"
	13	#include "generalGammaDistribution.h"
	14	#include "errorMsg.h"
	15
	16	class gammaDistribution : public generalGammaDistribution {
	17
	18	public:
	19	explicit gammaDistribution() {}
	20	explicit gammaDistribution(MDOUBLE alpha,int in_number_of_categories);
	21	explicit gammaDistribution(const gammaDistribution& other);
	22	virtual ~gammaDistribution() {}
	23	virtual distribution* clone() const { return new gammaDistribution(*this); }
	24
	25	virtual void setAlpha(MDOUBLE newAlpha);
	26	virtual void setGammaParameters(int numOfCategories=1 ,MDOUBLE alpha=1);
	27	virtual void change_number_of_categories(int in_number_of_categories);
	28	// to prevent the user from using alpha!=beta
	29	virtual void setGammaParameters(int numOfCategories ,MDOUBLE alpha, MDOUBLE beta);
	30	virtual void setBeta(MDOUBLE newBeta) {errorMsg::reportError("gammaDistribution::setBeta : can not set beta because alpha=beta");}
	31	};
	32	#endif

+35

-0

libs/phylogeny/gammaDistributionFixedCategories.cpp less more

	0	#include "gammaDistributionFixedCategories.h"
	1	#include "errorMsg.h"
	2	#include "gammaUtilities.h"
	3	#include "matrixUtils.h"
	4
	5	gammaDistributionFixedCategories::gammaDistributionFixedCategories(const Vdouble& fixedBoundaries, MDOUBLE alpha)
	6	: generalGammaDistributionFixedCategories(fixedBoundaries,alpha,alpha)
	7	{
	8
	9	}
	10
	11	gammaDistributionFixedCategories::gammaDistributionFixedCategories(const gammaDistributionFixedCategories& other)
	12	: generalGammaDistributionFixedCategories(other) {
	13	}
	14
	15	gammaDistributionFixedCategories::gammaDistributionFixedCategories(MDOUBLE alpha, int catNum)
	16	: generalGammaDistributionFixedCategories(alpha, alpha,catNum)
	17	{
	18	}
	19
	20	void gammaDistributionFixedCategories::setGammaParameters(int in_number_of_categories, MDOUBLE alpha)
	21	{
	22	generalGammaDistributionFixedCategories::setGammaParameters(in_number_of_categories,alpha,alpha);
	23	}
	24
	25
	26	void gammaDistributionFixedCategories::setAlpha(MDOUBLE in_alpha) {
	27	if (in_alpha == _alpha) return;
	28	setGammaParameters( categories(), in_alpha);
	29	}
	30
	31	void gammaDistributionFixedCategories::change_number_of_categories(int in_number_of_categories)
	32	{
	33	generalGammaDistributionFixedCategories::change_number_of_categories(in_number_of_categories);
	34	}

+38

-0

libs/phylogeny/gammaDistributionFixedCategories.h less more

	0	#ifndef ___GAMMA_DISTR_FIXED_CATEGORIES
	1	#define ___GAMMA_DISTR_FIXED_CATEGORIES
	2	/************************************************************
	3	This class differs from the regular GammaDistribution in that
	4	the rateCategories are fixed according to the user's decision.
	5	Thus, only the probability of each category changes for each specific alpha value but
	6	the rate categories themselves are constant.
	7	************************************************************/
	8	#include "definitions.h"
	9	#include "generalGammaDistributionFixedCategories.h"
	10	#include "errorMsg.h"
	11
	12	class gammaDistributionFixedCategories : public generalGammaDistributionFixedCategories {
	13
	14	public:
	15	explicit gammaDistributionFixedCategories(const Vdouble& fixedBoundaries, MDOUBLE alpha);
	16	explicit gammaDistributionFixedCategories(const gammaDistributionFixedCategories& other);
	17	explicit gammaDistributionFixedCategories(MDOUBLE alpha, int catNum);
	18	virtual ~gammaDistributionFixedCategories() {}
	19	virtual distribution* clone() const { return new gammaDistributionFixedCategories(*this); }
	20	virtual void setGammaParameters(int in_number_of_categories, MDOUBLE alpha);
	21	virtual void setAlpha(MDOUBLE newAlpha);
	22	virtual void change_number_of_categories(int in_number_of_categories);
	23	// to prevent the user from using alpha!=beta
	24	virtual void setGammaParameters(int numOfCategories ,MDOUBLE alpha, MDOUBLE beta) {
	25	if (alpha!=beta)
	26	errorMsg::reportError("gammaDistributionFixedCategories::setGammaParameters : can not set beta because alpha must be equal to beta");
	27	generalGammaDistributionFixedCategories::setGammaParameters(numOfCategories,alpha,beta);
	28	}
	29	virtual void setBeta(MDOUBLE newBeta) {
	30	errorMsg::reportError("generalGammaDistributionFixedCategories::setBeta : can not set beta because alpha=beta");
	31	}
	32	};
	33
	34
	35
	36	#endif
	37

+42

-0

libs/phylogeny/gammaDistributionLaguerre.cpp less more

	0	#include "gammaDistributionLaguerre.h"
	1	#include "gammaUtilities.h"
	2	#include "logFile.h"
	3	#include <cmath>
	4
	5
	6	gammaDistributionLaguerre::gammaDistributionLaguerre(MDOUBLE alpha,int in_number_of_categories)
	7	: generalGammaDistributionLaguerre(alpha,alpha,in_number_of_categories)
	8	{
	9	}
	10
	11	gammaDistributionLaguerre::gammaDistributionLaguerre(const gammaDistributionLaguerre& other)
	12	: generalGammaDistributionLaguerre(other)
	13	{
	14	}
	15
	16	void gammaDistributionLaguerre::setAlpha(MDOUBLE in_alpha)
	17	{
	18	if (in_alpha == _alpha)
	19	return;
	20	setGammaParameters(categories(), in_alpha);
	21	}
	22
	23	//this function builds the gamma distribution
	24	void gammaDistributionLaguerre::setGammaParameters(int in_number_of_categories, MDOUBLE in_alpha)
	25	{
	26	generalGammaDistributionLaguerre::setGammaParameters(in_number_of_categories, in_alpha, in_alpha);
	27	}
	28
	29	void gammaDistributionLaguerre::change_number_of_categories(int in_number_of_categories)
	30	{
	31	if (in_number_of_categories == categories())
	32	return;
	33	setGammaParameters(in_number_of_categories, _alpha, _alpha);
	34	}
	35
	36	void gammaDistributionLaguerre::setGammaParameters(int numOfCategories ,MDOUBLE alpha, MDOUBLE beta)
	37	{
	38	if (alpha != beta)
	39	errorMsg::reportError("gammaDistributionLaguerre::setGammaParameters : can not set beta because alpha must be equal to beta");
	40	generalGammaDistributionLaguerre::setGammaParameters(numOfCategories, alpha, alpha);
	41	}

+34

-0

libs/phylogeny/gammaDistributionLaguerre.h less more

	0	// $Id: gammaDistribution.h 2768 2007-11-22 12:57:44Z osnatz $
	1
	2	#ifndef ___GAMMA_DIST_LAGUERRE
	3	#define ___GAMMA_DIST_LAGUERRE
	4	/************************************************************
	5	This distribution can take several forms depending on its free parameter alpha
	6	(beta is assumed to be equal to alpha). For an extensive explanation of this distribution
	7	see http://mathworld.wolfram.com/GammaDistribution.html.
	8	please note that the borders of the categories are defined according to calculation of
	9	the gamma integral, according to numerical recipes in gammaUtilities
	10	_globalRate represents the rate for two joint genes.
	11	************************************************************/
	12	#include "definitions.h"
	13	#include "generalGammaDistributionLaguerre.h"
	14	#include "errorMsg.h"
	15
	16	class gammaDistributionLaguerre : public generalGammaDistributionLaguerre {
	17
	18	public:
	19	explicit gammaDistributionLaguerre() {}
	20	explicit gammaDistributionLaguerre(MDOUBLE alpha,int in_number_of_categories);
	21	explicit gammaDistributionLaguerre(const gammaDistributionLaguerre& other);
	22	virtual ~gammaDistributionLaguerre() {}
	23	virtual distribution* clone() const { return new gammaDistributionLaguerre(*this); }
	24
	25	virtual void setAlpha(MDOUBLE newAlpha);
	26	virtual void setGammaParameters(int numOfCategories=1 ,MDOUBLE alpha=1);
	27	virtual void change_number_of_categories(int in_number_of_categories);
	28	// to prevent the user from using alpha!=beta
	29	virtual void setGammaParameters(int numOfCategories ,MDOUBLE alpha, MDOUBLE beta);
	30	virtual void setBeta(MDOUBLE newBeta) {errorMsg::reportError("gammaDistributionLaguerre::setBeta : can not set beta because alpha=beta");
	31	}
	32	};
	33	#endif

+13

-0

libs/phylogeny/gammaDistributionPlusInvariant.cpp less more

	0	#include "gammaDistributionPlusInvariant.h"
	1
	2
	3
	4
	5	//#define RATE_INVARIANT 1e-10
	6
	7
	8
	9
	10
	11
	12

+35

-0

libs/phylogeny/gammaDistributionPlusInvariant.h less more

	0	#ifndef ___GAMMA_DIST_PLUSINV
	1	#define ___GAMMA_DIST_PLUSINV
	2	/************************************************************
	3	This class describes a combination of a predefined distribution,
	4	with an additional invariant category of probability _Pinv
	5	This category is always the last rate category (i.e., rate(categories()) == 0)
	6	************************************************************/
	7	#include "definitions.h"
	8	#include "distributionPlusInvariant.h"
	9	#include "distribution.h"
	10	#include "gammaDistribution.h"
	11	#include "errorMsg.h"
	12	#include "gammaUtilities.h"
	13	#include "logFile.h"
	14	#include <cmath>
	15
	16
	17
	18	class gammaDistributionPlusInvariant : public distributionPlusInvariant {
	19	public:
	20	explicit gammaDistributionPlusInvariant(distribution* pDist, const MDOUBLE pInv, const MDOUBLE globalRate=1, MDOUBLE rateInvariantVal=1e-10): distributionPlusInvariant(pDist,pInv,globalRate,rateInvariantVal){}
	21	explicit gammaDistributionPlusInvariant();
	22	gammaDistributionPlusInvariant(const gammaDistributionPlusInvariant& other) {(*this) = other;}
	23	//virtual gammaDistributionPlusInvariant& operator=(const gammaDistributionPlusInvariant& other);
	24	gammaDistributionPlusInvariant* clone() const {return new gammaDistributionPlusInvariant(*this);}
	25	virtual ~gammaDistributionPlusInvariant(){}
	26
	27
	28
	29	// get GammaDistribution params
	30	virtual void setAlpha(MDOUBLE newAlpha) {return static_cast<gammaDistribution*>(_pBaseDist)->setAlpha(newAlpha);};
	31	virtual MDOUBLE getAlpha() const {return static_cast<gammaDistribution*>(_pBaseDist)->getAlpha();}
	32
	33	};
	34	#endif

+170

-0

libs/phylogeny/gammaUtilities.cpp less more

	0	// $Id: gammaUtilities.cpp 962 2006-11-07 15:13:34Z privmane $
	1
	2	#include "gammaUtilities.h"
	3	#include "logFile.h"
	4	#include "errorMsg.h"
	5	#include <cmath>
	6
	7
	8	//gser: returns the incomplete Gamma function evaluated by its series representation
	9	void gser(MDOUBLE gamser, MDOUBLE a, MDOUBLE x, MDOUBLE gln)
	10	{
	11	//MDOUBLE gammln(MDOUBLE xx);
	12
	13	int n;
	14	MDOUBLE sum,del,ap;
	15
	16	*gln=gammln(a);
	17	if (x <= 0.0) {
	18	if (x < 0.0) LOG(1,<<"x less than 0 in routine gser");
	19	*gamser=0.0;
	20	return;
	21	} else {
	22	ap=a;
	23	del=sum=1.0/a;
	24	for (n=1;n<=ITMAX;n++) {
	25	++ap;
	26	del *= x/ap;
	27	sum += del;
	28	if (fabs(del) < fabs(sum)*EPS) {
	29	gamser=sumexp(-x+alog(x)-(gln));
	30	return;
	31	}
	32	}
	33	LOG(1,<<"Too many interations in routine gser");
	34	return;
	35	}
	36	}
	37
	38	//gcf: returns the complement of the incomplete Gamma function evaluated by its continued fraction representation
	39	void gcf(MDOUBLE gammcf, MDOUBLE a, MDOUBLE x, MDOUBLE gln)
	40	{
	41	//MDOUBLE gammln(MDOUBLE xx);
	42	int i;
	43	MDOUBLE an,b,c,d,del,h;
	44
	45	*gln=gammln(a);
	46	b=x+1.0-a;
	47	c=1.0/FPMIN;
	48	d=1.0/b;
	49	h=d;
	50	for (i=1;i<=ITMAX;i++) {
	51	an = -i*(i-a);
	52	b += 2.0;
	53	d=an*d+b;
	54	if (fabs(d) < FPMIN) d=FPMIN;
	55	c=b+an/c;
	56	if (fabs(c) < FPMIN) c=FPMIN;
	57	d=1.0/d;
	58	del=d*c;
	59	h *= del;
	60	if (fabs(del-1.0) < EPS) break;
	61	}
	62	if (i > ITMAX) LOG(1,<<"a too large, ITMAX too small in gcf");
	63	gammcf=exp(-x+alog(x)-(gln))h;
	64	}
	65
	66	//gammp(a, x): computes the incomplete Gamma function which is:
	67	// 1/Gamma(a) * (the integral from 0 to x of (t^(a-1)*e^(-t)) dt)
	68	//gammp can be computed in two different ways: by a series representation (gser(..))
	69	//or by a continued fraction representation (gcf(..))
	70	//gammp chooses to function will be used, according to the values of a and x
	71	MDOUBLE gammp(MDOUBLE a, MDOUBLE x)
	72	{
	73	//void gcf(MDOUBLE gammcf, MDOUBLE a, MDOUBLE x, MDOUBLE gln);
	74	//void gser(MDOUBLE gamser, MDOUBLE a, MDOUBLE x, MDOUBLE gln);
	75	MDOUBLE gamser,gammcf,gln;
	76
	77	if (x < 0.0 \|\| a <= 0.0) LOG(1,<<"Invalid arguments in routine gammp");
	78	if (x < (a+1.0)) {
	79	gser(&gamser,a,x,&gln);
	80	return gamser;
	81	} else {
	82	gcf(&gammcf,a,x,&gln);
	83	return 1.0-gammcf;
	84	}
	85	}
	86
	87
	88
	89	//I add////////////
	90
	91
	92	MDOUBLE gammq(MDOUBLE a, MDOUBLE x)
	93	{
	94	void gcf(MDOUBLE gammcf, MDOUBLE a, MDOUBLE x, MDOUBLE gln);
	95	void gser(MDOUBLE gamser, MDOUBLE a, MDOUBLE x, MDOUBLE gln);
	96	MDOUBLE gamser,gammcf,gln;
	97
	98	if (x < 0.0 \|\| a <= 0.0) LOG(1,<<"Invalid arguments in routine gammp");
	99	if (x < (a+1.0)) {
	100	gser(&gamser,a,x,&gln);
	101	return 1.0 - gamser;
	102	} else {
	103	gcf(&gammcf,a,x,&gln);
	104	return gammcf;
	105	}
	106	}
	107	/*************************************************************************
	108	// this function computed the ln of the gamma function
	109	// The Gamma funnction: Gamma(xx) = integral from 0 to infinity of (t^(xx-1)*e^(-t)) dt.
	110	*************************************************************************/
	111	MDOUBLE gammln(MDOUBLE xx)
	112	{
	113	MDOUBLE x,y,tmp,ser;
	114	static MDOUBLE cof[6]={
	115	static_cast<MDOUBLE>(76.18009172947146),
	116	static_cast<MDOUBLE>(-86.50532032941677),
	117	static_cast<MDOUBLE>(24.01409824083091),
	118	static_cast<MDOUBLE>(-1.231739572450155),
	119	static_cast<MDOUBLE>(0.1208650973866179e-2),
	120	static_cast<MDOUBLE>(-0.5395239384953e-5)
	121	};
	122	int j;
	123
	124	y=x=xx;
	125	tmp=x+5.5;
	126	tmp -= (x+0.5)*log(tmp);
	127	ser=1.000000000190015f;
	128	for (j=0;j<6;j++) ser += cof[j]/++y;
	129	return -tmp+log(2.5066282746310005*ser/x);
	130	}
	131
	132	//
	133	MDOUBLE search_for_z_in_dis_with_any_beta(MDOUBLE alpha,MDOUBLE beta, MDOUBLE ahoson)
	134	{
	135	return (search_for_z_in_dis_with_beta_1(alpha,ahoson)/beta);
	136	}
	137
	138	MDOUBLE search_for_z_in_dis_with_beta_1(MDOUBLE alpha, MDOUBLE ahoson)
	139	{
	140	if ( ahoson>1 \|\| ahoson<0 ) errorMsg::reportError("Error in function search_for_z_in_dis_with_beta_1");
	141	MDOUBLE left=0;
	142	MDOUBLE right=99999.0;
	143	MDOUBLE tmp=5000.0;
	144	MDOUBLE results=0.0;
	145
	146	for (int i=0;i<100000000 ; i++)
	147	{
	148	results=gammp(alpha,tmp);
	149	if (fabs(ahoson-results)<ERR_FOR_GAMMA_CALC) {
	150	return tmp;
	151	}
	152	if (results>ahoson) {
	153	right=tmp;
	154	}
	155	else left=tmp;
	156	tmp=(right+left)/2;
	157	}
	158	cout << "ERROR in search_for_z_in_dis_with_beta_1() Alpha is: "<< alpha <<endl;
	159	errorMsg::reportError("Error in function search_for_z_in_dis_with_beta_1 - first bonderi is 0");// also quit the program
	160	return 0;
	161	}
	162
	163	MDOUBLE the_avarage_r_in_category_between_a_and_b(MDOUBLE left, MDOUBLE right, MDOUBLE alpha, MDOUBLE beta, int k)
	164	{// and and b are the border of percentile k)
	165	MDOUBLE tmp;
	166	tmp= gammp(alpha+1,rightbeta) - gammp(alpha+1,leftbeta);
	167	tmp= (tmpalpha/beta)k;
	168	return tmp;
	169	}

+48

-0

libs/phylogeny/gammaUtilities.h less more

	0	// $Id: gammaUtilities.h 10963 2012-09-19 04:39:35Z cohenofi $
	1
	2	#ifndef ___GAMMA_UTILITIES
	3	#define ___GAMMA_UTILITIES
	4
	5	#include "definitions.h"
	6	#include "numRec.h" //fot the ITMAX
	7
	8	/******************************************************************************
	9	gamma utilities include calculating ln gamma and integral of gamma.
	10	used mainly in building the gamma function and creating categories within it
	11	******************************************************************************/
	12
	13	//gammln(xx): computes the ln of the Gamma function
	14	//the Gamma function is the integral from 0 to infinity of (t^(xx-1)*e^(-t)) dt.
	15	MDOUBLE gammln(MDOUBLE xx);
	16
	17	//gammp(a, x): computes the incomplete Gamma function which is:
	18	// 1/Gamma(a) * (the integral from 0 to x of (t^(a-1)*e^(-t)) dt)
	19	//gammp can be computed in two different ways: by a series representation (gser(..))
	20	//or by a continued fraction representation (gcf(..))
	21	//gammp chooses which function will be used, according to the values of a and x
	22	MDOUBLE gammp(MDOUBLE a, MDOUBLE x);
	23	void gser(MDOUBLE gamser, MDOUBLE a, MDOUBLE x, MDOUBLE gln);
	24	void gcf(MDOUBLE gammcf, MDOUBLE a, MDOUBLE x, MDOUBLE gln);
	25
	26	MDOUBLE search_for_z_in_dis_with_any_beta(MDOUBLE alpha,MDOUBLE beta, MDOUBLE ahoson);
	27	MDOUBLE search_for_z_in_dis_with_beta_1(MDOUBLE alpha, MDOUBLE ahoson);
	28	MDOUBLE the_avarage_r_in_category_between_a_and_b(MDOUBLE a, MDOUBLE b, MDOUBLE alpha, MDOUBLE beta, int k);
	29
	30	//const int ITMAX = 100;
	31	const MDOUBLE EPS = static_cast<MDOUBLE>(0.0000003);
	32	const MDOUBLE FPMIN = static_cast<MDOUBLE>(1.0e-30);
	33	const MDOUBLE ERR_FOR_GAMMA_CALC = static_cast<MDOUBLE>(0.00001);
	34	const MDOUBLE MINIMUM_ALPHA_PARAM = static_cast<MDOUBLE>(0.01); //was 0.05
	35	const MDOUBLE MAXIMUM_ALPHA_PARAM = static_cast<MDOUBLE>(20.0); //was 10.0, when the distribution is more 'gaussian' and uniform , need higher alpha
	36	const MDOUBLE MINIMUM_BETA_PARAM = static_cast<MDOUBLE>(0.01); //was 0.05
	37	const MDOUBLE MAXIMUM_BETA_PARAM = static_cast<MDOUBLE>(20.0); // was 5.0, require high values for scaling
	38
	39
	40
	41	//gammq(a, x) : computes 1 - the incomplete Gamma function (1-gammp(a,x)) which is:
	42	//1/Gamma(a) * (the integral from x to infinity of (t^(a-1)*e^(-t)) dt).
	43	//use for computing Chi-Square probability function (for the LRT):
	44	//chiSquareProb(df,chiSquare) = gammq(df/2.0,chiSquare/2.0)
	45	MDOUBLE gammq(MDOUBLE a, MDOUBLE x);
	46
	47	#endif

+115

-0

libs/phylogeny/generalGammaDistribution.cpp less more

	0	// $Id: generalGammaDistribution.cpp 2768 2007-11-22 12:57:44Z osnatz $
	1
	2	#include "generalGammaDistribution.h"
	3	#include "gammaUtilities.h"
	4	#include "errorMsg.h"
	5	#include "logFile.h"
	6	#include <cmath>
	7
	8
	9	generalGammaDistribution::generalGammaDistribution() :
	10	_alpha(0.0),
	11	_beta(0.0),
	12	_globalRate(1.0)
	13	{
	14	_bonderi.resize(0,0);
	15	_rates.resize(0,0);
	16	_ratesProb.resize(0,0);
	17	}
	18
	19	generalGammaDistribution::generalGammaDistribution(const generalGammaDistribution& other) :
	20
	21	_alpha(other._alpha),
	22	_beta(other._beta),
	23	_rates(other._rates),
	24	_ratesProb(other._ratesProb),
	25	_globalRate(other._globalRate),
	26	_bonderi(other._bonderi)
	27	{}
	28
	29
	30	generalGammaDistribution::generalGammaDistribution(MDOUBLE alpha,MDOUBLE beta,int in_number_of_categories) :
	31	_globalRate(1.0)
	32	{
	33	setGammaParameters(in_number_of_categories,alpha,beta);
	34	}
	35
	36	void generalGammaDistribution::setAlpha(MDOUBLE in_alpha) {
	37	if (in_alpha == _alpha)
	38	return;
	39	setGammaParameters(categories(), in_alpha, _beta);
	40	}
	41
	42	void generalGammaDistribution::setBeta(MDOUBLE in_beta) {
	43	if (in_beta == _beta)
	44	return;
	45	setGammaParameters( categories(), _alpha, in_beta);
	46	}
	47
	48	void generalGammaDistribution::change_number_of_categories(int in_number_of_categories) {
	49	if (in_number_of_categories == categories())
	50	return;
	51	setGammaParameters( in_number_of_categories, _alpha, _beta);
	52	}
	53
	54	void generalGammaDistribution::setGammaParameters(int in_number_of_categories, MDOUBLE in_alpha, MDOUBLE in_beta) {
	55	if ((in_alpha == _alpha) && (in_beta == _beta) && (in_number_of_categories == categories()))
	56	return;
	57
	58
	59	if (in_alpha < MINIMUM_ALPHA_PARAM)
	60	in_alpha = MINIMUM_ALPHA_PARAM;// when alpha is very small there are underflaw problems
	61	if (in_beta < MINIMUM_ALPHA_PARAM)
	62	in_beta = MINIMUM_ALPHA_PARAM;// when beta is very small there are underflaw problems
	63
	64	_alpha = in_alpha;
	65	_beta = in_beta;
	66	_rates.clear();
	67	_rates.resize(in_number_of_categories);
	68	_ratesProb.clear();
	69	_ratesProb.resize(in_number_of_categories, 1.0/in_number_of_categories);
	70	_bonderi.clear();
	71	_bonderi.resize(in_number_of_categories+1);
	72	if (in_number_of_categories==1) {
	73	_rates[0] = 1.0;
	74	return;
	75	}
	76	if (categories() > 1) {
	77	fill_mean();
	78	return ;
	79	}
	80
	81	}
	82	void generalGammaDistribution::fill_mean() {
	83	fill_bonderi();
	84	int i;
	85	//for (i=0; i<=categories(); ++i) cout<<endl<<bonderi[i];
	86	//LOG(5,<<"\n====== the r categories are =====\n");
	87	for (i=0; i<categories(); ++i) {
	88	_rates[i]=the_avarage_r_in_category_between_a_and_b(_bonderi[i], _bonderi[i+1], _alpha, _beta, categories());
	89	//LOG(5,<<meanG[i]<<endl);
	90	}
	91	//LOG(5,<<endl<<alpha<<endl);
	92	//return 0;
	93	}
	94
	95	void generalGammaDistribution::fill_bonderi() {
	96	int i;
	97	for (i=1; i<categories(); ++i)
	98	{
	99	_bonderi[i]=search_for_z_in_dis_with_any_beta(_alpha, _beta,static_cast<MDOUBLE>(i)/categories());
	100	}
	101	_bonderi[0]=0;
	102	_bonderi[i]=VERYBIG/10000.0;// this is becuase we multiply bondei[i] by alpha or beta, and
	103	// by this manipulation we avoid overflows...;
	104
	105	//return 0;
	106	}
	107
	108
	109	const MDOUBLE generalGammaDistribution::getCumulativeProb(const MDOUBLE x) const
	110	{//
	111	//since r~gamma(alpha, beta) then beta*r~ gamma(alpha,1)=gammp
	112	//here we assume alpha=beta
	113	return gammp(_alpha, x*_beta);
	114	}

+61

-0

libs/phylogeny/generalGammaDistribution.h less more

	0	// $Id: generalGammaDistribution.h 3044 2007-12-18 15:54:50Z itaymay $
	1
	2	#ifndef ___GENERAL_GAMMA_DIST
	3	#define ___GENERAL_GAMMA_DIST
	4	/************************************************************
	5	This distribution can take several forms depending on its free parameters alpha,beta
	6	(unlike gammaDistribution, alpha is not necessarily equal to beta).
	7	For an extensive explanation of this distribution
	8	see http://mathworld.wolfram.com/GammaDistribution.html
	9	************************************************************/
	10	#include "definitions.h"
	11	#include "distribution.h"
	12
	// Quadrature scheme selector.
	// NOTE(review): presumably distinguishes the quantile-based and the Laguerre
	// gamma classes - confirm against the code that uses it.
	enum quadratureType {
		QUANTILE,
		LAGUERRE
	};
	14
	15	class generalGammaDistribution : public distribution {
	16
	17	public:
	18	explicit generalGammaDistribution();
	19	explicit generalGammaDistribution(MDOUBLE alpha, MDOUBLE beta, int in_number_of_categories);
	20	explicit generalGammaDistribution(const generalGammaDistribution& other);
	21	virtual ~generalGammaDistribution() {}
	22	virtual distribution* clone() const { return new generalGammaDistribution(*this); }
	23
	24	virtual void setGammaParameters(int numOfCategories ,MDOUBLE alpha, MDOUBLE beta);
	25	virtual const int categories() const {return _rates.size();}
	26	virtual const MDOUBLE rates(const int i) const {return _rates[i]*_globalRate;}
	27	virtual const MDOUBLE ratesProb(const int i) const {return _ratesProb[i];}
	28
	29	virtual void setGlobalRate(const MDOUBLE x) {_globalRate = x;}
	30	virtual MDOUBLE getGlobalRate()const {return _globalRate;}
	31	virtual const MDOUBLE getCumulativeProb(const MDOUBLE x) const;
	32	virtual void setAlpha(MDOUBLE newAlpha);
	33	virtual MDOUBLE getAlpha() const {return _alpha;}
	34	virtual void setBeta(MDOUBLE newBeta);
	35	virtual MDOUBLE getBeta() const {return _beta;}
	36	virtual void change_number_of_categories(int in_number_of_categories);
	37	virtual MDOUBLE getBorder(const int i) const {return _bonderi[i];} //return the ith border. Note: _bonderi[0] = 0, _bondery[categories()] = infinite
	38
	39	virtual Vdouble getBorders() const {return _bonderi;}
	40	virtual Vdouble getRates() const {return _rates;}
	41
	42	protected:
	43	virtual void fill_mean();
	44	virtual void fill_bonderi();
	45
	46
	47	protected:
	48	MDOUBLE _alpha;
	49	MDOUBLE _beta;
	50
	51	vector<MDOUBLE> _rates;
	52	vector<MDOUBLE> _ratesProb;
	53	MDOUBLE _globalRate;
	54	vector<MDOUBLE> _bonderi; //Note: _bonderi[0] = 0, _bondery[categories()] = infinite
	55	};
	56
	57
	58
	59	#endif
	60

+360

-0

libs/phylogeny/generalGammaDistributionFixedCategories.cpp less more

	0	#include "generalGammaDistributionFixedCategories.h"
	1	#include "errorMsg.h"
	2	#include "gammaUtilities.h"
	3
	4
	5	generalGammaDistributionFixedCategories::generalGammaDistributionFixedCategories(const Vdouble& fixedBoundaries, MDOUBLE alpha, MDOUBLE beta) :
	6	generalGammaDistribution()
	7	{
	8	_alpha = alpha;
	9	_beta = beta;
	10	setFixedCategories(fixedBoundaries);
	11	}
	12
	13	generalGammaDistributionFixedCategories::generalGammaDistributionFixedCategories(const Vdouble& fixedRates, const Vdouble& boundaries, MDOUBLE alpha, MDOUBLE beta) :
	14	generalGammaDistribution()
	15	{
	16	if ((fixedRates.size() + 1) != boundaries.size())
	17	errorMsg::reportError("error in generalGammaDistributionFixedCategories constructor");
	18	_alpha = alpha;
	19	_beta = beta;
	20	_rates = fixedRates;
	21	_bonderi = boundaries;
	22	computeRatesProbs();
	23	}
	24
	25
	26
	27	generalGammaDistributionFixedCategories::generalGammaDistributionFixedCategories(MDOUBLE alpha, MDOUBLE beta, int catNum)
	28	: generalGammaDistribution()
	29	{
	30	_alpha = alpha;
	31	_beta = beta;
	32	setDefaultBoundaries(catNum);
	33	}
	34
	35
	36
	37	generalGammaDistributionFixedCategories::generalGammaDistributionFixedCategories(const generalGammaDistributionFixedCategories& other)
	38	: generalGammaDistribution(other)
	39	{}
	40	void generalGammaDistributionFixedCategories::change_number_of_categories(int in_number_of_categories)
	41	{
	42	setDefaultBoundaries(in_number_of_categories);
	43	}
	44
	45
	46	void generalGammaDistributionFixedCategories::setFixedCategories(const Vdouble& fixedBoundaries){
	47
	48	if (fixedBoundaries.size()<2)
	49	errorMsg::reportError("Error in generalGammaDistributionFixedCategories::setFixedCategories : at least two boundaries are required");
	50	if (fixedBoundaries[0] > 0.0)
	51	errorMsg::reportError("Error in generalGammaDistributionFixedCategories::setFixedCategories : first boundary should be zero");
	52
	53	_bonderi = fixedBoundaries;
	54	if (_bonderi[_bonderi.size()] > VERYBIG/10000.0)
	55	_bonderi[_bonderi.size()] = VERYBIG/10000.0; // to avoid overflow
	56
	57	setFixedCategories();
	58	}
	59
	60	void generalGammaDistributionFixedCategories::setFixedCategories() {
	61	fill_mean();
	62	computeRatesProbs();
	63	}
	64
	65	void generalGammaDistributionFixedCategories::fill_mean()
	66	{
	67	int numOfCategories = _bonderi.size()-1;
	68	if (numOfCategories == 0)
	69	errorMsg::reportError("Error in gammaDistributionFixedCategories::fill_mean, fixed boundaries must be first initialized");
	70	_rates.clear();
	71	_rates.resize(numOfCategories,0.0);
	72	int cat;
	73	for (cat=0; cat<numOfCategories-1; ++cat) {
	74	_rates[cat] = (_bonderi[cat]+_bonderi[cat+1])/2.0;
	75	}
	76	if (numOfCategories>1) {
	77	//the rate of the last category cannot be the middle of its boundaries, since the upper bound is infinite
	78	MDOUBLE increment = _bonderi[cat] - _rates[cat-1];
	79	_rates[cat] = _bonderi[cat] + 2*increment;
	80	} else {
	81	_rates[0] = 1;
	82	}
	83	}
	84
	85
	86	// this function is here to override the inherited function
	87	// note that the rates themselves and the boundaries do not change.
	88	// the number of categories cannot be changed, since fixed categories must be given before
	89	void generalGammaDistributionFixedCategories::setGammaParameters (int in_number_of_categories, MDOUBLE in_alpha, MDOUBLE in_beta) {
	90	if (in_number_of_categories==1) {
	91	_rates[0] = 1.0;
	92	return;
	93	}
	94	if (in_number_of_categories != categories())
	95	errorMsg::reportError("generalGammaDistributionFixedCategories::setGammaParameters: the number of categories cannot be changed, first call setFixedCategories");
	96	if ((in_alpha == _alpha) && (in_beta == _beta))
	97	return;
	98
	99	if (in_alpha < MINIMUM_ALPHA_PARAM)
	100	in_alpha = MINIMUM_ALPHA_PARAM;// when alpha is very small there are underflow problems
	101	if (in_beta < MINIMUM_ALPHA_PARAM)
	102	in_beta = MINIMUM_ALPHA_PARAM;// when beta is very small there are underflaw problems
	103
	104	_alpha = in_alpha;
	105	_beta = in_beta;
	106	computeRatesProbs();
	107	}
	108
	109	void generalGammaDistributionFixedCategories::computeRatesProbs(){
	110	MDOUBLE totalProb = 0.0;
	111	MDOUBLE catProb = 0.0;
	112	MDOUBLE lowerBoundaryProb = 0.0;
	113	MDOUBLE upperBoundaryProb = 0.0;
	114	int cat;
	115	_ratesProb.clear();
	116	_ratesProb.resize(categories());
	117	for (cat = 0; cat < categories()-1; ++cat) {
	118	upperBoundaryProb = getCumulativeProb(_bonderi[cat+1]);
	119	catProb = upperBoundaryProb - lowerBoundaryProb;
	120	_ratesProb[cat] = catProb;
	121	totalProb += catProb;
	122	lowerBoundaryProb = upperBoundaryProb;
	123	}
	124	_ratesProb[cat] = 1.0 - totalProb;
	125	}
	126
	127	void generalGammaDistributionFixedCategories::setDefaultBoundaries(int catNum)
	128	{
	129	_bonderi.clear();
	130	_bonderi.resize(catNum+1,0.0);
	131	_bonderi[0] = 0;
	132	_bonderi[catNum] = VERYBIG/10000.0; //to avoid overflow
	133	switch (catNum)
	134	{
	135	case 1:
	136	break;
	137	case 2:
	138	_bonderi[1] = 1.0;
	139	break;
	140	case 3:
	141	_bonderi[1] = 0.5;
	142	_bonderi[2] = 1.0;
	143	break;
	144	case 4:
	145	_bonderi[1] = 0.5;
	146	_bonderi[2] = 1.0;
	147	_bonderi[3] = 1.5;
	148	break;
	149	case 5:
	150	_bonderi[1] = 0.4;
	151	_bonderi[2] = 0.8;
	152	_bonderi[3] = 1.2;
	153	_bonderi[4] = 1.6;
	154	break;
	155	case 10:
	156	_bonderi[1] = 0.01;
	157	_bonderi[2] = 0.1;
	158	_bonderi[3] = 0.25;
	159	_bonderi[4] = 0.55;
	160	_bonderi[5] = 0.95;
	161	_bonderi[6] = 1.5;
	162	_bonderi[7] = 3.0;
	163	_bonderi[8] = 5.0;
	164	_bonderi[9] = 7.0;
	165	break;
	166	case 16:
	167	_bonderi[1] = 0.001;
	168	_bonderi[2] = 0.01;
	169	_bonderi[3] = 0.1;
	170	_bonderi[4] = 0.15;
	171	_bonderi[5] = 0.35;
	172	_bonderi[6] = 0.55;
	173	_bonderi[7] = 0.75;
	174	_bonderi[8] = 0.95;
	175	_bonderi[9] = 1.5;
	176	_bonderi[10] = 3.0;
	177	_bonderi[11] = 4.5;
	178	_bonderi[12] = 6.0;
	179	_bonderi[13] = 7.5;
	180	_bonderi[14] = 9.0;
	181	_bonderi[15] = 12.0;
	182	break;
	183	default:
	184	errorMsg::reportError("error in generalGammaDistributionFixedCategories::setDefaultBoundaries");
	185	}
	186
	187	setFixedCategories();
	188	}
	189
	190	//void generalGammaDistributionFixedCategories::getDefaultRates(int catNum, Vdouble& fixedRates)
	191	//{
	192	// fixedRates.resize(catNum, 0.0);
	193	// switch (catNum)
	194	// {
	195	// case 1:
	196	// fixedRates[0] = 1.0;
	197	// break;
	198	// case 2:
	199	// fixedRates[0] = 0.5;
	200	// fixedRates[1] = 1.5;
	201	// break;
	202	// case 3:
	203	// fixedRates[0] = 0.05;
	204	// fixedRates[1] = 0.5;
	205	// fixedRates[2] = 1.5;
	206	// break;
	207	// case 5:
	208	// fixedRates[0] = 0.05;
	209	// fixedRates[1] = 0.3;
	210	// fixedRates[2] = 0.6;
	211	// fixedRates[3] = 1.5;
	212	// fixedRates[4] = 5.0;
	213	// break;
	214	// case 8:
	215	// fixedRates[0] = 0.05;
	216	// fixedRates[1] = 0.15;
	217	// fixedRates[2] = 0.35;
	218	// fixedRates[3] = 0.6;
	219	// fixedRates[4] = 0.85;
	220	// fixedRates[5] = 1.5;
	221	// fixedRates[6] = 3.0;
	222	// fixedRates[7] = 5.0;
	223	// break;
	224	// case 12:
	225	// fixedRates[0] = 0.05;
	226	// fixedRates[1] = 0.15;
	227	// fixedRates[2] = 0.35;
	228	// fixedRates[3] = 0.55;
	229	// fixedRates[4] = 0.75;
	230	// fixedRates[5] = 0.95;
	231	// fixedRates[6] = 1.5;
	232	// fixedRates[7] = 3.0;
	233	// fixedRates[8] = 4.5;
	234	// fixedRates[9] = 6.0;
	235	// fixedRates[10] = 7.5;
	236	// fixedRates[11] = 9.0;
	237	// break;
	238	// case 16:
	239	// fixedRates[0] = 0.00000001;
	240	// fixedRates[1] = 0.001;
	241	// fixedRates[2] = 0.01;
	242	// fixedRates[3] = 0.1;
	243	// fixedRates[4] = 0.15;
	244	// fixedRates[5] = 0.35;
	245	// fixedRates[6] = 0.55;
	246	// fixedRates[7] = 0.75;
	247	// fixedRates[8] = 0.95;
	248	// fixedRates[9] = 1.5;
	249	// fixedRates[10] = 3.0;
	250	// fixedRates[11] = 4.5;
	251	// fixedRates[12] = 6.0;
	252	// fixedRates[13] = 7.5;
	253	// fixedRates[14] = 9.0;
	254	// fixedRates[15] = 12.0;
	255	// break;
	256	// case 24:
	257	// fixedRates[0] = 0.000000000000001;
	258	// fixedRates[1] = 1;
	259	// fixedRates[2] = 2;
	260	// fixedRates[3] = 3;
	261	// fixedRates[4] = 4;
	262	// fixedRates[5] = 5;
	263	// fixedRates[6] = 6;
	264	// fixedRates[7] = 7;
	265	// fixedRates[8] = 8;
	266	// fixedRates[9] = 9;
	267	// fixedRates[10] = 10;
	268	// fixedRates[11] = 11;
	269	// fixedRates[12] = 12;
	270	// fixedRates[13] = 13;
	271	// fixedRates[14] = 14;
	272	// fixedRates[15] = 15;
	273	// fixedRates[16] = 16;
	274	// fixedRates[17] = 17;
	275	// fixedRates[18] = 18;
	276	// fixedRates[19] = 19;
	277	// fixedRates[20] = 20;
	278	// fixedRates[21] = 21;
	279	// fixedRates[22] = 22;
	280	// fixedRates[23] = 23;
	281	// break;
	282	// case 32:
	283	// fixedRates[0] = 0.00000001;
	284	// fixedRates[1] = 0.0000001;
	285	// fixedRates[2] = 0.000001;
	286	// fixedRates[3] = 0.00001;
	287	// fixedRates[4] = 0.0001;
	288	// fixedRates[5] = 0.001;
	289	// fixedRates[6] = 0.01;
	290	// fixedRates[7] = 0.1;
	291	// fixedRates[8] = 0.15;
	292	// fixedRates[9] = 0.2;
	293	// fixedRates[10] = 0.25;
	294	// fixedRates[11] = 0.3;
	295	// fixedRates[12] = 0.35;
	296	// fixedRates[13] = 0.4;
	297	// fixedRates[14] = 0.45;
	298	// fixedRates[15] = 0.5;
	299	// fixedRates[16] = 0.6;
	300	// fixedRates[17] = 0.7;
	301	// fixedRates[18] = 0.8;
	302	// fixedRates[19] = 0.9;
	303	// fixedRates[20] = 1.0;
	304	// fixedRates[21] = 1.2;
	305	// fixedRates[22] = 1.4;
	306	// fixedRates[23] = 1.6;
	307	// fixedRates[24] = 1.8;
	308	// fixedRates[25] = 2.0;
	309	// fixedRates[26] = 2.5;
	310	// fixedRates[27] = 3.0;
	311	// fixedRates[28] = 4.0;
	312	// fixedRates[29] = 5.0;
	313	// fixedRates[30] = 7.5;
	314	// fixedRates[31] = 15.0;
	315	// break;
	316	// case 36:
	317	// fixedRates[0] = 0.00000001;
	318	// fixedRates[1] = 0.0000001;
	319	// fixedRates[2] = 0.000001;
	320	// fixedRates[3] = 0.00001;
	321	// fixedRates[4] = 0.0001;
	322	// fixedRates[5] = 0.001;
	323	// fixedRates[6] = 0.01;
	324	// fixedRates[7] = 0.1;
	325	// fixedRates[8] = 0.15;
	326	// fixedRates[9] = 0.2;
	327	// fixedRates[10] = 0.25;
	328	// fixedRates[11] = 0.3;
	329	// fixedRates[12] = 0.35;
	330	// fixedRates[13] = 0.4;
	331	// fixedRates[14] = 0.45;
	332	// fixedRates[15] = 0.5;
	333	// fixedRates[16] = 0.6;
	334	// fixedRates[17] = 0.7;
	335	// fixedRates[18] = 0.8;
	336	// fixedRates[19] = 0.9;
	337	// fixedRates[20] = 1.0;
	338	// fixedRates[21] = 1.2;
	339	// fixedRates[22] = 1.4;
	340	// fixedRates[23] = 1.6;
	341	// fixedRates[24] = 1.8;
	342	// fixedRates[25] = 2.0;
	343	// fixedRates[26] = 2.5;
	344	// fixedRates[27] = 3.0;
	345	// fixedRates[28] = 4.0;
	346	// fixedRates[29] = 5.0;
	347	// fixedRates[30] = 7.5;
	348	// fixedRates[31] = 10.0;
	349	// fixedRates[32] = 12.5;
	350	// fixedRates[33] = 15.0;
	351	// fixedRates[34] = 20.0;
	352	// fixedRates[35] = 30.0;
	353	// break;
	354	//
	355	// default:
	356	// errorMsg::reportError("error in generalGammaDistributionFixedCategories::getFixedCategories");
	357	// }
	358	//
	359	//}

+36

-0

libs/phylogeny/generalGammaDistributionFixedCategories.h less more

	0	#ifndef ___GENERAL_GAMMA_DIST_LAGUERRE_FIXED_CATEGORIES
	1	#define ___GENERAL_GAMMA_DIST_LAGUERRE_FIXED_CATEGORIES
	2	/************************************************************
	3	This class differs from the regular generalGammaDistribution in that
	4	the rateCategories are fixed according to the user's decision.
	5	Thus, only the probability of each category changes for each specific pair of alpha and beta values, while
	6	the rate categories themselves are constant.
	7	************************************************************/
	8	#include "definitions.h"
	9	#include "generalGammaDistribution.h"
	10	#include "errorMsg.h"
		// General gamma rate distribution whose rate categories are fixed; per the
		// file comment above, only the category probabilities depend on alpha/beta.
	11	class generalGammaDistributionFixedCategories : public generalGammaDistribution {
	12
	13	public:
		// Construct from caller-supplied category boundaries plus alpha and beta.
	14	explicit generalGammaDistributionFixedCategories(const Vdouble& fixedBoundaries, MDOUBLE alpha, MDOUBLE beta);
		// Construct from caller-supplied rates and boundaries plus alpha and beta.
	15	explicit generalGammaDistributionFixedCategories(const Vdouble& fixedRates, const Vdouble& boundaries, MDOUBLE alpha, MDOUBLE beta);
		// Construct with catNum categories and no caller-supplied boundaries
		// (presumably filled in by setDefaultBoundaries -- confirm in the .cpp).
	16	explicit generalGammaDistributionFixedCategories(MDOUBLE alpha, MDOUBLE beta, int catNum);
		// Copy constructor. NOTE: being explicit, it only permits direct
		// initialisation, e.g. T copy(other), as used by clone() below.
	17	explicit generalGammaDistributionFixedCategories(const generalGammaDistributionFixedCategories& other);
	18	virtual ~generalGammaDistributionFixedCategories() {}
		// Polymorphic copy: returns a heap-allocated copy of *this (allocated with new).
	19	virtual distribution* clone() const { return new generalGammaDistributionFixedCategories(*this); }
	20	virtual void change_number_of_categories(int in_number_of_categories);
	21	virtual void setGammaParameters(int numOfCategories ,MDOUBLE alpha, MDOUBLE beta);
	22	virtual void setFixedCategories(const Vdouble& fixedBoundaries);
	23
	24	protected:
		// Helpers implemented in the .cpp. The visible tail of setDefaultBoundaries
		// reports an error (apparently for an unsupported catNum -- confirm) and
		// then calls setFixedCategories().
	25	virtual void setDefaultBoundaries(int catNum);
	26	virtual void setFixedCategories();
	27	virtual void fill_mean();
	28	virtual void computeRatesProbs();
	29
	30	};
	31
	32
	33
	34	#endif
	35

+113

-0

libs/phylogeny/generalGammaDistributionLaguerre.cpp less more

	0	// $Id: generalGammaDistributionLaguerre.cpp 2865 2007-11-27 11:00:26Z itaymay $
	1	#include "generalGammaDistributionLaguerre.h"
	2	#include "gammaUtilities.h"
	3	#include "errorMsg.h"
	4	#include "GLaguer.h"
	5	#include <cmath>
	6
	7	generalGammaDistributionLaguerre::generalGammaDistributionLaguerre()
	8	: generalGammaDistribution()
	9	{
	10	}
	11
	12	generalGammaDistributionLaguerre::generalGammaDistributionLaguerre(const generalGammaDistributionLaguerre& other) :
	13	generalGammaDistribution(other)
	14	{
	15	}
	16
	17	generalGammaDistributionLaguerre::generalGammaDistributionLaguerre(MDOUBLE alpha,MDOUBLE beta,int in_number_of_categories)
	18	: generalGammaDistribution()
	19	{
	20	//The Laguerre function returns NULL values for very large numebr of categories (for example 700 categories with alpha = 1.5 and beta = 1.3)
	21	// if (in_number_of_categories > 200)
	22	// errorMsg::reportError("generalGammaDistributionLaguerre cannot work with more than 200 categories");
	23	_globalRate=1.0;
	24	setGammaParameters(in_number_of_categories,alpha,beta);
	25	}
	26
	27	generalGammaDistributionLaguerre::~generalGammaDistributionLaguerre()
	28	{
	29	}
	30
	31
	32	void generalGammaDistributionLaguerre::setGammaParameters(int in_number_of_categories, MDOUBLE in_alpha, MDOUBLE in_beta) {
	33	if ((in_alpha == _alpha) && (in_beta == _beta) && (in_number_of_categories == categories()))
	34	return;
	35
	36
	37	if (in_alpha < MINIMUM_ALPHA_PARAM)
	38	in_alpha = MINIMUM_ALPHA_PARAM;// when alpha is very small there are underflaw problems
	39	if (in_beta < MINIMUM_ALPHA_PARAM)
	40	in_beta = MINIMUM_ALPHA_PARAM;// when beta is very small there are underflaw problems
	41
	42	_alpha = in_alpha;
	43	_beta = in_beta;
	44	_rates.clear();
	45	//_rates.resize(in_number_of_categories);
	46	_rates.resize(0);
	47	_ratesProb.clear();
	48	//_ratesProb.resize(in_number_of_categories);
	49	_ratesProb.resize(0);
	50	if (in_number_of_categories==1) {
	51	_rates.push_back(1.0);
	52	_ratesProb.push_back(1.0);
	53	return;
	54	}
	55	if (in_number_of_categories > 1) {
	56	fillRatesAndProbs(in_number_of_categories);
	57	return ;
	58	}
	59
	60	}
	61
	62
	63	MDOUBLE generalGammaDistributionLaguerre::getBorder(const int i) const
	64	{
	65	errorMsg::reportError("With the Laguerre method the categories do not have a well defined border");
	66	return -1;
	67	}
	68
	69
	70	void generalGammaDistributionLaguerre::fillRatesAndProbs(int catNum)
	71	{
	72	Vdouble weights, abscissas;
	73	GLaguer lg(catNum, _alpha - 1, abscissas, weights);
	74	MDOUBLE sumP = 0.0;
	75
	76	MDOUBLE gamAlpha = exp(gammln(_alpha));
	77	for (int i = 0; i < catNum; ++i)
	78	{
	79	//if (sumP > 0.99)
	80	//{
	81	// _ratesProb.push_back(1-sumP);
	82	// _rates.push_back(abscissas[i] / _beta);
	83	// break;
	84	//}
	85
	86	_ratesProb.push_back(weights[i] / gamAlpha);
	87	_rates.push_back(abscissas[i] / _beta);
	88	sumP += _ratesProb[i];
	89	//cerr<<i<<" rate = "<<_rates[i]<<" Pr = "<<_ratesProb[i]<<" sum = "<<sumP<<endl;
	90	}
	91	for (int j = 0; j < _ratesProb.size(); ++j)
	92	{
	93	_ratesProb[j] /= sumP;
	94	}
	95	}
	96
	97
	98	/*
	99	void generalGammaDistributionLaguerre::fillRatesAndProbs(int catNum)
	100	{
	101	Vdouble weights, abscissas;
	102	GLaguer lg(categories(), _alpha - 1, abscissas, weights);
	103
	104	MDOUBLE gamAlpha = exp(gammln(_alpha));
	105	for (int i = 0; i < categories(); ++i)
	106	{
	107	_ratesProb[i] = weights[i] / gamAlpha;
	108	_rates[i] = abscissas[i] / _beta;
	109	}
	110	}
	111	*/
	112

+47

-0

libs/phylogeny/generalGammaDistributionLaguerre.h less more

	0	// $Id: generalGammaDistributionLaguerre.h 2865 2007-11-27 11:00:26Z itaymay $
	1	// version 1.00
	2	// last modified Sep 2004
	3
	4	#ifndef ___GENERAL_GAMMA_DIST_LAGUERRE
	5	#define ___GENERAL_GAMMA_DIST_LAGUERRE
	6	/************************************************************
	7	This class differs from the regular generalGammaDistribution in that
	8	the rateCategories and their probabilities are not constructed using Yang's quantile method.
	9	Instead the general Gauss-Laguerre quadrature method is used.
	10	For example, if we want to compute the likelihood over the rate distribution,
	11	then we need to solve the integral
	12
	13	I[0_to_infinity]{P(data|r)*P(r) dr}
	14	= I[0_to_infinity]{P(data|r) * b^a / Gamma(a) * exp(-br) * r^(a-1) dr} //a = alpha, b = beta
	15	= b^(a)/Gamma(a) * I[0_to_infinity]{P(data|m/b) * exp(-m) * (m/b)^(a') / b dm} //substitute m=b*r, a'=a-1
	16	= 1/Gamma(a) * I[0_to_infinity]{P(data|m/b) * exp(-m) * m^a' dm}
	17	Now we can use the Gauss-Laguerre formula to get an approximation for the integral.
	18	The Xj and Wj are the abscissas and weights of the Laguerre polynomials:
	19	= 1/Gamma(a) * sum[j = 0_to_catNum-1]{P(data|Xj/b) * Wj}
	20
	21	The rates are the Xj/b and their priors are Wj/Gamma(a)
	22	The quadrature method is explained in Numerical Recipes (Press et al.; chapter 4.5)
	23	and is also mentioned in Felsenstein 2001 (JME 53: 447-455).
	24	************************************************************/
	25	#include "definitions.h"
	26	#include "generalGammaDistribution.h"
		// General gamma distribution whose category rates and probabilities come
		// from Gauss-Laguerre quadrature (see the derivation in the comment above).
	27	class generalGammaDistributionLaguerre : public generalGammaDistribution {
	28
	29	public:
	30	explicit generalGammaDistributionLaguerre();
		// Sets the global rate to 1.0 and calls setGammaParameters().
	31	explicit generalGammaDistributionLaguerre(MDOUBLE alpha, MDOUBLE beta, int in_number_of_categories);
	32	explicit generalGammaDistributionLaguerre(const generalGammaDistributionLaguerre& other);
	33	virtual ~generalGammaDistributionLaguerre();
		// Stores alpha and beta (raised to MINIMUM_ALPHA_PARAM when smaller) and
		// rebuilds the rates and probabilities; a no-op when nothing changes.
	34	virtual void setGammaParameters(int numOfCategories ,MDOUBLE alpha, MDOUBLE beta);
	35
		// Polymorphic copy: returns a heap-allocated copy of *this (allocated with new).
	36	virtual distribution* clone() const { return new generalGammaDistributionLaguerre(*this); }
		// Not meaningful for this method: the implementation always reports an
		// error and returns -1.
	37	virtual MDOUBLE getBorder(const int i) const;
	38
	39	protected:
		// Appends catNum rates (abscissa / beta) and normalised probabilities.
	40	virtual void fillRatesAndProbs(int catNum);
	41	};
	42
	43
	44
	45	#endif
	46

+13

-0

libs/phylogeny/generalGammaDistributionPlusInvariant.cpp less more

	0	#include "generalGammaDistributionPlusInvariant.h"
	1
	2
	3
	4
	5	//#define RATE_INVARIANT 1e-8 //1e-10
	6
	7
	8
	9
	10
	11
	12

+51

-0

libs/phylogeny/generalGammaDistributionPlusInvariant.h less more

	0	#ifndef __GENERAL_GAMMA_DIST_PLUSINV
	1	#define __GENERAL_GAMMA_DIST_PLUSINV
	2	/************************************************************
	3	This class describes a combination of a predefined distribution
	4	with an additional invariant category of probability _Pinv.
	5	This category is always the last rate category (i.e., rate(categories()) == 0)
	6	************************************************************/
	7	#include "definitions.h"
	8	#include "distributionPlusInvariant.h"
	9	#include "distribution.h"
	10	#include "generalGammaDistribution.h"
	11	#include "errorMsg.h"
	12	#include "gammaUtilities.h"
	13	#include "logFile.h"
	14	#include <cmath>
	15
	16
	17
		// distributionPlusInvariant whose base distribution is treated as a
		// generalGammaDistribution, so its alpha and beta can be read and set.
	18	class generalGammaDistributionPlusInvariant : public distributionPlusInvariant {
	19	public:
		// Forwards every argument unchanged to the distributionPlusInvariant constructor.
	20	explicit generalGammaDistributionPlusInvariant(distribution* pDist, const MDOUBLE pInv, const MDOUBLE globalRate=1, MDOUBLE rateInvariantVal=1e-10): distributionPlusInvariant(pDist,pInv,globalRate,rateInvariantVal){}
		// NOTE(review): declared here, but generalGammaDistributionPlusInvariant.cpp
		// as shown contains no definition -- confirm it is defined elsewhere or unused.
	21	explicit generalGammaDistributionPlusInvariant();
		// Copy constructor: the base is default-constructed and then assigned from
		// `other`. No operator= is declared in this class (the declaration below
		// is commented out), so the implicitly generated one is used.
	22	generalGammaDistributionPlusInvariant(const generalGammaDistributionPlusInvariant& other) {(*this) = other;}
	23	//virtual generalGammaDistributionPlusInvariant& operator=(const generalGammaDistributionPlusInvariant& other);
		// Polymorphic copy: returns a heap-allocated copy of *this (allocated with new).
	24	generalGammaDistributionPlusInvariant* clone() const {return new generalGammaDistributionPlusInvariant(*this);}
	25	virtual ~generalGammaDistributionPlusInvariant(){}
	26
	27	// distribution* getBaseDistribution(){return _pBaseDist;}
	28	////get/set the parameters of the mixture
	29	// const int categories() const;
	30	// void setGlobalRate(const MDOUBLE r) {_globalRate = r;}
	31	// MDOUBLE getGlobalRate() const {return _globalRate;}
	32	// virtual void setInvProb(const MDOUBLE p) {_Pinv = p;}
	33	// const MDOUBLE getInvProb() const {return _Pinv;}
	34	//
	35	////get distribution statistics
	36	// virtual const MDOUBLE getCumulativeProb(const MDOUBLE x) const;
	37	// virtual const MDOUBLE rates(const int category) const;
	38	// virtual const MDOUBLE ratesProb(const int i) const;
	39
	40	// get generalGammaDistribution params
		// The accessors below forward to _pBaseDist through an unchecked
		// static_cast: they assume _pBaseDist really points to a
		// generalGammaDistribution (no runtime check is made).
	41	virtual void setAlpha(MDOUBLE newAlpha) {return static_cast<generalGammaDistribution*>(_pBaseDist)->setAlpha(newAlpha);};
	42	virtual MDOUBLE getAlpha() const {return static_cast<generalGammaDistribution*>(_pBaseDist)->getAlpha();}
	43	virtual void setBeta(MDOUBLE newBeta) {return static_cast<generalGammaDistribution*>(_pBaseDist)->setBeta(newBeta);};
	44	virtual MDOUBLE getBeta() const {return static_cast<generalGammaDistribution*>(_pBaseDist)->getBeta();}
	45	//protected:
	46	//MDOUBLE _globalRate;
	47	//MDOUBLE _Pinv;
	48	//distribution* _pBaseDist;
	49	};
	50	#endif

+49

-0

libs/phylogeny/geneticCodeHolder.cpp less more

	0	// $Id: geneticCodeHolder.cpp 962 2006-11-07 15:13:34Z privmane $
	1
	2
	3	#include "geneticCodeHolder.h"
	4
		// Definitions of the static geneticCodeHolder members. Each one passes the
		// text pulled in by #include as the single constructor argument, so every
		// .code file must expand to an expression accepted by
		// geneticCodeString(const char*) (presumably a string literal -- confirm).
	5	const geneticCodeString geneticCodeHolder::nuclearStandard(
	6	#include "replacementMatrixSource/nuclearStandard.code"
	7	);
	8
	9	const geneticCodeString geneticCodeHolder::nuclearEuplotid(
	10	#include "replacementMatrixSource/nuclearEuplotid.code"
	11	);
	12
	13	const geneticCodeString geneticCodeHolder::nuclearCiliate(
	14	#include "replacementMatrixSource/nuclearCiliate.code"
	15	);
	16
	17	const geneticCodeString geneticCodeHolder::nuclearBlepharisma(
	18	#include "replacementMatrixSource/nuclearBlepharisma.code"
	19	);
	20
	21	const geneticCodeString geneticCodeHolder::mitochondriaYeast(
	22	#include "replacementMatrixSource/mitochondriaYeast.code"
	23	);
	24
	25	const geneticCodeString geneticCodeHolder::mitochondriaVertebrate(
	26	#include "replacementMatrixSource/mitochondriaVertebrate.code"
	27	);
	28
	29	const geneticCodeString geneticCodeHolder::mitochondriaProtozoan(
	30	#include "replacementMatrixSource/mitochondriaProtozoan.code"
	31	);
	32
	33	const geneticCodeString geneticCodeHolder::mitochondriaInvertebrate(
	34	#include "replacementMatrixSource/mitochondriaInvertebrate.code"
	35	);
	36
	37	const geneticCodeString geneticCodeHolder::mitochondriaFlatworm(
	38	#include "replacementMatrixSource/mitochondriaFlatworm.code"
	39	);
	40
	41	const geneticCodeString geneticCodeHolder::mitochondriaEchinoderm(
	42	#include "replacementMatrixSource/mitochondriaEchinoderm.code"
	43	);
	44
	45	const geneticCodeString geneticCodeHolder::mitochondriaAscidian(
	46	#include "replacementMatrixSource/mitochondriaAscidian.code"
	47	);
	48

+33

-0

libs/phylogeny/geneticCodeHolder.h less more

	0	// $Id: geneticCodeHolder.h 962 2006-11-07 15:13:34Z privmane $
	1
	2	#ifndef ___GENMATRIXHOLDER
	3	#define ___GENMATRIXHOLDER
	4
	5	#include <string>
	6	using namespace std;
	7
	8	// THIS CONSTRUCT IS USED TO KEEP A STRING THAT IS THE GENETIC CODE.
	9	// THE geneticCodeString IS TO BE USED WHENEVER WE USE ONE OF THE BUILT-IN GENETIC CODES.
	10
	11	class geneticCodeString {
	12	public:
	13	const string Val;
	14	explicit geneticCodeString(const char * str): Val(str){};
	15	};
	16
		// Collection of the built-in genetic codes, one static constant per code.
		// The members are defined in geneticCodeHolder.cpp, each from an included
		// replacementMatrixSource/<name>.code file.
	17	class geneticCodeHolder {
	18	public:
	19	static const geneticCodeString nuclearStandard;
	20	static const geneticCodeString nuclearEuplotid;
	21	static const geneticCodeString nuclearCiliate;
	22	static const geneticCodeString nuclearBlepharisma;
	23	static const geneticCodeString mitochondriaYeast;
	24	static const geneticCodeString mitochondriaVertebrate;
	25	static const geneticCodeString mitochondriaProtozoan;
	26	static const geneticCodeString mitochondriaInvertebrate;
	27	static const geneticCodeString mitochondriaFlatworm;
	28	static const geneticCodeString mitochondriaEchinoderm;
	29	static const geneticCodeString mitochondriaAscidian;
	30	};
	31
	32	#endif // ___GENMATRIXHOLDER

+53

-0

libs/phylogeny/getRandomWeights.cpp less more

	0	// $Id: getRandomWeights.cpp 962 2006-11-07 15:13:34Z privmane $
	1
	2	#include "getRandomWeights.h"
	3	#include "talRandom.h"
	4
	5
	6
	7	void swapRand(Vdouble& weights) {
	8	int j;
	9	int i = talRandom::giveIntRandomNumberBetweenZeroAndEntry(weights.size());
	10	do {
	11	j = talRandom::giveIntRandomNumberBetweenZeroAndEntry(weights.size());
	12	} while ( weights[j] <= 0 );
	13
	14	weights[i]++;
	15	weights[j]--;
	16	}
	17
	18	void getRandomWeights::randomWeights(Vdouble& weights,
	19	const MDOUBLE expectedNumberOfSwapsPerPosition) {
	20	// note that some positions will change more than once, and some won't.
	21	// thus the second argument is an average of sites swaped
	22	int i;
	23	const double DefaultWeight = 1;
	24	for (i=0; i< weights.size(); ++i) weights[i] = DefaultWeight;
	25
	26	for ( i = 0 ; i < expectedNumberOfSwapsPerPosition*weights.size() ; ++i ) {
	27	swapRand(weights);
	28	}
	29	}
	30
	31	void getRandomWeights::standardBPWeights(Vdouble& weights) {
	32	int i;
	33	for (i=0; i< weights.size(); ++i) weights[i] = 0.0;
	34	for (i=0; i< weights.size(); ++i) {
	35	int k = talRandom::giveIntRandomNumberBetweenZeroAndEntry(weights.size());
	36	weights[k]++;
	37	}
	38	}
	39
	40	#define MIN_WEIGHT (0.00001)
	41	void getRandomWeights::randomWeightsGamma(Vdouble& weights,
	42	const MDOUBLE temperature) {
	43	int i;
	44	const double oneOverT = 1.0/temperature;
	45	for (i=0; i< weights.size(); ++i) {
	46	weights[i] = talRandom::SampleGamma(oneOverT,oneOverT);
	47	if (weights[i]<MIN_WEIGHT) {
	48	weights[i] = MIN_WEIGHT;
	49	}
	50	}
	51	}
	52

+31

-0

libs/phylogeny/getRandomWeights.h less more

	0	// $Id: getRandomWeights.h 962 2006-11-07 15:13:34Z privmane $
	1
	2	#ifndef __GET_RANDOM_WEIGHTS
	3	#define __GET_RANDOM_WEIGHTS
	4
	5	#include "definitions.h"
	6
	7
		// Static helpers that fill a weight vector (one weight per position) with
		// randomised values; each function overwrites `weights` in place.
	8	class getRandomWeights {
	9	public:
	10	// this function starts with a vector of weights like (1,1,1,1,1,1,...,1).
	11	// it then takes two positions at random,
	12	// adds 1 to the first, and subtracts 1 from the second.
	13	// if it cannot subtract 1 from the second (its weight is <= 0), it draws a new "second".
		// swaps are repeated while the swap counter is below
		// expectedNumberOfSwapsPerPosition*weights.size().
	14	static void randomWeights(Vdouble& weights,
	15	const MDOUBLE expectedNumberOfSwapsPerPosition);
	16
	17	// the weight of every position is sampled from a gamma distribution
	18	// with parameters alpha = 1/temperature and beta = 1/temperature;
	19	// samples smaller than MIN_WEIGHT (0.00001) are raised to MIN_WEIGHT.
	20	static void randomWeightsGamma(Vdouble& weights,
	21	const MDOUBLE temperature);
	22
	23	// this function starts with a vector of weights like (0,0,0,...,0).
	24	// a position is chosen randomly and the weight of this position
	25	// is increased by 1. This process is repeated weights.size() times.
	26	static void standardBPWeights(Vdouble& weights);
	27	};
	28
	29	#endif
	30

+1055

-0

libs/phylogeny/getopt.c less more

	0	/* Getopt for GNU.
	1	NOTE: getopt is now part of the C library, so if you don't know what
	2	"Keep this file name-space clean" means, talk to drepper@gnu.org
	3	before changing it!
	4	Copyright (C) 1987,88,89,90,91,92,93,94,95,96,98,99,2000,2001
	5	Free Software Foundation, Inc.
	6	This file is part of the GNU C Library.
	7
	8	The GNU C Library is free software; you can redistribute it and/or
	9	modify it under the terms of the GNU Lesser General Public
	10	License as published by the Free Software Foundation; either
	11	version 2.1 of the License, or (at your option) any later version.
	12
	13	The GNU C Library is distributed in the hope that it will be useful,
	14	but WITHOUT ANY WARRANTY; without even the implied warranty of
	15	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	16	Lesser General Public License for more details.
	17
	18	You should have received a copy of the GNU Lesser General Public
	19	License along with the GNU C Library; if not, write to the Free
	20	Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
	21	02111-1307 USA. */
	22
	23	/* This tells Alpha OSF/1 not to define a getopt prototype in <stdio.h>.
	24	Ditto for AIX 3.2 and <stdlib.h>. */
	25	#ifndef _NO_PROTO
	26	# define _NO_PROTO
	27	#endif
	28
	29	#ifdef HAVE_CONFIG_H
	30	# include <config.h>
	31	#endif
	32
	33	#if !defined __STDC__ \|\| !__STDC__
	34	/* This is a separate conditional since some stdc systems
	35	reject `defined (const)'. */
	36	# ifndef const
	37	# define const
	38	# endif
	39	#endif
	40
	41	#include <stdio.h>
	42
	43	/* Comment out all this code if we are using the GNU C Library, and are not
	44	actually compiling the library itself. This code is part of the GNU C
	45	Library, but also included in many other GNU distributions. Compiling
	46	and linking in this code is a waste when using the GNU C library
	47	(especially if it is a shared library). Rather than having every GNU
	48	program understand `configure --with-gnu-libc' and omit the object files,
	49	it is simpler to just do this in the source for each such file. */
	50
	51	#define GETOPT_INTERFACE_VERSION 2
	52	#if !defined _LIBC && defined __GLIBC__ && __GLIBC__ >= 2
	53	# include <gnu-versions.h>
	54	# if _GNU_GETOPT_INTERFACE_VERSION == GETOPT_INTERFACE_VERSION
	55	# define ELIDE_CODE
	56	# endif
	57	#endif
	58
	59	#ifndef ELIDE_CODE
	60
	61
	62	/* This needs to come after some library #include
	63	to get __GNU_LIBRARY__ defined. */
	64	#ifdef __GNU_LIBRARY__
	65	/* Don't include stdlib.h for non-GNU C libraries because some of them
	66	contain conflicting prototypes for getopt. */
	67	# include <stdlib.h>
	68	# include <unistd.h>
	69	#endif /* GNU C library. */
	70
	71	#ifdef VMS
	72	# include <unixlib.h>
	73	# if HAVE_STRING_H - 0
	74	# include <string.h>
	75	# endif
	76	#endif
	77
	78	#ifndef _
	79	/* This is for other GNU distributions with internationalized messages. */
	80	# if defined HAVE_LIBINTL_H \|\| defined _LIBC
	81	# include <libintl.h>
	82	# ifndef _
	83	# define _(msgid) gettext (msgid)
	84	# endif
	85	# else
	86	# define _(msgid) (msgid)
	87	# endif
	88	#endif
	89
	90	/* This version of `getopt' appears to the caller like standard Unix `getopt'
	91	but it behaves differently for the user, since it allows the user
	92	to intersperse the options with the other arguments.
	93
	94	As `getopt' works, it permutes the elements of ARGV so that,
	95	when it is done, all the options precede everything else. Thus
	96	all application programs are extended to handle flexible argument order.
	97
	98	Setting the environment variable POSIXLY_CORRECT disables permutation.
	99	Then the behavior is completely standard.
	100
	101	GNU application programs can use a third alternative mode in which
	102	they can distinguish the relative order of options and other arguments. */
	103
	104	#include "getopt.h"
	105
	106	/* For communication from `getopt' to the caller.
	107	When `getopt' finds an option that takes an argument,
	108	the argument value is returned here.
	109	Also, when `ordering' is RETURN_IN_ORDER,
	110	each non-option ARGV-element is returned here. */
	111
	112	char *optarg;
	113
	114	/* Index in ARGV of the next element to be scanned.
	115	This is used for communication to and from the caller
	116	and for communication between successive calls to `getopt'.
	117
	118	On entry to `getopt', zero means this is the first call; initialize.
	119
	120	When `getopt' returns -1, this is the index of the first of the
	121	non-option elements that the caller should itself scan.
	122
	123	Otherwise, `optind' communicates from one call to the next
	124	how much of ARGV has been scanned so far. */
	125
	126	/* 1003.2 says this must be 1 before any call. */
	127	int optind = 1;
	128
	129	/* Formerly, initialization of getopt depended on optind==0, which
	130	causes problems with re-calling getopt as programs generally don't
	131	know that. */
	132
	133	int __getopt_initialized;
	134
	135	/* The next char to be scanned in the option-element
	136	in which the last option character we returned was found.
	137	This allows us to pick up the scan where we left off.
	138
	139	If this is zero, or a null string, it means resume the scan
	140	by advancing to the next ARGV-element. */
	141
	142	static char *nextchar;
	143
	144	/* Callers store zero here to inhibit the error message
	145	for unrecognized options. */
	146
	147	int opterr = 1;
	148
	149	/* Set to an option character which was unrecognized.
	150	This must be initialized on some systems to avoid linking in the
	151	system's own getopt implementation. */
	152
	153	int optopt = '?';
	154
	155	/* Describe how to deal with options that follow non-option ARGV-elements.
	156
	157	If the caller did not specify anything,
	158	the default is REQUIRE_ORDER if the environment variable
	159	POSIXLY_CORRECT is defined, PERMUTE otherwise.
	160
	161	REQUIRE_ORDER means don't recognize them as options;
	162	stop option processing when the first non-option is seen.
	163	This is what Unix does.
	164	This mode of operation is selected by either setting the environment
	165	variable POSIXLY_CORRECT, or using `+' as the first character
	166	of the list of option characters.
	167
	168	PERMUTE is the default. We permute the contents of ARGV as we scan,
	169	so that eventually all the non-options are at the end. This allows options
	170	to be given in any order, even with programs that were not written to
	171	expect this.
	172
	173	RETURN_IN_ORDER is an option available to programs that were written
	174	to expect options and other ARGV-elements in any order and that care about
	175	the ordering of the two. We describe each non-option ARGV-element
	176	as if it were the argument of an option with character code 1.
	177	Using `-' as the first character of the list of option characters
	178	selects this mode of operation.
	179
	180	The special argument `--' forces an end of option-scanning regardless
	181	of the value of `ordering'. In the case of RETURN_IN_ORDER, only
	182	`--' can cause `getopt' to return -1 with `optind' != ARGC. */
	183
	184	static enum
	185	{
	186	REQUIRE_ORDER, PERMUTE, RETURN_IN_ORDER
	187	} ordering;
	188
	189	/* Value of POSIXLY_CORRECT environment variable. */
	190	static char *posixly_correct;
	191
	192	#ifdef __GNU_LIBRARY__
	193	/* We want to avoid inclusion of string.h with non-GNU libraries
	194	because there are many ways it can cause trouble.
	195	On some systems, it contains special magic macros that don't work
	196	in GCC. */
	197	# include <string.h>
	198	# define my_index strchr
	199	#else
	200
	201	//# if HAVE_STRING_H
	202	# include <string.h>
	203	//# else
	204	//# include <strings.h>
	205	//# endif
	206
	207	/* Avoid depending on library functions or files
	208	whose names are inconsistent. */
	209
	210	#ifndef getenv
	211	extern char *getenv ();
	212	#endif
	213
		/* Local stand-in for strchr, used when __GNU_LIBRARY__ is not defined.
		   Returns a pointer to the first character in STR equal to CHR, or 0 if
		   there is none.  The scan stops before the terminating NUL, so a CHR
		   of 0 is never matched.  Written as a K&R-style definition.  */
	214	static char *
	215	my_index (str, chr)
	216	const char *str;
	217	int chr;
	218	{
	219	while (*str)
	220	{
	221	if (*str == chr)
		/* The cast drops const: the caller receives a writable pointer into STR.  */
	222	return (char *) str;
	223	str++;
	224	}
	225	return 0;
	226	}
	227
	228	/* If using GCC, we can safely declare strlen this way.
	229	If not using GCC, it is ok not to declare it. */
	230	#ifdef __GNUC__
	231	/* Note that Motorola Delta 68k R3V7 comes with GCC but not stddef.h.
	232	That was relevant to code that was here before. */
	233	# if (!defined __STDC__ \|\| !__STDC__) && !defined strlen
	234	/* gcc with -traditional declares the built-in strlen to return int,
	235	and has done so at least since version 2.4.5. -- rms. */
	236	extern int strlen (const char *);
	237	# endif /* not __STDC__ */
	238	#endif /* __GNUC__ */
	239
	240	#endif /* not __GNU_LIBRARY__ */
	241
	242	/* Handle permutation of arguments. */
	243
	244	/* Describe the part of ARGV that contains non-options that have
	245	been skipped. `first_nonopt' is the index in ARGV of the first of them;
	246	`last_nonopt' is the index after the last of them. */
	247
	248	static int first_nonopt;
	249	static int last_nonopt;
	250
	251	#ifdef _LIBC
	252	/* Stored original parameters.
	253	XXX This is no good solution. We should rather copy the args so
	254	that we can compare them later. But we must not use malloc(3). */
	255	extern int __libc_argc;
	256	extern char **__libc_argv;
	257
	258	/* Bash 2.0 gives us an environment variable containing flags
	259	indicating ARGV elements that should not be considered arguments. */
	260
	261	# ifdef USE_NONOPTION_FLAGS
	262	/* Defined in getopt_init.c */
	263	extern char *__getopt_nonoption_flags;
	264
	265	static int nonoption_flags_max_len;
	266	static int nonoption_flags_len;
	267	# endif
	268
	269	# ifdef USE_NONOPTION_FLAGS
	270	# define SWAP_FLAGS(ch1, ch2) \
	271	if (nonoption_flags_len > 0) \
	272	{ \
	273	char __tmp = __getopt_nonoption_flags[ch1]; \
	274	__getopt_nonoption_flags[ch1] = __getopt_nonoption_flags[ch2]; \
	275	__getopt_nonoption_flags[ch2] = __tmp; \
	276	}
	277	# else
	278	# define SWAP_FLAGS(ch1, ch2)
	279	# endif
	280	#else /* !_LIBC */
	281	# define SWAP_FLAGS(ch1, ch2)
	282	#endif /* _LIBC */
	283
	284	/* Exchange two adjacent subsequences of ARGV.
	285	One subsequence is elements [first_nonopt,last_nonopt)
	286	which contains all the non-options that have been skipped so far.
	287	The other is elements [last_nonopt,optind), which contains all
	288	the options processed since those non-options were skipped.
	289
	290	`first_nonopt' and `last_nonopt' are relocated so that they describe
	291	the new indices of the non-options in ARGV after they are moved. */
	292
	293	#if defined __STDC__ && __STDC__
	294	static void exchange (char **);
	295	#endif
	296
		/* Swap the adjacent ARGV ranges [first_nonopt,last_nonopt) and
		   [last_nonopt,optind) in place, then update first_nonopt and
		   last_nonopt to the new position of the non-options.  ARGV is
		   modified; optind is only read.  K&R-style definition.  */
	297	static void
	298	exchange (argv)
	299	char **argv;
	300	{
	301	int bottom = first_nonopt;
	302	int middle = last_nonopt;
	303	int top = optind;
	304	char *tem;
	305
	306	/* Exchange the shorter segment with the far end of the longer segment.
	307	That puts the shorter segment into the right place.
	308	It leaves the longer segment in the right place overall,
	309	but it consists of two parts that need to be swapped next. */
	310
	311	#if defined _LIBC && defined USE_NONOPTION_FLAGS
	312	/* First make sure the handling of the `__getopt_nonoption_flags'
	313	string can work normally. Our top argument must be in the range
	314	of the string. */
	315	if (nonoption_flags_len > 0 && top >= nonoption_flags_max_len)
	316	{
	317	/* We must extend the array. The user plays games with us and
	318	presents new arguments. */
	319	char *new_str = malloc (top + 1);
	320	if (new_str == NULL)
	321	nonoption_flags_len = nonoption_flags_max_len = 0;
	322	else
	323	{
	324	memset (__mempcpy (new_str, __getopt_nonoption_flags,
	325	nonoption_flags_max_len),
	326	'\0', top + 1 - nonoption_flags_max_len);
	327	nonoption_flags_max_len = top + 1;
	328	__getopt_nonoption_flags = new_str;
	329	}
	330	}
	331	#endif
	332
		/* Each pass puts the shorter of [bottom,middle) and [middle,top) into
		   its final place and shrinks the range still to be processed; the
		   loop ends once either segment is empty.  */
	333	while (top > middle && middle > bottom)
	334	{
	335	if (top - middle > middle - bottom)
	336	{
	337	/* Bottom segment is the short one. */
	338	int len = middle - bottom;
	339	register int i;
	340
	341	/* Swap it with the top part of the top segment. */
	342	for (i = 0; i < len; i++)
	343	{
	344	tem = argv[bottom + i];
	345	argv[bottom + i] = argv[top - (middle - bottom) + i];
	346	argv[top - (middle - bottom) + i] = tem;
	347	SWAP_FLAGS (bottom + i, top - (middle - bottom) + i);
	348	}
	349	/* Exclude the moved bottom segment from further swapping. */
	350	top -= len;
	351	}
	352	else
	353	{
	354	/* Top segment is the short one. */
	355	int len = top - middle;
	356	register int i;
	357
	358	/* Swap it with the bottom part of the bottom segment. */
	359	for (i = 0; i < len; i++)
	360	{
	361	tem = argv[bottom + i];
	362	argv[bottom + i] = argv[middle + i];
	363	argv[middle + i] = tem;
	364	SWAP_FLAGS (bottom + i, middle + i);
	365	}
	366	/* Exclude the moved top segment from further swapping. */
	367	bottom += len;
	368	}
	369	}
	370
	371	/* Update records for the slots the non-options now occupy. */
	372
		/* The non-options move up by the number of options that were processed
		   (optind - last_nonopt) and now end at optind.  */
	373	first_nonopt += (optind - last_nonopt);
	374	last_nonopt = optind;
	375	}
	376
	377	/* Initialize the internal data when the first call is made. */
	378
	379	#if defined __STDC__ && __STDC__
	380	static const char *_getopt_initialize (int, char *const *, const char *);
	381	#endif
	382	static const char * /* returns OPTSTRING advanced past a leading '-' or '+' flag, if any */
	383	_getopt_initialize (argc, argv, optstring)
	384	int argc;
	385	char *const *argv;
	386	const char *optstring;
	387	{
	388	/* Start processing options with ARGV-element 1 (since ARGV-element 0
	389	is the program name); the sequence of previously skipped
	390	non-option ARGV-elements is empty. */
	391
	392	first_nonopt = last_nonopt = optind;
	393
	394	nextchar = NULL;
	395
	396	posixly_correct = getenv ("POSIXLY_CORRECT");
	397
	398	/* Determine how to handle the ordering of options and nonoptions. */
	399
	400	if (optstring[0] == '-')
	401	{
	402	ordering = RETURN_IN_ORDER;
	403	++optstring;
	404	}
	405	else if (optstring[0] == '+')
	406	{
	407	ordering = REQUIRE_ORDER;
	408	++optstring;
	409	}
	410	else if (posixly_correct != NULL)
	411	ordering = REQUIRE_ORDER;
	412	else
	413	ordering = PERMUTE;
	414
	415	#if defined _LIBC && defined USE_NONOPTION_FLAGS
	416	if (posixly_correct == NULL
	417	&& argc == __libc_argc && argv == __libc_argv)
	418	{
	419	if (nonoption_flags_max_len == 0)
	420	{
	421	if (__getopt_nonoption_flags == NULL
	422	|| __getopt_nonoption_flags[0] == '\0')
	423	nonoption_flags_max_len = -1; /* no flag string available */
	424	else
	425	{
	426	const char *orig_str = __getopt_nonoption_flags;
	427	int len = nonoption_flags_max_len = strlen (orig_str);
	428	if (nonoption_flags_max_len < argc)
	429	nonoption_flags_max_len = argc; /* buffer holds at least ARGC flag bytes */
	430	__getopt_nonoption_flags =
	431	(char *) malloc (nonoption_flags_max_len);
	432	if (__getopt_nonoption_flags == NULL)
	433	nonoption_flags_max_len = -1;
	434	else
	435	memset (__mempcpy (__getopt_nonoption_flags, orig_str, len),
	436	'\0', nonoption_flags_max_len - len); /* copy the old flags, zero-fill the remainder */
	437	}
	438	}
	439	nonoption_flags_len = nonoption_flags_max_len;
	440	}
	441	else
	442	nonoption_flags_len = 0;
	443	#endif
	444
	445	return optstring;
	446	}
	447
	448	/* Scan elements of ARGV (whose length is ARGC) for option characters
	449	given in OPTSTRING.
	450
	451	If an element of ARGV starts with '-', and is not exactly "-" or "--",
	452	then it is an option element. The characters of this element
	453	(aside from the initial '-') are option characters. If `getopt'
	454	is called repeatedly, it returns successively each of the option characters
	455	from each of the option elements.
	456
	457	If `getopt' finds another option character, it returns that character,
	458	updating `optind' and `nextchar' so that the next call to `getopt' can
	459	resume the scan with the following option character or ARGV-element.
	460
	461	If there are no more option characters, `getopt' returns -1.
	462	Then `optind' is the index in ARGV of the first ARGV-element
	463	that is not an option. (The ARGV-elements have been permuted
	464	so that those that are not options now come last.)
	465
	466	OPTSTRING is a string containing the legitimate option characters.
	467	If an option character is seen that is not listed in OPTSTRING,
	468	return '?' after printing an error message. If you set `opterr' to
	469	zero, the error message is suppressed but we still return '?'.
	470
	471	If a char in OPTSTRING is followed by a colon, that means it wants an arg,
	472	so the following text in the same ARGV-element, or the text of the following
	473	ARGV-element, is returned in `optarg'. Two colons mean an option that
	474	wants an optional arg; if there is text in the current ARGV-element,
	475	it is returned in `optarg', otherwise `optarg' is set to zero.
	476
	477	If OPTSTRING starts with `-' or `+', it requests different methods of
	478	handling the non-option ARGV-elements.
	479	See the comments about RETURN_IN_ORDER and REQUIRE_ORDER, above.
	480
	481	Long-named options begin with `--' instead of `-'.
	482	Their names may be abbreviated as long as the abbreviation is unique
	483	or is an exact match for some defined option. If they have an
	484	argument, it follows the option name in the same ARGV-element, separated
	485	from the option name by a `=', or else in the next ARGV-element.
	486	When `getopt' finds a long-named option, it returns 0 if that option's
	487	`flag' field is nonzero, the value of the option's `val' field
	488	if the `flag' field is zero.
	489
	490	The elements of ARGV aren't really const, because we permute them.
	491	But we pretend they're const in the prototype to be compatible
	492	with other systems.
	493
	494	LONGOPTS is a vector of `struct option' terminated by an
	495	element containing a name which is zero.
	496
	497	LONGIND returns the index in LONGOPT of the long-named option found.
	498	It is only valid when a long-named option has been found by the most
	499	recent call.
	500
	501	If LONG_ONLY is nonzero, '-' as well as '--' can introduce
	502	long-named options. */
	503
	504	int
	505	_getopt_internal (argc, argv, optstring, longopts, longind, long_only)
	506	int argc;
	507	char *const *argv;
	508	const char *optstring;
	509	const struct option *longopts;
	510	int *longind;
	511	int long_only;
	512	{
	513	int print_errors = opterr;
	514	if (optstring[0] == ':') /* a leading ':' in OPTSTRING silences diagnostics */
	515	print_errors = 0;
	516
	517	if (argc < 1)
	518	return -1;
	519
	520	optarg = NULL;
	521
	522	if (optind == 0 || !__getopt_initialized)
	523	{
	524	if (optind == 0)
	525	optind = 1; /* Don't scan ARGV[0], the program name. */
	526	optstring = _getopt_initialize (argc, argv, optstring);
	527	__getopt_initialized = 1;
	528	}
	529
	530	/* Test whether ARGV[optind] points to a non-option argument.
	531	Either it does not have option syntax, or there is an environment flag
	532	from the shell indicating it is not an option. The latter information
	533	is only used when built as part of the GNU libc. */
	534	#if defined _LIBC && defined USE_NONOPTION_FLAGS
	535	# define NONOPTION_P (argv[optind][0] != '-' || argv[optind][1] == '\0' \
	536	|| (optind < nonoption_flags_len \
	537	&& __getopt_nonoption_flags[optind] == '1'))
	538	#else
	539	# define NONOPTION_P (argv[optind][0] != '-' || argv[optind][1] == '\0')
	540	#endif
	541
	542	if (nextchar == NULL || *nextchar == '\0')
	543	{
	544	/* Advance to the next ARGV-element. */
	545
	546	/* Give FIRST_NONOPT & LAST_NONOPT rational values if OPTIND has been
	547	moved back by the user (who may also have changed the arguments). */
	548	if (last_nonopt > optind)
	549	last_nonopt = optind;
	550	if (first_nonopt > optind)
	551	first_nonopt = optind;
	552
	553	if (ordering == PERMUTE)
	554	{
	555	/* If we have just processed some options following some non-options,
	556	exchange them so that the options come first. */
	557
	558	if (first_nonopt != last_nonopt && last_nonopt != optind)
	559	exchange ((char **) argv);
	560	else if (last_nonopt != optind)
	561	first_nonopt = optind;
	562
	563	/* Skip any additional non-options
	564	and extend the range of non-options previously skipped. */
	565
	566	while (optind < argc && NONOPTION_P)
	567	optind++;
	568	last_nonopt = optind;
	569	}
	570
	571	/* The special ARGV-element `--' means premature end of options.
	572	Skip it like a null option,
	573	then exchange with previous non-options as if it were an option,
	574	then skip everything else like a non-option. */
	575
	576	if (optind != argc && !strcmp (argv[optind], "--"))
	577	{
	578	optind++;
	579
	580	if (first_nonopt != last_nonopt && last_nonopt != optind)
	581	exchange ((char **) argv);
	582	else if (first_nonopt == last_nonopt)
	583	first_nonopt = optind;
	584	last_nonopt = argc;
	585
	586	optind = argc;
	587	}
	588
	589	/* If we have done all the ARGV-elements, stop the scan
	590	and back over any non-options that we skipped and permuted. */
	591
	592	if (optind == argc)
	593	{
	594	/* Set the next-arg-index to point at the non-options
	595	that we previously skipped, so the caller will digest them. */
	596	if (first_nonopt != last_nonopt)
	597	optind = first_nonopt;
	598	return -1;
	599	}
	600
	601	/* If we have come to a non-option and did not permute it,
	602	either stop the scan or describe it to the caller and pass it by. */
	603
	604	if (NONOPTION_P)
	605	{
	606	if (ordering == REQUIRE_ORDER)
	607	return -1;
	608	optarg = argv[optind++];
	609	return 1; /* RETURN_IN_ORDER: the non-option is reported as option code 1 */
	610	}
	611
	612	/* We have found another option-ARGV-element.
	613	Skip the initial punctuation. */
	614
	615	nextchar = (argv[optind] + 1
	616	+ (longopts != NULL && argv[optind][1] == '-'));
	617	}
	618
	619	/* Decode the current option-ARGV-element. */
	620
	621	/* Check whether the ARGV-element is a long option.
	622
	623	If long_only and the ARGV-element has the form "-f", where f is
	624	a valid short option, don't consider it an abbreviated form of
	625	a long option that starts with f. Otherwise there would be no
	626	way to give the -f short option.
	627
	628	On the other hand, if there's a long option "fubar" and
	629	the ARGV-element is "-fu", do consider that an abbreviation of
	630	the long option, just like "--fu", and not "-f" with arg "u".
	631
	632	This distinction seems to be the most useful approach. */
	633
	634	if (longopts != NULL
	635	&& (argv[optind][1] == '-'
	636	|| (long_only && (argv[optind][2] || !my_index (optstring, argv[optind][1])))))
	637	{
	638	char *nameend;
	639	const struct option *p;
	640	const struct option *pfound = NULL;
	641	int exact = 0;
	642	int ambig = 0;
	643	int indfound = -1;
	644	int option_index;
	645
	646	for (nameend = nextchar; *nameend && *nameend != '='; nameend++)
	647	/* Do nothing. */ ;
	648
	649	/* Test all long options for either exact match
	650	or abbreviated matches. */
	651	for (p = longopts, option_index = 0; p->name; p++, option_index++)
	652	if (!strncmp (p->name, nextchar, nameend - nextchar))
	653	{
	654	if ((unsigned int) (nameend - nextchar)
	655	== (unsigned int) strlen (p->name))
	656	{
	657	/* Exact match found. */
	658	pfound = p;
	659	indfound = option_index;
	660	exact = 1;
	661	break;
	662	}
	663	else if (pfound == NULL)
	664	{
	665	/* First nonexact match found. */
	666	pfound = p;
	667	indfound = option_index;
	668	}
	669	else if (long_only
	670	|| pfound->has_arg != p->has_arg
	671	|| pfound->flag != p->flag
	672	|| pfound->val != p->val)
	673	/* Second or later nonexact match found. */
	674	ambig = 1;
	675	}
	676
	677	if (ambig && !exact)
	678	{
	679	if (print_errors)
	680	fprintf (stderr, _("%s: option `%s' is ambiguous\n"),
	681	argv[0], argv[optind]);
	682	nextchar += strlen (nextchar);
	683	optind++;
	684	optopt = 0;
	685	return '?';
	686	}
	687
	688	if (pfound != NULL)
	689	{
	690	option_index = indfound;
	691	optind++;
	692	if (*nameend)
	693	{
	694	/* Don't test has_arg with >, because some C compilers don't
	695	allow it to be used on enums. */
	696	if (pfound->has_arg)
	697	optarg = nameend + 1;
	698	else
	699	{
	700	if (print_errors)
	701	{
	702	if (argv[optind - 1][1] == '-')
	703	/* --option */
	704	fprintf (stderr,
	705	_("%s: option `--%s' doesn't allow an argument\n"),
	706	argv[0], pfound->name);
	707	else
	708	/* +option or -option */
	709	fprintf (stderr,
	710	_("%s: option `%c%s' doesn't allow an argument\n"),
	711	argv[0], argv[optind - 1][0], pfound->name);
	712	}
	713
	714	nextchar += strlen (nextchar);
	715
	716	optopt = pfound->val;
	717	return '?';
	718	}
	719	}
	720	else if (pfound->has_arg == 1)
	721	{
	722	if (optind < argc)
	723	optarg = argv[optind++];
	724	else
	725	{
	726	if (print_errors)
	727	fprintf (stderr,
	728	_("%s: option `%s' requires an argument\n"),
	729	argv[0], argv[optind - 1]);
	730	nextchar += strlen (nextchar);
	731	optopt = pfound->val;
	732	return optstring[0] == ':' ? ':' : '?';
	733	}
	734	}
	735	nextchar += strlen (nextchar);
	736	if (longind != NULL)
	737	*longind = option_index;
	738	if (pfound->flag)
	739	{
	740	*(pfound->flag) = pfound->val;
	741	return 0;
	742	}
	743	return pfound->val;
	744	}
	745
	746	/* Can't find it as a long option. If this is not getopt_long_only,
	747	or the option starts with '--' or is not a valid short
	748	option, then it's an error.
	749	Otherwise interpret it as a short option. */
	750	if (!long_only || argv[optind][1] == '-'
	751	|| my_index (optstring, *nextchar) == NULL)
	752	{
	753	if (print_errors)
	754	{
	755	if (argv[optind][1] == '-')
	756	/* --option */
	757	fprintf (stderr, _("%s: unrecognized option `--%s'\n"),
	758	argv[0], nextchar);
	759	else
	760	/* +option or -option */
	761	fprintf (stderr, _("%s: unrecognized option `%c%s'\n"),
	762	argv[0], argv[optind][0], nextchar);
	763	}
	764	nextchar = (char *) "";
	765	optind++;
	766	optopt = 0;
	767	return '?';
	768	}
	769	}
	770
	771	/* Look at and handle the next short option-character. */
	772
	773	{
	774	char c = *nextchar++;
	775	char *temp = my_index (optstring, c);
	776
	777	/* Increment `optind' when we start to process its last character. */
	778	if (*nextchar == '\0')
	779	++optind;
	780
	781	if (temp == NULL || c == ':')
	782	{
	783	if (print_errors)
	784	{
	785	if (posixly_correct)
	786	/* 1003.2 specifies the format of this message. */
	787	fprintf (stderr, _("%s: illegal option -- %c\n"),
	788	argv[0], c);
	789	else
	790	fprintf (stderr, _("%s: invalid option -- %c\n"),
	791	argv[0], c);
	792	}
	793	optopt = c;
	794	return '?';
	795	}
	796	/* Convenience. Treat POSIX -W foo same as long option --foo */
	797	if (temp[0] == 'W' && temp[1] == ';')
	798	{
	799	char *nameend;
	800	const struct option *p;
	801	const struct option *pfound = NULL;
	802	int exact = 0;
	803	int ambig = 0;
	804	int indfound = 0;
	805	int option_index;
	806
	807	/* This is an option that requires an argument. */
	808	if (*nextchar != '\0')
	809	{
	810	optarg = nextchar;
	811	/* If we end this ARGV-element by taking the rest as an arg,
	812	we must advance to the next element now. */
	813	optind++;
	814	}
	815	else if (optind == argc)
	816	{
	817	if (print_errors)
	818	{
	819	/* 1003.2 specifies the format of this message. */
	820	fprintf (stderr, _("%s: option requires an argument -- %c\n"),
	821	argv[0], c);
	822	}
	823	optopt = c;
	824	if (optstring[0] == ':')
	825	c = ':';
	826	else
	827	c = '?';
	828	return c;
	829	}
	830	else
	831	/* We already incremented `optind' once;
	832	increment it again when taking next ARGV-elt as argument. */
	833	optarg = argv[optind++];
	834
	835	/* optarg is now the argument, see if it's in the
	836	table of longopts. */
	837
	838	for (nextchar = nameend = optarg; *nameend && *nameend != '='; nameend++)
	839	/* Do nothing. */ ;
	840
	841	/* Test all long options for either exact match
	842	or abbreviated matches. */
	843	for (p = longopts, option_index = 0; p->name; p++, option_index++)
	844	if (!strncmp (p->name, nextchar, nameend - nextchar))
	845	{
	846	if ((unsigned int) (nameend - nextchar) == strlen (p->name))
	847	{
	848	/* Exact match found. */
	849	pfound = p;
	850	indfound = option_index;
	851	exact = 1;
	852	break;
	853	}
	854	else if (pfound == NULL)
	855	{
	856	/* First nonexact match found. */
	857	pfound = p;
	858	indfound = option_index;
	859	}
	860	else
	861	/* Second or later nonexact match found. */
	862	ambig = 1;
	863	}
	864	if (ambig && !exact)
	865	{
	866	if (print_errors)
	867	fprintf (stderr, _("%s: option `-W %s' is ambiguous\n"),
	868	argv[0], argv[optind]);
	869	nextchar += strlen (nextchar);
	870	optind++;
	871	return '?';
	872	}
	873	if (pfound != NULL)
	874	{
	875	option_index = indfound;
	876	if (*nameend)
	877	{
	878	/* Don't test has_arg with >, because some C compilers don't
	879	allow it to be used on enums. */
	880	if (pfound->has_arg)
	881	optarg = nameend + 1;
	882	else
	883	{
	884	if (print_errors)
	885	fprintf (stderr, _("\
	886	%s: option `-W %s' doesn't allow an argument\n"),
	887	argv[0], pfound->name);
	888
	889	nextchar += strlen (nextchar);
	890	return '?';
	891	}
	892	}
	893	else if (pfound->has_arg == 1)
	894	{
	895	if (optind < argc)
	896	optarg = argv[optind++];
	897	else
	898	{
	899	if (print_errors)
	900	fprintf (stderr,
	901	_("%s: option `%s' requires an argument\n"),
	902	argv[0], argv[optind - 1]);
	903	nextchar += strlen (nextchar);
	904	return optstring[0] == ':' ? ':' : '?';
	905	}
	906	}
	907	nextchar += strlen (nextchar);
	908	if (longind != NULL)
	909	*longind = option_index;
	910	if (pfound->flag)
	911	{
	912	*(pfound->flag) = pfound->val;
	913	return 0;
	914	}
	915	return pfound->val;
	916	}
	917	nextchar = NULL;
	918	return 'W'; /* Let the application handle it. */
	919	}
	920	if (temp[1] == ':')
	921	{
	922	if (temp[2] == ':')
	923	{
	924	/* This is an option that accepts an argument optionally. */
	925	if (*nextchar != '\0')
	926	{
	927	optarg = nextchar;
	928	optind++;
	929	}
	930	else
	931	optarg = NULL;
	932	nextchar = NULL;
	933	}
	934	else
	935	{
	936	/* This is an option that requires an argument. */
	937	if (*nextchar != '\0')
	938	{
	939	optarg = nextchar;
	940	/* If we end this ARGV-element by taking the rest as an arg,
	941	we must advance to the next element now. */
	942	optind++;
	943	}
	944	else if (optind == argc)
	945	{
	946	if (print_errors)
	947	{
	948	/* 1003.2 specifies the format of this message. */
	949	fprintf (stderr,
	950	_("%s: option requires an argument -- %c\n"),
	951	argv[0], c);
	952	}
	953	optopt = c;
	954	if (optstring[0] == ':')
	955	c = ':';
	956	else
	957	c = '?';
	958	}
	959	else
	960	/* We already incremented `optind' once;
	961	increment it again when taking next ARGV-elt as argument. */
	962	optarg = argv[optind++];
	963	nextchar = NULL;
	964	}
	965	}
	966	return c;
	967	}
	968	}
	969
	970	int /* short-option entry point: no long-option table, long_only off */
	971	getopt (argc, argv, optstring)
	972	int argc;
	973	char *const *argv;
	974	const char *optstring;
	975	{
	976	return _getopt_internal (argc, argv, optstring,
	977	(const struct option *) 0,
	978	(int *) 0,
	979	0);
	980	}
	981
	982	#endif /* Not ELIDE_CODE. */
	983
	984	#ifdef TEST
	985
	986	/* Compile with -DTEST to make an executable for use in testing
	987	the above definition of `getopt'. */
	988
	989	int /* -DTEST driver: prints each option getopt reports, then the leftover arguments */
	990	main (argc, argv)
	991	int argc;
	992	char **argv;
	993	{
	994	int c;
	995	int digit_optind = 0; /* ARGV index of the last digit option seen; 0 means none yet */
	996
	997	while (1)
	998	{
	999	int this_option_optind = optind ? optind : 1;
	1000
	1001	c = getopt (argc, argv, "abc:d:0123456789");
	1002	if (c == -1)
	1003	break;
	1004
	1005	switch (c)
	1006	{
	1007	case '0':
	1008	case '1':
	1009	case '2':
	1010	case '3':
	1011	case '4':
	1012	case '5':
	1013	case '6':
	1014	case '7':
	1015	case '8':
	1016	case '9':
	1017	if (digit_optind != 0 && digit_optind != this_option_optind)
	1018	printf ("digits occur in two different argv-elements.\n");
	1019	digit_optind = this_option_optind;
	1020	printf ("option %c\n", c);
	1021	break;
	1022
	1023	case 'a':
	1024	printf ("option a\n");
	1025	break;
	1026
	1027	case 'b':
	1028	printf ("option b\n");
	1029	break;
	1030
	1031	case 'c':
	1032	case 'd': /* "d:" is in the option string, so report it like 'c' instead of falling to default */
	1033	printf ("option %c with value `%s'\n", c, optarg);
	1034	break;
	1035	case '?':
	1036	break;
	1037
	1038	default:
	1039	printf ("?? getopt returned character code 0%o ??\n", c);
	1040	}
	1041	}
	1042
	1043	if (optind < argc)
	1044	{
	1045	printf ("non-option ARGV-elements: ");
	1046	while (optind < argc)
	1047	printf ("%s ", argv[optind++]);
	1048	printf ("\n");
	1049	}
	1050
	1051	exit (0);
	1052	}
	1053
	1054	#endif /* TEST */

+180

-0

libs/phylogeny/getopt.h less more

	0	/* Declarations for getopt.
	1	Copyright (C) 1989-1994, 1996-1999, 2001 Free Software Foundation, Inc.
	2	This file is part of the GNU C Library.
	3
	4	The GNU C Library is free software; you can redistribute it and/or
	5	modify it under the terms of the GNU Lesser General Public
	6	License as published by the Free Software Foundation; either
	7	version 2.1 of the License, or (at your option) any later version.
	8
	9	The GNU C Library is distributed in the hope that it will be useful,
	10	but WITHOUT ANY WARRANTY; without even the implied warranty of
	11	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	12	Lesser General Public License for more details.
	13
	14	You should have received a copy of the GNU Lesser General Public
	15	License along with the GNU C Library; if not, write to the Free
	16	Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
	17	02111-1307 USA. */
	18
	19	#ifndef _GETOPT_H
	20
	21	#ifndef __need_getopt
	22	# define _GETOPT_H 1
	23	#endif
	24
	25	/* If __GNU_LIBRARY__ is not already defined, either we are being used
	26	standalone, or this is the first header included in the source file.
	27	If we are being used with glibc, we need to include <features.h>, but
	28	that does not exist if we are standalone. So: if __GNU_LIBRARY__ is
	29	not defined, include <ctype.h>, which will pull in <features.h> for us
	30	if it's from glibc. (Why ctype.h? It's guaranteed to exist and it
	31	doesn't flood the namespace with stuff the way some other headers do.) */
	32	#if !defined __GNU_LIBRARY__
	33	# include <ctype.h>
	34	#endif
	35
	36	#ifdef __cplusplus
	37	extern "C" {
	38	#endif
	39
	40	/* For communication from `getopt' to the caller.
	41	When `getopt' finds an option that takes an argument,
	42	the argument value is returned here.
	43	Also, when `ordering' is RETURN_IN_ORDER,
	44	each non-option ARGV-element is returned here. */
	45
	46	extern char *optarg;
	47
	48	/* Index in ARGV of the next element to be scanned.
	49	This is used for communication to and from the caller
	50	and for communication between successive calls to `getopt'.
	51
	52	On entry to `getopt', zero means this is the first call; initialize.
	53
	54	When `getopt' returns -1, this is the index of the first of the
	55	non-option elements that the caller should itself scan.
	56
	57	Otherwise, `optind' communicates from one call to the next
	58	how much of ARGV has been scanned so far. */
	59
	60	extern int optind;
	61
	62	/* Callers store zero here to inhibit the error message `getopt' prints
	63	for unrecognized options. */
	64
	65	extern int opterr;
	66
	67	/* Set to an option character which was unrecognized. */
	68
	69	extern int optopt;
	70
	71	#ifndef __need_getopt
	72	/* Describe the long-named options requested by the application.
	73	The LONG_OPTIONS argument to getopt_long or getopt_long_only is a vector
	74	of `struct option' terminated by an element containing a name which is
	75	zero.
	76
	77	The field `has_arg' is:
	78	no_argument (or 0) if the option does not take an argument,
	79	required_argument (or 1) if the option requires an argument,
	80	optional_argument (or 2) if the option takes an optional argument.
	81
	82	If the field `flag' is not NULL, it points to a variable that is set
	83	to the value given in the field `val' when the option is found, but
	84	left unchanged if the option is not found.
	85
	86	To have a long-named option do something other than set an `int' to
	87	a compiled-in constant, such as set a value from `optarg', set the
	88	option's `flag' field to zero and its `val' field to a nonzero
	89	value (the equivalent single-letter option character, if there is
	90	one). For long options that have a zero `flag' field, `getopt'
	91	returns the contents of the `val' field. */
	92
	93	struct option
	94	{
	95	# if (defined __STDC__ && __STDC__) || defined __cplusplus
	96	const char *name; /* long option name; a zero name terminates the table */
	97	# else
	98	char *name;
	99	# endif
	100	/* has_arg can't be an enum because some compilers complain about
	101	type mismatches in all the code that assumes it is an int. */
	102	int has_arg; /* no_argument, required_argument or optional_argument */
	103	int *flag; /* if not NULL, receives `val' when the option is found */
	104	int val; /* value stored through `flag', or returned when `flag' is zero */
	105	};
	106
	107	/* Names for the values of the `has_arg' field of `struct option'. */
	108
	109	# define no_argument 0
	110	# define required_argument 1
	111	# define optional_argument 2
	112	#endif /* need getopt */
	113
	114
	115	/* Get definitions and prototypes for functions to process the
	116	arguments in ARGV (ARGC of them, minus the program name) for
	117	options given in OPTS.
	118
	119	Return the option character from OPTS just read. Return -1 when
	120	there are no more options. For unrecognized options, or options
	121	missing arguments, `optopt' is set to the option letter, and '?' is
	122	returned.
	123
	124	The OPTS string is a list of characters which are recognized option
	125	letters, optionally followed by colons, specifying that that letter
	126	takes an argument, to be placed in `optarg'.
	127
	128	If a letter in OPTS is followed by two colons, its argument is
	129	optional. This behavior is specific to the GNU `getopt'.
	130
	131	The argument `--' causes premature termination of argument
	132	scanning, explicitly telling `getopt' that there are no more
	133	options.
	134
	135	If OPTS begins with `-', then non-option arguments are treated as
	136	arguments to the option with character code 1. This behavior is specific to the GNU
	137	`getopt'. */
	138
	139	#if (defined __STDC__ && __STDC__) || defined __cplusplus
	140	# ifdef __GNU_LIBRARY__
	141	/* Many other libraries have conflicting prototypes for getopt, with
	142	differences in the consts, in stdlib.h. To avoid compilation
	143	errors, only prototype getopt for the GNU C library. */
	144	extern int getopt (int __argc, char *const *__argv, const char *__shortopts);
	145	# else /* not __GNU_LIBRARY__ */
	146	extern int getopt ();
	147	# endif /* __GNU_LIBRARY__ */
	148
	149	# ifndef __need_getopt
	150	extern int getopt_long (int __argc, char *const *__argv, const char *__shortopts,
	151	const struct option *__longopts, int *__longind);
	152	extern int getopt_long_only (int __argc, char *const *__argv,
	153	const char *__shortopts,
	154	const struct option *__longopts, int *__longind);
	155
	156	/* Internal only. Users should not call this directly. */
	157	extern int _getopt_internal (int __argc, char *const *__argv,
	158	const char *__shortopts,
	159	const struct option *__longopts, int *__longind,
	160	int __long_only);
	161	# endif
	162	#else /* not __STDC__ */
	163	extern int getopt ();
	164	# ifndef __need_getopt
	165	extern int getopt_long ();
	166	extern int getopt_long_only ();
	167
	168	extern int _getopt_internal ();
	169	# endif
	170	#endif /* __STDC__ */
	171
	172	#ifdef __cplusplus
	173	}
	174	#endif
	175
	176	/* Make sure we later can get all the definitions and declarations. */
	177	#undef __need_getopt
	178
	179	#endif /* getopt.h */

+188

-0

libs/phylogeny/getopt1.c less more

	0	/* getopt_long and getopt_long_only entry points for GNU getopt.
	1	Copyright (C) 1987,88,89,90,91,92,93,94,96,97,98
	2	Free Software Foundation, Inc.
	3	This file is part of the GNU C Library.
	4
	5	The GNU C Library is free software; you can redistribute it and/or
	6	modify it under the terms of the GNU Lesser General Public
	7	License as published by the Free Software Foundation; either
	8	version 2.1 of the License, or (at your option) any later version.
	9
	10	The GNU C Library is distributed in the hope that it will be useful,
	11	but WITHOUT ANY WARRANTY; without even the implied warranty of
	12	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	13	Lesser General Public License for more details.
	14
	15	You should have received a copy of the GNU Lesser General Public
	16	License along with the GNU C Library; if not, write to the Free
	17	Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
	18	02111-1307 USA. */
	19
	20	#ifdef HAVE_CONFIG_H
	21	#include <config.h>
	22	#endif
	23
	24	#include "getopt.h"
	25
	26	#if !defined __STDC__ || !__STDC__
	27	/* This is a separate conditional since some stdc systems
	28	reject `defined (const)'. */
	29	#ifndef const
	30	#define const /* expands to nothing when the compiler is not in ISO C mode */
	31	#endif
	32	#endif
	33
	34	#include <stdio.h>
	35
	36	/* Comment out all this code if we are using the GNU C Library, and are not
	37	actually compiling the library itself. This code is part of the GNU C
	38	Library, but also included in many other GNU distributions. Compiling
	39	and linking in this code is a waste when using the GNU C library
	40	(especially if it is a shared library). Rather than having every GNU
	41	program understand `configure --with-gnu-libc' and omit the object files,
	42	it is simpler to just do this in the source for each such file. */
	43
	44	#define GETOPT_INTERFACE_VERSION 2
	45	#if !defined _LIBC && defined __GLIBC__ && __GLIBC__ >= 2
	46	#include <gnu-versions.h>
	47	#if _GNU_GETOPT_INTERFACE_VERSION == GETOPT_INTERFACE_VERSION
	48	#define ELIDE_CODE
	49	#endif
	50	#endif
	51
	52	#ifndef ELIDE_CODE
	53
	54
	55	/* This needs to come after some library #include
	56	to get __GNU_LIBRARY__ defined. */
	57	#ifdef __GNU_LIBRARY__
	58	#include <stdlib.h>
	59	#endif
	60
	61	#ifndef NULL
	62	#define NULL 0
	63	#endif
	64
	65	int /* long-option entry point: forwards to _getopt_internal with long_only = 0 */
	66	getopt_long (argc, argv, options, long_options, opt_index)
	67	int argc;
	68	char *const *argv;
	69	const char *options;
	70	const struct option *long_options;
	71	int *opt_index;
	72	{
	73	return _getopt_internal (argc, argv, options, long_options, opt_index, 0);
	74	}
	75
	76	/* Like getopt_long, but '-' as well as '--' can indicate a long option.
	77	If an option that starts with '-' (not '--') doesn't match a long option,
	78	but does match a short option, it is parsed as a short option
	79	instead. */
	80
	81	int /* forwards to _getopt_internal with long_only = 1 */
	82	getopt_long_only (argc, argv, options, long_options, opt_index)
	83	int argc;
	84	char *const *argv;
	85	const char *options;
	86	const struct option *long_options;
	87	int *opt_index;
	88	{
	89	return _getopt_internal (argc, argv, options, long_options, opt_index, 1);
	90	}
	91
	92
	93	#endif /* Not ELIDE_CODE. */
	94
	95	#ifdef TEST
	96
	97	#include <stdio.h>
	98
	99	int /* -DTEST driver: prints each option getopt_long reports, then the leftover arguments */
	100	main (argc, argv)
	101	int argc;
	102	char **argv;
	103	{
	104	int c;
	105	int digit_optind = 0; /* ARGV index of the last digit option seen; 0 means none yet */
	106
	107	while (1)
	108	{
	109	int this_option_optind = optind ? optind : 1;
	110	int option_index = 0;
	111	static struct option long_options[] =
	112	{
	113	{"add", 1, 0, 0}, /* fields: name, has_arg, flag, val */
	114	{"append", 0, 0, 0},
	115	{"delete", 1, 0, 0},
	116	{"verbose", 0, 0, 0},
	117	{"create", 0, 0, 0},
	118	{"file", 1, 0, 0},
	119	{0, 0, 0, 0} /* zero name terminates the table */
	120	};
	121
	122	c = getopt_long (argc, argv, "abc:d:0123456789",
	123	long_options, &option_index);
	124	if (c == -1)
	125	break;
	126
	127	switch (c)
	128	{
	129	case 0: /* every table entry has val 0 and flag 0, so long options arrive here */
	130	printf ("option %s", long_options[option_index].name);
	131	if (optarg)
	132	printf (" with arg %s", optarg);
	133	printf ("\n");
	134	break;
	135
	136	case '0':
	137	case '1':
	138	case '2':
	139	case '3':
	140	case '4':
	141	case '5':
	142	case '6':
	143	case '7':
	144	case '8':
	145	case '9':
	146	if (digit_optind != 0 && digit_optind != this_option_optind)
	147	printf ("digits occur in two different argv-elements.\n");
	148	digit_optind = this_option_optind;
	149	printf ("option %c\n", c);
	150	break;
	151
	152	case 'a':
	153	printf ("option a\n");
	154	break;
	155
	156	case 'b':
	157	printf ("option b\n");
	158	break;
	159
	160	case 'c':
	161	printf ("option c with value `%s'\n", optarg);
	162	break;
	163
	164	case 'd':
	165	printf ("option d with value `%s'\n", optarg);
	166	break;
	167
	168	case '?': /* nothing is printed here for '?' */
	169	break;
	170
	171	default:
	172	printf ("?? getopt returned character code 0%o ??\n", c);
	173	}
	174	}
	175
	176	if (optind < argc)
	177	{
	178	printf ("non-option ARGV-elements: ");
	179	while (optind < argc)
	180	printf ("%s ", argv[optind++]);
	181	printf ("\n");
	182	}
	183
	184	exit (0);
	185	}
	186
	187	#endif /* TEST */

+139

-0

libs/phylogeny/givenRatesMLDistance.cpp less more

	0	// $Id: givenRatesMLDistance.cpp 962 2006-11-07 15:13:34Z privmane $
	1	#include "givenRatesMLDistance.h"
	2	#include "numRec.h"
	3
	4	class C_eval_likelihoodOfDistanceGivenRates{ // functor: minus weighted log-likelihood of a distance, given per-position rates
	5	private:
	6	const stochasticProcess& _sp;
	7	const sequence& _s1;
	8	const sequence& _s2;
	9	const Vdouble& _rates; // indexed by position; scales dist in Pij_t
	10	const Vdouble* _weights; // optional per-position weights; NULL means weight 1.0
	11
	12	public:
	13	C_eval_likelihoodOfDistanceGivenRates(const stochasticProcess& sp,
	14	const sequence& s1,
	15	const sequence& s2,
	16	const Vdouble& rates,
	17	const Vdouble * weights)
	18	: _sp(sp),_s1(s1),_s2(s2),_rates(rates),_weights(weights)
	19	{};
	20
	21	MDOUBLE operator() (MDOUBLE dist) const { // returns -sum over positions of weight*log(likelihood)
	22	MDOUBLE sumL=0.0; // sum of log likelihoods
	23	MDOUBLE posLikelihood = 0.0; // likelihood of a specific position
	24	for (int pos=0; pos < _s1.seqLen(); ++pos){
	25	if (_s1.isUnknown(pos) && _s2.isUnknown(pos)) continue; // the case of two unknowns
	26	posLikelihood = 0.0;
	27	if (_s1.isUnknown(pos) && _s2.isSpecific(pos)) {
	28	// this is the more complicated case, where _s1 = ?, _s2 = specific
	29	posLikelihood = _sp.freq(_s2[pos]);
	30	} else if (_s2.isUnknown(pos) && _s1.isSpecific(pos)) {
	31	posLikelihood = _sp.freq(_s1[pos]);
	32	} else {
	33	MDOUBLE rate = _rates[pos];
	34	MDOUBLE pij= 0.0;
	35	if (_s1.isSpecific(pos) && _s2.isSpecific(pos)) {
	36	// the simple case, where AA i is changing to AA j
	37	pij= _sp.Pij_t(_s1[pos],_s2[pos],dist*rate);
	38	posLikelihood += pij * _sp.freq(_s1[pos]);
	39	} else {// this is the most complicated case, when you have
	40	// combinations of letters, for example B in one
	41	// sequence and ? in the other.
	42	for (int iS1 =0; iS1< _sp.alphabetSize(); ++iS1) {
	43	for (int iS2 =0; iS2< _sp.alphabetSize(); ++iS2) {
	44	if ((_s1.getAlphabet()->relations(_s1[pos],iS1)) &&
	45	(_s2.getAlphabet()->relations(_s2[pos],iS2))) {
	46	posLikelihood += _sp.freq(iS1)*_sp.Pij_t(iS1,iS2,dist*rate);
	47	}
	48	}
	49	}
	50	}
	51	}
	52	assert(posLikelihood>0.0);
	53	sumL += log(posLikelihood)*(_weights ? (*_weights)[pos]:1.0);
	54	}
	55	return -sumL;
	56	};
	57	};
	58
	59	class C_eval_likelihoodOfDistanceGivenRates_d{ // derivative.
	60	private:
	61	const stochasticProcess& _sp;
	62	const sequence& _s1;
	63	const sequence& _s2;
	64	const Vdouble& _rates;
	65	const Vdouble* _weights;
	66
	67	public:
	68	C_eval_likelihoodOfDistanceGivenRates_d(const stochasticProcess& sp,
	69	const sequence& s1,
	70	const sequence& s2,
	71	const Vdouble& rates,
	72	const Vdouble * weights)
	73	: _sp(sp),_s1(s1),_s2(s2),_rates(rates),_weights(weights)
	74	{};
	75
	76	MDOUBLE operator() (MDOUBLE dist) const {
	77	MDOUBLE sumL=0.0; // sum of log likelihoods
	78	MDOUBLE posLikelihood = 0.0; // likelihood of a specific position
	79	MDOUBLE posLikelihood_d = 0.0; // derivative of the likelihood at a specific position
	80	for (int pos=0; pos < _s1.seqLen(); ++pos){
	81	if (_s1.isUnknown(pos) && _s2.isUnknown(pos)) continue; // the case of two unknowns
	82	posLikelihood = 0.0;
	83	posLikelihood_d = 0.0;
	84	if (_s1.isUnknown(pos) && _s2.isSpecific(pos)) {
	85	// this is the more complicated case, where _s1 = ?, _s2 = specific
	86	posLikelihood = _sp.freq(_s2[pos]);
	87	posLikelihood_d =0.0;
	88	} else if (_s2.isUnknown(pos) && _s1.isSpecific(pos)) {
	89	posLikelihood = _sp.freq(_s1[pos]);
	90	posLikelihood_d =0.0;
	91	} else {
	92	MDOUBLE rate = _rates[pos];
	93	MDOUBLE pij= 0.0;
	94	MDOUBLE dpij=0.0;
	95	if (_s1.isSpecific(pos) && _s2.isSpecific(pos)) {
	96	// the simple case, where AA i is changing to AA j
	97	pij= _sp.Pij_t(_s1[pos],_s2[pos],dist*rate);
	98	dpij= _sp.dPij_dt(_s1[pos],_s2[pos],distrate)rate;
	99	MDOUBLE tmp = _sp.freq(_s1[pos]);
	100	posLikelihood += pij *tmp;
	101	posLikelihood_d += dpij*tmp;
	102	} else {// this is the most complicated case, when you have
	103	// combinations of letters, for example B in one
	104	// sequence and ? in the other.
	105	for (int iS1 =0; iS1< _sp.alphabetSize(); ++iS1) {
	106	for (int iS2 =0; iS2< _sp.alphabetSize(); ++iS2) {
	107	if ((_s1.getAlphabet()->relations(_s1[pos],iS1)) &&
	108	(_s2.getAlphabet()->relations(_s2[pos],iS2))) {
	109	MDOUBLE exp = _sp.freq(iS1);
	110	posLikelihood += exp* _sp.Pij_t(iS1,iS2,dist*rate);
	111	posLikelihood_d += exp * _sp.dPij_dt(iS1,iS2,distrate)rate;
	112	}
	113	}
	114	}
	115	}
	116	}
	117	assert(posLikelihood>0.0);
	118	sumL += (posLikelihood_d/posLikelihood)(_weights ? (_weights)[pos]:1.0);
	119	}
	120	return -sumL;
	121	};
	122	};
	123
	124	const MDOUBLE givenRatesMLDistance::giveDistance(const sequence& s1,
	125	const sequence& s2,
	126	const vector<MDOUBLE> * weights,
	127	MDOUBLE* score) const
	128	{
	129	const MDOUBLE ax=0,bx=1.0,cx=_maxPairwiseDistance;
	130	MDOUBLE dist=-1.0;
	131	MDOUBLE resL = -dbrent(ax,bx,cx,
	132	C_eval_likelihoodOfDistanceGivenRates(_sp,s1,s2,_rates,weights),
	133	C_eval_likelihoodOfDistanceGivenRates_d(_sp,s1,s2,_rates,weights),
	134	_toll,
	135	&dist);
	136	if (score) *score = resL;
	137	return dist;
	138	};

+61

-0

libs/phylogeny/givenRatesMLDistance.h less more

	0	// $Id: givenRatesMLDistance.h 962 2006-11-07 15:13:34Z privmane $
	1
	2	#ifndef ___GIVEN_RATES_ML_DISTANCE_H
	3	#define ___GIVEN_RATES_ML_DISTANCE_H
	4
	5	#include "definitions.h"
	6	#include "countTableComponent.h"
	7	#include "likeDist.h"
	8	#include "stochasticProcess.h"
	9	#include "logFile.h"
	10	#include <cmath>
	11	using namespace std;
	12
	13	class givenRatesMLDistance : public likeDist {
	14	public:
	15	explicit givenRatesMLDistance(const stochasticProcess& sp,
	16	const Vdouble& rates,
	17	const MDOUBLE toll =0.0001,
	18	const MDOUBLE maxPairwiseDistance = 5.0
	19	)
	20	: likeDist(sp, toll,maxPairwiseDistance),_rates(rates) {}
	21
	22	explicit givenRatesMLDistance(stochasticProcess& sp,
	23	const Vdouble& rates,
	24	const MDOUBLE toll =0.0001,
	25	const MDOUBLE maxPairwiseDistance = 5.0
	26	)
	27	: likeDist(sp, toll,maxPairwiseDistance),_rates(rates) {}
	28
	29	explicit givenRatesMLDistance(const stochasticProcess& sp,
	30	const MDOUBLE toll =0.0001,
	31	const MDOUBLE maxPairwiseDistance = 5.0
	32	)
	33	: likeDist(sp, toll,maxPairwiseDistance),_rates(0) {}
	34
	35	explicit givenRatesMLDistance(stochasticProcess& sp,
	36	const MDOUBLE toll =0.0001,
	37	const MDOUBLE maxPairwiseDistance = 5.0
	38	)
	39	: likeDist(sp, toll,maxPairwiseDistance),_rates(0) {}
	40
	41	givenRatesMLDistance(const givenRatesMLDistance& other):
	42	likeDist(static_cast<likeDist>(other)), _rates(other._rates) {}
	43
	44	virtual givenRatesMLDistance* clone() const {return new givenRatesMLDistance(*this);}
	45
	46	void setRates(const Vdouble &rates) {_rates = rates;}
	47
	48	// Returns the estimated ML distance between the 2 sequences.
	49	// if score is given, it will be assigned the log-likelihood.
	50	const MDOUBLE giveDistance(const sequence& s1,
	51	const sequence& s2,
	52	const vector<MDOUBLE> * weights,
	53	MDOUBLE* score=NULL) const;
	54
	55	private:
	56	Vdouble _rates;
	57	};
	58
	59	#endif
	60

+144

-0

libs/phylogeny/goldmanYangModel.cpp less more

	0	// $Id: goldmanYangModel.cpp 962 2006-11-07 15:13:34Z privmane $
	1
	2	#include "goldmanYangModel.h"
	3	#include "codon.h"
	4	#include "readDatMatrix.h" // for the normalizeQ function.
	5
	6
	7	goldmanYangModel::goldmanYangModel(const MDOUBLE inV, const MDOUBLE inK,codon & inCodonAlph, const bool globalV):
	8	_v(inV),_k(inK),_globalV(_globalV),_codonAlph(inCodonAlph){
	9	homogenousFreq();
	10	_Q.resize(_codonAlph.size());
	11	for (int z=0; z < _Q.size();++z) _Q[z].resize(_codonAlph.size(),0);
	12	updateQ();
	13
	14	}
	15
	16
	17	goldmanYangModel::goldmanYangModel(const MDOUBLE inV, const MDOUBLE inK, codon & inCodonAlph,const Vdouble& freq,const bool globalV):
	18	_freq(freq),_v(inV),_k(inK),_globalV(_globalV),_codonAlph(inCodonAlph){
	19	_Q.resize(_codonAlph.size());
	20	for (int z=0; z < _Q.size();++z) _Q[z].resize(_codonAlph.size(),0);
	21	updateQ();
	22	}
	23
	24
	25	void goldmanYangModel::updateQ() {
	26
	27	// building q.
	28	int i,j;
	29	MDOUBLE sum=0.0;
	30	MDOUBLE epsilon=0.00000001;//0.00000000001;
	31	MDOUBLE factor = 1000.0;
	32	for (i=0; i < _Q.size();++i) {
	33	sum=0;
	34	for (j=0; j < _Q.size();++j) {
	35	if (j==i) continue; //same codon
	36	if (codonUtility::codonDiff(i,j,_codonAlph) == codonUtility::tr) {
	37	_Q[i][j] = _kexp(-(1/factor)_gcd.getGranthamDistance(codonUtility::aaOf(i,_codonAlph),codonUtility::aaOf(j,_codonAlph))*_v);
	38	if (_Q[i][j]<epsilon) _Q[i][j] = epsilon;
	39	}else if (codonUtility::codonDiff(i,j,_codonAlph) == codonUtility::tv) {
	40	_Q[i][j] = exp(-(1/factor)_gcd.getGranthamDistance(codonUtility::aaOf(i,_codonAlph),codonUtility::aaOf(j,_codonAlph))_v);
	41	if (_Q[i][j]<epsilon) _Q[i][j] = epsilon;
	42	}
	43	else _Q[i][j] = 0;//more than one substitution.
	44
	45	_Q[i][j]*=_freq[j];
	46	sum += _Q[i][j];
	47
	48	}
	49	_Q[i][i]=-sum;
	50	}
	51
	52
	53	// check:
	54	/* LOG(5,<<"\n\n\n ===================================== \n");
	55	int a1,a2;
	56	for (a1=0;a1<4;++a1){
	57	for (a2=0;a2<4;++a2){
	58	LOG(5,<<qMatrix[a1][a2]<<"\t");
	59	}
	60	LOG(5,<<endl);
	61	}
	62	*/
	63
	64
	65	if (_globalV == true)
	66	normalizeQ(_Q,_freq);
	67
	68	// check:
	69	/* LOG(5,<<"\n\n\n ===================================== \n");
	70	for (a1=0;a1<4;++a1){
	71	for ( a2=0;a2<4;++a2){
	72	LOG(5,<<qMatrix[a1][a2]<<"\t");
	73	}
	74	LOG(5,<<endl);
	75	}
	76	*/
	77
	78
	79	// updating _q2Pt;
	80	// _Q = qMatrix;
	81	_q2pt.fillFromRateMatrix(_freq,_Q);
	82
	83
	84
	85	}
	86
	87
	88
	89	// original with V and not 1/V
	90	/*
	91	void goldmanYangModel::updateQ() {
	92	// building q.
	93	VVdouble qMatrix(_codonAlph.size());
	94	int i,j,z;
	95	MDOUBLE sum=0.0;
	96	for (z=0; z < qMatrix.size();++z) qMatrix[z].resize(_codonAlph.size(),0);
	97	for (i=0; i < qMatrix.size();++i) {
	98	sum=0;
	99	for (j=0; j < qMatrix.size();++j) {
	100	if (j==i) continue;
	101	if (codonUtility::codonDiff(i,j) == codonUtility::different) {
	102	qMatrix[i][j] =0;
	103	} else if (codonUtility::codonDiff(i,j) == codonUtility::transition) {
	104	qMatrix[i][j] =_k*exp(-_gcd.getGranthamDistance(codonUtility::aaOf(i),codonUtility::aaOf(j))/_v);
	105	} else if (codonUtility::codonDiff(i,j) == codonUtility::transversion) {
	106	qMatrix[i][j] = exp(-_gcd.getGranthamDistance(codonUtility::aaOf(i),codonUtility::aaOf(j))/_v);
	107	}
	108	qMatrix[i][j]*=_freq[j];
	109	sum += qMatrix[i][j];
	110	}
	111	qMatrix[i][i]=-sum;
	112	}
	113	// check:
	114	//LOG(5,<<"\n\n\n ===================================== \n");
	115	//int a1,a2;
	116	//for (a1=0;a1<4;++a1){
	117	// for (a2=0;a2<4;++a2){
	118	// LOG(5,<<qMatrix[a1][a2]<<"\t");
	119	// }
	120	// LOG(5,<<endl);
	121	//}
	122
	123	if (_globalV == true)
	124	normalizeQ(qMatrix,_freq);
	125
	126	//LOG(5,<<"\n\n\n ===================================== \n");
	127	//LOG(5,<<endl<<endl);
	128	//for (a1=0;a1<4;++a1){
	129	// for (a2=0;a2<4;++a2){
	130	// LOG(5,<<qMatrix[a1][a2]<<"\t");
	131	// }
	132	// LOG(5,<<endl);
	133	//}
	134
	135	// updating _q2Pt;
	136	_Q = qMatrix;
	137	_q2pt.fillFromRateMatrix(_freq,qMatrix);
	138	}
	139
	140
	141	*/
	142
	143

+56

-0

libs/phylogeny/goldmanYangModel.h less more

	0	// $Id: goldmanYangModel.h 1841 2007-03-11 15:19:14Z adist $
	1
	2	#ifndef ___GOLDMAN_YANG_MODEL
	3	#define ___GOLDMAN_YANG_MODEL
	4
	5	#include "definitions.h"
	6	#include "replacementModel.h"
	7	#include "fromQtoPt.h"
	8	#include "granthamChemicalDistances.h"
	9	#include "codon.h"
	10
	11	class goldmanYangModel : public replacementModel {
	12	public:
	13	explicit goldmanYangModel(const MDOUBLE inV, const MDOUBLE inK,codon & inCodonAlph, const bool globalV=true);
	14	explicit goldmanYangModel(const MDOUBLE inV, const MDOUBLE inK,codon & inCodonAlph, const Vdouble& freq,const bool globalV=true);
	15	virtual replacementModel* clone() const { return new goldmanYangModel(*this); }
	16	const int alphabetSize() const {return _codonAlph.size();}
	17	const MDOUBLE Pij_t(const int i,const int j, const MDOUBLE d) const {
	18	return _q2pt.Pij_t(i,j,d);
	19	}
	20	const MDOUBLE dPij_dt(const int i,const int j, const MDOUBLE d) const{
	21	return _q2pt.dPij_dt(i,j,d);
	22	}
	23	const MDOUBLE d2Pij_dt2(const int i,const int j, const MDOUBLE d) const{
	24	return _q2pt.d2Pij_dt2(i,j,d);
	25	}
	26	const MDOUBLE freq(const int i) const {return _freq[i];};
	27	void setK(const MDOUBLE newK) { _k = newK;updateQ();}
	28	void setV(const MDOUBLE newV) { _v = newV;updateQ();}
	29	void homogenousFreq(){ _freq.erase(_freq.begin(),_freq.end()),_freq.resize(_codonAlph.size(),1.0/_codonAlph.size());}
	30
	31	MDOUBLE getK() {return _k;}
	32	MDOUBLE getV() {return _v;}
	33
	34	void setGlobalV(const bool globalV){ _globalV=globalV;}
	35	const granthamChemicalDistances& getGCD(){return _gcd;}
	36	MDOUBLE getQij(const int i,const int j)const {return _Q[i][j];}
	37
	38	VVdouble getQ() const { return _Q;}
	39	Vdouble getFreqs() const {return _freq;}
	40
	41	private:
	42	Vdouble _freq;
	43	MDOUBLE _v; //selection factor.
	44	MDOUBLE _k; // Tr/Tv ratio.
	45	void updateQ();
	46	q2pt _q2pt;
	47	granthamChemicalDistances _gcd;
	48	bool _globalV; //false when compute v per site
	49	VVdouble _Q;
	50	codon & _codonAlph;
	51
	52	};
	53
	54
	55	#endif

+187

-0

libs/phylogeny/granthamChemicalDistances.cpp less more

	0	// $Id: granthamChemicalDistances.cpp 962 2006-11-07 15:13:34Z privmane $
	1
	2	#include "granthamChemicalDistances.h"
	3	#include <cmath>
	4
	5	granthamChemicalDistances::granthamChemicalDistances() {
	6	for (int i=0; i<20;++i) GranChemDist[i][i]=0;
	7	GranChemDist[0][1]=112; GranChemDist[0][2]=111; GranChemDist[0][3]=126; GranChemDist[0][4]=195; GranChemDist[0][5]=91; GranChemDist[0][6]=107;
	8	GranChemDist[0][7]=60; GranChemDist[0][8]=86; GranChemDist[0][9]=94; GranChemDist[0][10]=96; GranChemDist[0][11]=106; GranChemDist[0][12]=84;
	9	GranChemDist[0][13]=113; GranChemDist[0][14]=27; GranChemDist[0][15]=99; GranChemDist[0][16]=58; GranChemDist[0][17]=148; GranChemDist[0][18]=112;
	10	GranChemDist[0][19]=64;
	11
	12	GranChemDist[1][2]=86; GranChemDist[1][3]=96; GranChemDist[1][4]=180; GranChemDist[1][5]=43; GranChemDist[1][6]=54; GranChemDist[1][7]=125;
	13	GranChemDist[1][8]=29; GranChemDist[1][9]=97; GranChemDist[1][10]=102; GranChemDist[1][11]=26; GranChemDist[1][12]=91; GranChemDist[1][13]=97;
	14	GranChemDist[1][14]=103; GranChemDist[1][15]=110; GranChemDist[1][16]=71; GranChemDist[1][17]=101; GranChemDist[1][18]=77; GranChemDist[1][19]=96;
	15
	16	GranChemDist[2][3]=23; GranChemDist[2][4]=139; GranChemDist[2][5]=46; GranChemDist[2][6]=42; GranChemDist[2][7]=80; GranChemDist[2][8]=68;
	17	GranChemDist[2][9]=149; GranChemDist[2][10]=153; GranChemDist[2][11]=94; GranChemDist[2][12]=142; GranChemDist[2][13]=158; GranChemDist[2][14]=91;
	18	GranChemDist[2][15]=46; GranChemDist[2][16]=65; GranChemDist[2][17]=174; GranChemDist[2][18]=143; GranChemDist[2][19]=133;
	19
	20	GranChemDist[3][4]=154; GranChemDist[3][5]=61; GranChemDist[3][6]=45; GranChemDist[3][7]=94; GranChemDist[3][8]=81;
	21	GranChemDist[3][9]=168; GranChemDist[3][10]=172; GranChemDist[3][11]=101; GranChemDist[3][12]=160; GranChemDist[3][13]=177; GranChemDist[3][14]=108;
	22	GranChemDist[3][15]=65; GranChemDist[3][16]=85; GranChemDist[3][17]=181; GranChemDist[3][18]=160; GranChemDist[3][19]=152;
	23
	24	GranChemDist[4][5]=154; GranChemDist[4][6]=170; GranChemDist[4][7]=159; GranChemDist[4][8]=174;
	25	GranChemDist[4][9]=198; GranChemDist[4][10]=198; GranChemDist[4][11]=202; GranChemDist[4][12]=196; GranChemDist[4][13]=205; GranChemDist[4][14]=169;
	26	GranChemDist[4][15]=112; GranChemDist[4][16]=149; GranChemDist[4][17]=215; GranChemDist[4][18]=194; GranChemDist[4][19]=192;
	27
	28	GranChemDist[5][6]=29; GranChemDist[5][7]=87; GranChemDist[5][8]=24;
	29	GranChemDist[5][9]=109; GranChemDist[5][10]=113; GranChemDist[5][11]=53; GranChemDist[5][12]=101; GranChemDist[5][13]=116; GranChemDist[5][14]=76;
	30	GranChemDist[5][15]=68; GranChemDist[5][16]=42; GranChemDist[5][17]=130; GranChemDist[5][18]=99; GranChemDist[5][19]=96;
	31
	32	GranChemDist[6][7]=98; GranChemDist[6][8]=40;
	33	GranChemDist[6][9]=134; GranChemDist[6][10]=138; GranChemDist[6][11]=56; GranChemDist[6][12]=126; GranChemDist[6][13]=140; GranChemDist[6][14]=93;
	34	GranChemDist[6][15]=80; GranChemDist[6][16]=65; GranChemDist[6][17]=152; GranChemDist[6][18]=122; GranChemDist[6][19]=121;
	35
	36	GranChemDist[7][8]=89;
	37	GranChemDist[7][9]=135; GranChemDist[7][10]=138; GranChemDist[7][11]=127; GranChemDist[7][12]=127; GranChemDist[7][13]=153; GranChemDist[7][14]=42;
	38	GranChemDist[7][15]=56; GranChemDist[7][16]=59; GranChemDist[7][17]=184; GranChemDist[7][18]=147; GranChemDist[7][19]=109;
	39
	40	GranChemDist[8][9]=94; GranChemDist[8][10]=99; GranChemDist[8][11]=32; GranChemDist[8][12]=87; GranChemDist[8][13]=100; GranChemDist[8][14]=77;
	41	GranChemDist[8][15]=89; GranChemDist[8][16]=47; GranChemDist[8][17]=115; GranChemDist[8][18]=83; GranChemDist[8][19]=84;
	42
	43	GranChemDist[9][10]=5; GranChemDist[9][11]=102; GranChemDist[9][12]=10; GranChemDist[9][13]=21; GranChemDist[9][14]=95;
	44	GranChemDist[9][15]=142; GranChemDist[9][16]=89; GranChemDist[9][17]=61; GranChemDist[9][18]=33; GranChemDist[9][19]=29;
	45
	46	GranChemDist[10][11]=107; GranChemDist[10][12]=15; GranChemDist[10][13]=22; GranChemDist[10][14]=98;
	47	GranChemDist[10][15]=145; GranChemDist[10][16]=92; GranChemDist[10][17]=61; GranChemDist[10][18]=36; GranChemDist[10][19]=32;
	48
	49	GranChemDist[11][12]=95; GranChemDist[11][13]=102; GranChemDist[11][14]=103;
	50	GranChemDist[11][15]=121; GranChemDist[11][16]=78; GranChemDist[11][17]=110; GranChemDist[11][18]=85; GranChemDist[11][19]=97;
	51
	52	GranChemDist[12][13]=28; GranChemDist[12][14]=87;
	53	GranChemDist[12][15]=135; GranChemDist[12][16]=81; GranChemDist[12][17]=67; GranChemDist[12][18]=36; GranChemDist[12][19]=21;
	54
	55	GranChemDist[13][14]=114;
	56	GranChemDist[13][15]=155; GranChemDist[13][16]=103; GranChemDist[13][17]=40; GranChemDist[13][18]=22; GranChemDist[13][19]=50;
	57
	58	GranChemDist[14][15]=74; GranChemDist[14][16]=38; GranChemDist[14][17]=147; GranChemDist[14][18]=110; GranChemDist[14][19]=68;
	59
	60	GranChemDist[15][16]=58; GranChemDist[15][17]=177; GranChemDist[15][18]=144; GranChemDist[15][19]=124;
	61
	62	GranChemDist[16][17]=128; GranChemDist[16][18]=92; GranChemDist[16][19]=69;
	63
	64	GranChemDist[17][18]=37; GranChemDist[17][19]=88;
	65
	66	GranChemDist[18][19]=55;
	67
	68
	69	GranPolarityTable[0]=8.1 ; //A
	70	GranPolarityTable[1]=10.5 ; //R
	71	GranPolarityTable[2]=11.6 ; //N
	72	GranPolarityTable[3]=13.0 ; //D
	73	GranPolarityTable[4]=5.5 ; //C
	74	GranPolarityTable[5]=10.5 ; //Q
	75	GranPolarityTable[6]=12.3 ; //E
	76	GranPolarityTable[7]=9.0 ; //G
	77	GranPolarityTable[8]=10.4 ; //H
	78	GranPolarityTable[9]=5.2 ; //I
	79	GranPolarityTable[10]=4.9 ; //L
	80	GranPolarityTable[11]=11.3; //K
	81	GranPolarityTable[12]=5.7 ; //M
	82	GranPolarityTable[13]=5.2 ; //F
	83	GranPolarityTable[14]=8.0 ; //P
	84	GranPolarityTable[15]=9.2 ; //S
	85	GranPolarityTable[16]=8.6 ; //T
	86	GranPolarityTable[17]=5.4 ; //W
	87	GranPolarityTable[18]=6.2 ; //Y
	88	GranPolarityTable[19]=5.9 ; //V
	89
	90	/*
	91	GranVolumeTable[0]=8.1 ; //A
	92	GranVolumeTable[1]=10.5 ; //R
	93	GranVolumeTable[2]=11.6 ; //N
	94	GranVolumeTable[3]=13.0 ; //D
	95	GranVolumeTable[4]=5.5 ; //C
	96	GranVolumeTable[5]=10.5 ; //Q
	97	GranVolumeTable[6]=12.3 ; //E
	98	GranVolumeTable[7]=9.0 ; //G
	99	GranVolumeTable[8]=10.4 ; //H
	100	GranVolumeTable[9]=5.2 ; //I
	101	GranVolumeTable[10]=4.9 ; //L
	102	GranVolumeTable[11]=11.3; //K
	103	GranVolumeTable[12]=5.7 ; //M
	104	GranVolumeTable[13]=5.2 ; //F
	105	GranVolumeTable[14]=8.0 ; //P
	106	GranVolumeTable[15]=9.2 ; //S
	107	GranVolumeTable[16]=8.6 ; //T
	108	GranVolumeTable[17]=5.4 ; //W
	109	GranVolumeTable[18]=6.2 ; //Y
	110	GranVolumeTable[19]=5.9 ; //V
	111	*/
	112	}
	113
	114	MDOUBLE granthamChemicalDistances::getHughesHydrophobicityDistance(
	115	const int aa1,const int aa2) const {
	116	int v1=0;
	117	int v2=0;
	118	if ((aa1==0) \|\| (aa1==4) \|\| (aa1==13) \|\| //acf
	119	(aa1==7) \|\| (aa1==8) \|\| (aa1==9) \|\| //ghi
	120	(aa1==11) \|\| (aa1==10) \|\| (aa1==12) \|\| //klm
	121	(aa1==16) \|\| (aa1==19) \|\| (aa1==17)
	122	\|\| (aa1==18)) //tvwy
	123	v1=1;
	124	if ((aa2==0) \|\| (aa2==4) \|\| (aa2==13) \|\| //acf
	125	(aa2==7) \|\| (aa2==8) \|\| (aa2==9) \|\| //ghi
	126	(aa2==11) \|\| (aa2==10) \|\| (aa2==12) \|\| //klm
	127	(aa2==16) \|\| (aa2==19) \|\| (aa2==17)
	128	\|\| (aa2==18)) //tvwy
	129	v2=1;
	130
	131	if (v1!=v2) return 1;
	132	return 0;
	133	}
	134
	135	MDOUBLE granthamChemicalDistances::getHughesPolarityDistance(
	136	const int aa1,const int aa2) const {
	137	int v1=0;
	138	int v2=0;
	139	if ((aa1==4) \|\| (aa1==3) \|\| (aa1==6) \|\| //cde
	140	(aa1==8) \|\| (aa1==11) \|\| (aa1==2) \|\| //hkn
	141	(aa1==5) \|\| (aa1==1) \|\| (aa1==15) \|\| //qrs
	142	(aa1==16) \|\| (aa1==17) \|\| (aa1==18)) //tyw
	143	v1=1;
	144	if ((aa2==4) \|\| (aa2==3) \|\| (aa2==6) \|\| //cde
	145	(aa2==8) \|\| (aa2==11) \|\| (aa2==2) \|\| //hkn
	146	(aa2==5) \|\| (aa2==1) \|\| (aa2==15) \|\| //qrs
	147	(aa2==16) \|\| (aa2==17) \|\| (aa2==18)) //tyw
	148	v2=1;
	149
	150	if (v1!=v2) return 1;
	151	return 0;
	152	}
	153	MDOUBLE granthamChemicalDistances::getHughesChargeDistance(
	154	const int aa1,const int aa2) const {
	155	int v1=0;
	156	int v2=0;
	157	if ((aa1==8) \|\| (aa1==11) \|\| (aa1==1)) v1=1;
	158	if ( (aa1==3) \|\| (aa1==6)) v1=2;
	159	else v1=3;
	160
	161	if ((aa2==8) \|\| (aa2==11) \|\| (aa2==1)) v2=1;
	162	if ( (aa2==3) \|\| (aa2==6)) v2=2;
	163	else v2=3;
	164
	165	if (v1!=v2) return 1;
	166	return 0;
	167	}
	168
	169
	170
	171	MDOUBLE granthamChemicalDistances::getGranthamDistance(const int aa1, const int aa2) const {
	172	if (aa1>aa2) return GranChemDist[aa2][aa1] ;
	173	else return GranChemDist[aa1][aa2];
	174	}
	175
	176	MDOUBLE granthamChemicalDistances::getGranthamPolarityDistance(const int aa1,const int aa2) const{
	177	return fabs(GranPolarityTable[aa1]-GranPolarityTable[aa2]);
	178	}
	179
	180	MDOUBLE granthamChemicalDistances::getGranthamPolarity(const int aa1) const{
	181	return GranPolarityTable[aa1];
	182	}
	183
	184
	185
	186

+32

-0

libs/phylogeny/granthamChemicalDistances.h less more

	0	// $Id: granthamChemicalDistances.h 962 2006-11-07 15:13:34Z privmane $
	1
	2	#ifndef ___GRANTHAM_CHEMICAL_DISTANCES
	3	#define ___GRANTHAM_CHEMICAL_DISTANCES
	4
	5	#include "definitions.h"
	6
	7	class granthamChemicalDistances {
	8	public:
	9	explicit granthamChemicalDistances();
	10	MDOUBLE getGranthamDistance(const int aa1,const int aa2) const ;
	11	MDOUBLE getGranthamPolarityDistance(const int aa1,const int aa2) const;
	12	MDOUBLE getGranthamPolarity(const int aa1) const;
	13	virtual ~granthamChemicalDistances() {}
	14
	15	MDOUBLE getHughesChargeDistance(const int aa1,const int aa2) const;// page 520
	16	MDOUBLE getHughesPolarityDistance(const int aa1,const int aa2) const;// page 520
	17	MDOUBLE getHughesHydrophobicityDistance(const int aa1,const int aa2) const;// page 520
	18
	19
	20	private:
	21
	22	// private members:
	23	MDOUBLE GranChemDist[20][20];
	24	MDOUBLE GranPolarityTable[20];
	25
	26	};
	27
	28
	29	#endif
	30
	31

+210

-0

libs/phylogeny/gtrModel.cpp less more

	0	#include "gtrModel.h"
	1	#include "readDatMatrix.h" // for the normalizeQ function.
	2	#include "matrixUtils.h"
	3
	4	gtrModel::gtrModel(const Vdouble& freq,
	5	const MDOUBLE a2c,
	6	const MDOUBLE a2g,
	7	const MDOUBLE a2t,
	8	const MDOUBLE c2g,
	9	const MDOUBLE c2t,
	10	const MDOUBLE g2t)
	11	:_a2c(a2c),_a2g(a2g),_a2t(a2t),_c2g(c2g),_c2t(c2t),_g2t(g2t),_freq(freq)
	12	{
	13	_Q.resize(alphabetSize());
	14	for (int z=0; z < _Q.size();++z) _Q[z].resize(alphabetSize(),0.0);
	15	updateQ(a2c,a2g,a2t,c2g,c2t,g2t);
	16	}
	17
	18
	19	gtrModel& gtrModel::operator=(const gtrModel &other)
	20	{
	21	_Q = other._Q;
	22	_freq = other._freq;
	23	_q2pt = other._q2pt;
	24	_a2c = other._a2c;
	25	_a2g = other._a2g;
	26	_a2t = other._a2t;
	27	_c2g = other._c2g;
	28	_c2t = other._c2t;
	29	_g2t = other._g2t;
	30	return *this;
	31	}
	32
	33	gtrModel::gtrModel(const gtrModel &other)
	34	{
	35	_Q = other._Q;
	36	_freq = other._freq;
	37	_q2pt = other._q2pt;
	38	_a2c = other._a2c;
	39	_a2g = other._a2g;
	40	_a2t = other._a2t;
	41	_c2g = other._c2g;
	42	_c2t = other._c2t;
	43	_g2t = other._g2t;
	44	}
	45
	46	void gtrModel::norm(const MDOUBLE scale)
	47	{
	48	for (int i=0; i < _Q.size(); ++i) {
	49	for (int j=0; j < _Q.size(); ++j) {
	50	_Q[i][j] *= scale;
	51	}
	52	}
	53	}
	54
	55	MDOUBLE gtrModel::sumPijQij(){
	56	MDOUBLE sum=0.0;
	57	for (int i=0; i < _Q.size(); ++i) {
	58	sum -= (_Q[i][i])*_freq[i];
	59	}
	60	return sum;
	61	}
	62
	63	void gtrModel::updateQ(const MDOUBLE a2c,const MDOUBLE a2g,const MDOUBLE a2t,const MDOUBLE c2g,const MDOUBLE c2t,const MDOUBLE g2t)
	64	{
	65	_a2c = a2c;
	66	_Q[a][c] = (_a2c);
	67	_Q[c][a] = (_freq[a]*_a2c/_freq[c]);
	68	_a2g = a2g;
	69	_Q[a][g] = (_a2g);
	70	_Q[g][a] = (_freq[a]*_a2g/_freq[g]);
	71	_a2t = a2t;
	72	_Q[a][t] = (_a2t);
	73	_Q[t][a] = (_freq[a]*_a2t/_freq[t]);
	74	_c2g = c2g;
	75	_Q[c][g] = (_c2g);
	76	_Q[g][c] = (_freq[c]*_c2g/_freq[g]);
	77	_c2t = c2t;
	78	_Q[c][t] = (_c2t);
	79	_Q[t][c] = (_freq[c]*_c2t/_freq[t]);
	80	_g2t = g2t;
	81	_Q[g][t] = (_g2t);
	82	_Q[t][g] = (_freq[g]*_g2t/_freq[t]);
	83	_Q[a][a] = -1.0*(_Q[a][c]+_Q[a][g]+_Q[a][t]);
	84	_Q[c][c] = -1.0*(_Q[c][a]+_Q[c][g]+_Q[c][t]);
	85	_Q[g][g] = -1.0*(_Q[g][a]+_Q[g][c]+_Q[g][t]);
	86	_Q[t][t] = -1.0*(_Q[t][a]+_Q[t][c]+_Q[t][g]);
	87	norm(1.0/sumPijQij());
	88	_q2pt.fillFromRateMatrix(_freq,_Q);
	89	}
	90
	91	void gtrModel::set_a2c(const MDOUBLE a2c)
	92	{
	93	_a2c = a2c;
	94	updateQ(_a2c,_a2g,_a2t,_c2g,_c2t,_g2t);
	95	}
	96
	97	void gtrModel::set_a2g(const MDOUBLE a2g)
	98	{
	99	_a2g = a2g;
	100	updateQ(_a2c,_a2g,_a2t,_c2g,_c2t,_g2t);
	101	}
	102
	103	void gtrModel::set_a2t(const MDOUBLE a2t)
	104	{
	105	_a2t = a2t;
	106	updateQ(_a2c,_a2g,_a2t,_c2g,_c2t,_g2t);
	107	}
	108
	109	void gtrModel::set_c2g(const MDOUBLE c2g)
	110	{
	111	_c2g = c2g;
	112	updateQ(_a2c,_a2g,_a2t,_c2g,_c2t,_g2t);
	113	}
	114
	115	void gtrModel::set_c2t(const MDOUBLE c2t)
	116	{
	117	_c2t = c2t;
	118	updateQ(_a2c,_a2g,_a2t,_c2g,_c2t,_g2t);
	119	}
	120
	121	void gtrModel::set_g2t(const MDOUBLE g2t)
	122	{
	123	_g2t = g2t;
	124	updateQ(_a2c,_a2g,_a2t,_c2g,_c2t,_g2t);
	125	}
	126
	127	MDOUBLE gtrModel::get_a2c() const
	128	{
	129	MDOUBLE result;
	130	if(_Q.size() < alphabetSize())
	131	errorMsg::reportError("Attempting to reach an uninitiallized Q matrix in gtrModel::get_a2c");
	132	else{
	133	if((_Q[a].size() < alphabetSize())\|\|(_Q[c].size() < alphabetSize()))
	134	errorMsg::reportError("Attempting to reach an uninitiallzed Q matrix element in Model::get_a2c");
	135	else
	136	result = _a2c;
	137	}
	138	return result;
	139	}
	140
	141	MDOUBLE gtrModel::get_a2g() const
	142	{
	143	MDOUBLE result;
	144	if(_Q.size() < alphabetSize())
	145	errorMsg::reportError("Attempting to reach an uninitiallized Q matrix in gtrModel::get_a2g");
	146	else{
	147	if((_Q[a].size() < alphabetSize())\|\|(_Q[g].size() < alphabetSize()))
	148	errorMsg::reportError("Attempting to reach an uninitiallzed Q matrix element in Model::get_a2g");
	149	else
	150	result = _a2g;
	151	}
	152	return result;
	153	}
	154
	155	MDOUBLE gtrModel::get_a2t() const
	156	{
	157	MDOUBLE result;
	158	if(_Q.size() < alphabetSize())
	159	errorMsg::reportError("Attempting to reach an uninitiallized Q matrix in gtrModel::get_a2t");
	160	else{
	161	if((_Q[a].size() < alphabetSize())\|\|(_Q[t].size() < alphabetSize()))
	162	errorMsg::reportError("Attempting to reach an uninitiallzed Q matrix element in Model::get_a2t");
	163	else
	164	result = _a2t;
	165	}
	166	return result;
	167	}
	168
	169	MDOUBLE gtrModel::get_c2g() const
	170	{
	171	MDOUBLE result;
	172	if(_Q.size() < alphabetSize())
	173	errorMsg::reportError("Attempting to reach an uninitiallized Q matrix in gtrModel::get_c2g");
	174	else{
	175	if((_Q[c].size() < alphabetSize())\|\|(_Q[g].size() < alphabetSize()))
	176	errorMsg::reportError("Attempting to reach an uninitiallzed Q matrix element in Model::get_c2g");
	177	else
	178	result = _c2g;
	179	}
	180	return result;
	181	}
	182
	183	MDOUBLE gtrModel::get_c2t() const
	184	{
	185	MDOUBLE result;
	186	if(_Q.size() < alphabetSize())
	187	errorMsg::reportError("Attempting to reach an uninitiallized Q matrix in gtrModel::get_c2t");
	188	else{
	189	if((_Q[c].size() < alphabetSize())\|\|(_Q[t].size() < alphabetSize()))
	190	errorMsg::reportError("Attempting to reach an uninitiallzed Q matrix element in Model::get_c2t");
	191	else
	192	result = _c2t;
	193	}
	194	return result;
	195	}
	196
	197	MDOUBLE gtrModel::get_g2t() const
	198	{
	199	MDOUBLE result;
	200	if(_Q.size() < alphabetSize())
	201	errorMsg::reportError("Attempting to reach an uninitiallized Q matrix in gtrModel::get_g2t");
	202	else{
	203	if((_Q[g].size() < alphabetSize())\|\|(_Q[t].size() < alphabetSize()))
	204	errorMsg::reportError("Attempting to reach an uninitiallzed Q matrix element in Model::get_g2t");
	205	else
	206	result = _g2t;
	207	}
	208	return result;
	209	}

+62

-0

libs/phylogeny/gtrModel.h less more

	0	#ifndef _GTR_MODEL
	1	#define _GTR_MODEL
	2
	3	#include "replacementModel.h"
	4	#include "fromQtoPt.h"
	5
	6	class gtrModel : public replacementModel {
	7	public:
	8	enum modelElements {a = 0,c,g,t};
	9	explicit gtrModel(const Vdouble& freq,
	10	const MDOUBLE a2c = 0.25,
	11	const MDOUBLE a2g = 0.25,
	12	const MDOUBLE a2t = 0.25,
	13	const MDOUBLE c2g = 0.25,
	14	const MDOUBLE c2t = 0.25,
	15	const MDOUBLE g2t = 0.25);
	16	virtual replacementModel* clone() const { return new gtrModel(*this); }
	17	virtual gtrModel& operator=(const gtrModel &other);
	18	explicit gtrModel(const gtrModel &other);
	19	const int alphabetSize() const {return _freq.size();}
	20	const MDOUBLE Pij_t(const int i,const int j, const MDOUBLE d) const {return _q2pt.Pij_t(i,j,d);}
	21	const MDOUBLE dPij_dt(const int i,const int j, const MDOUBLE d) const{return _q2pt.dPij_dt(i,j,d);}
	22	const MDOUBLE d2Pij_dt2(const int i,const int j, const MDOUBLE d) const{return _q2pt.d2Pij_dt2(i,j,d);}
	23	const MDOUBLE freq(const int i) const {return _freq[i];};
	24	void set_a2c(const MDOUBLE a2c);
	25	void set_a2g(const MDOUBLE a2g);
	26	void set_a2t(const MDOUBLE a2t);
	27	void set_c2g(const MDOUBLE c2g);
	28	void set_c2t(const MDOUBLE c2t);
	29	void set_g2t(const MDOUBLE g2t);
	30	MDOUBLE get_a2c() const;
	31	MDOUBLE get_a2g() const;
	32	MDOUBLE get_a2t() const;
	33	MDOUBLE get_c2g() const;
	34	MDOUBLE get_c2t() const;
	35	MDOUBLE get_g2t() const;
	36	const VVdouble& getQ() const {return _Q;}
	37
	38
	39	private:
	40	void updateQ(const MDOUBLE a2c,const MDOUBLE a2g,const MDOUBLE a2t,const MDOUBLE c2g,const MDOUBLE c2t,const MDOUBLE g2t);
	41	void norm(const MDOUBLE scale);
	42	MDOUBLE sumPijQij();
	43
	44	private:
	45	VVdouble _Q;
	46	Vdouble _freq;
	47	q2pt _q2pt;
	48	MDOUBLE _a2c;
	49	MDOUBLE _a2g;
	50	MDOUBLE _a2t;
	51	MDOUBLE _c2g;
	52	MDOUBLE _c2t;
	53	MDOUBLE _g2t;
	54	};
	55	#endif
	56
	57
	58
	59
	60
	61

+593

-0

libs/phylogeny/hky.cpp less more

	0	// $Id: hky.cpp 4291 2008-06-23 10:23:10Z itaymay $
	1
	2	#include "hky.h"
	3	#include "errorMsg.h"
	4
	5	hky::hky(const MDOUBLE inProb_a,
	6	const MDOUBLE inProb_c,
	7	const MDOUBLE inProb_g,
	8	const MDOUBLE inProb_t,
	9	const MDOUBLE TrTv) {
	10	_freq.resize(4);
	11	_freq[0] = inProb_a; _freq[1] = inProb_c;
	12	_freq[2] = inProb_g; _freq[3] = inProb_t;
	13	initParams(TrTv);
	14	}
	15
	16
	17	hky::hky(vector<MDOUBLE> inProbs, const MDOUBLE TrTv) : _freq(inProbs)
	18	{
	19	if (inProbs.size()!=4)
	20	errorMsg::reportError("hky::hky(vector<MDOUBLE> inProbs, const MDOUBLE TrTv) : the size of inProbs is not 4");
	21	initParams(TrTv);
	22	}
	23
	24	void hky::initParams(MDOUBLE TrTv) // init _a, _b, _c, and _y by using _freq and TrTv
	25	{
	26	MDOUBLE In_k = TrTv*2; // k is defined as alpha / beta.
	27	// In k2p Tr/Tv = alpha / 2*beta.
	28
	29	_c = 2(_freq[0]_freq[2]+_freq[3]*_freq[1]);
	30	_y = 2(_freq[0]+_freq[2])(_freq[1]+_freq[3]);
	31	// c_a + y_b = 1;
	32	//_a/_b = k;
	33	_b = 1.0 / (_c*In_k+_y);
	34	_a = _b*In_k;
	35	}
	36
	37	void hky::changeTrTv(const MDOUBLE TrTv){
	38	MDOUBLE In_k = TrTv*2; // k is defined as alpha / beta.
	39	// In k2p Tr/Tv = alpha / 2*beta.
	40	_b = 1.0 / (_c*In_k+_y);
	41	_a = _b*In_k;
	42	}
	43
	44	MDOUBLE hky::getTrTv() const {
	45	return (_a/(2.0*_b));
	46	}
	47
	48	const MDOUBLE hky::Pij_t(const int i, const int j, const MDOUBLE t) const {
	49	const MDOUBLE &pa = _freq[0];
	50	const MDOUBLE &pc = _freq[1];
	51	const MDOUBLE &pg = _freq[2];
	52	const MDOUBLE &pt = _freq[3];
	53	const MDOUBLE py = pc+pt;
	54	const MDOUBLE pr = pa+pg;
	55
	56	const MDOUBLE &b = _b;
	57	const MDOUBLE &a = _a;
	58	const MDOUBLE lamda3 = -(pyb+pra);
	59	const MDOUBLE lamda4 = -(pya+prb);
	60
	61	MDOUBLE term1=0.0;
	62	MDOUBLE term2=0.0;
	63	MDOUBLE term3=0.0;
	64	MDOUBLE termAll=0.0;
	65	switch (i) {
	66	case 0:
	67	switch (j) {
	68	case 0:
	69	term1 = pa;
	70	term2 = exp(-bt)(py)*pa/pr;
	71	term3 = pgexp(tlamda3)/pr;
	72	termAll = term1 + term2+term3;
	73	return termAll;
	74
	75	break;
	76	case 1:
	77	termAll = pc - exp(-bt)pc;
	78	return termAll;
	79
	80	break;
	81	case 2:
	82	term1 = pg;
	83	term2 = exp(-bt)py*pg/pr;
	84	term3 = -pgexp(tlamda3)/pr;
	85	termAll = term1 + term2+term3;
	86	return termAll;
	87
	88	break;
	89	case 3:
	90	termAll = pt - exp(-bt)pt;
	91	return termAll;
	92
	93	break;
	94	}
	95	break;
	96
	97	case 1:
	98	switch (j) {
	99	case 0:
	100	termAll = pa - exp(-bt)pa;
	101	return termAll;
	102	break;
	103	case 1:
	104	term1 = pc;
	105	term2 = exp(-bt)pr*pc/py;
	106	term3 = ptexp(tlamda4)/py;
	107	termAll = term1 + term2+term3;
	108	return termAll;
	109
	110
	111	break;
	112	case 2:
	113	termAll = pg - exp(-bt)pg;
	114	return termAll;
	115	break;
	116
	117	case 3:
	118	term1 = pt;
	119	term2 = exp(-bt)pr*pt/py;
	120	term3 = -ptexp(tlamda4)/py;
	121	termAll = term1 + term2 + term3;
	122	return termAll;
	123
	124	break;
	125	}
	126	break;
	127
	128	case 2:
	129	switch (j) {
	130	case 0:
	131	term1 = pa;
	132	term2 = exp(-bt)py*pa/pr;
	133	term3 = -paexp(tlamda3)/pr;
	134	termAll = term1 + term2+term3;
	135
	136	return termAll;
	137	break;
	138	case 1:
	139	termAll = pc - exp(-bt)pc;
	140	return termAll;
	141	break;
	142	case 2:
	143	term1 = pg;
	144	term2 = exp(-bt)py*pg/pr;
	145	term3 = paexp(tlamda3)/pr;
	146	termAll = term1 + term2 + term3;
	147
	148	return termAll;
	149	break;
	150
	151	case 3:
	152	termAll = pt - exp(-bt)pt;
	153
	154	return termAll;
	155	break;
	156	}
	157	break;
	158	case 3:
	159	switch (j) {
	160	case 0:
	161	termAll = pa - exp(-bt)pa;
	162	return termAll;
	163	break;
	164	case 1:
	165	term1 = pc;
	166	term2 = exp(-bt)pr*pc/py;
	167	term3 = -pcexp(tlamda4)/py;
	168	termAll = term1 + term2+term3;
	169	return termAll;
	170
	171
	172	break;
	173	case 2:
	174	termAll = pg - exp(-bt)pg;
	175	return termAll;
	176	break;
	177
	178	case 3:
	179	term1 = pt;
	180	term2 = exp(-bt)(pr)*pt/(py);
	181	term3 = pcexp(tlamda4)/(py);
	182	termAll = term1 + term2 + term3;
	183	return termAll;
	184
	185	break;
	186	}
	187	break;
	188
	189	}
	190	return -1;
	191	}
	192
	193	const MDOUBLE hky::dPij_dt(const int i,const int j, const MDOUBLE t) const {
	194	const MDOUBLE &pa = _freq[0];
	195	const MDOUBLE &pc = _freq[1];
	196	const MDOUBLE &pg = _freq[2];
	197	const MDOUBLE &pt = _freq[3];
	198	const MDOUBLE py = pc+pt;
	199	const MDOUBLE pr = pa+pg;
	200
	201	const MDOUBLE &b = _b;
	202	const MDOUBLE &a = _a;
	203	const MDOUBLE lamda3 = -(pyb+pra);
	204	const MDOUBLE lamda4 = -(pya+prb);
	205
	206	MDOUBLE term1, term2, term3,termAll;
	207
	208	switch (i) {
	209	case 0:
	210	switch (j) {
	211	case 0://ok
	212	term1 = 0;
	213	term2 = exp(-bt)(py)*pa/pr;
	214	term2 *= -b;
	215	term3 = pgexp(tlamda3)/pr;
	216	term3*= lamda3;
	217	termAll = term1 + term2+term3;
	218	return termAll;
	219
	220	break;
	221	case 1://ok
	222	termAll = b* exp(-bt)pc;
	223	return termAll;
	224
	225	break;
	226	case 2://ok
	227	term1 = 0;
	228	term2 = (-b)exp(-bt)pypg/pr;
	229	term3 = -pgexp(tlamda3)/pr;
	230	term3*=lamda3;
	231	termAll = term1 + term2+term3;
	232	return termAll;
	233
	234	break;
	235	case 3://ok
	236	termAll = bexp(-bt)*pt;
	237	return termAll;
	238
	239	break;
	240	}
	241	break;
	242
	243	case 1:
	244	switch (j) {
	245	case 0://ok
	246	termAll = bexp(-bt)*pa;
	247	return termAll;
	248	break;
	249	case 1://ok
	250	term1 = 0;
	251	term2 = (-b)exp(-bt)prpc/py;
	252	term3 = lamda4ptexp(t*lamda4)/py;
	253	termAll = term1 + term2+term3;
	254	return termAll;
	255	break;
	256	case 2://ok
	257	termAll = bexp(-bt)*pg;
	258	return termAll;
	259	break;
	260	case 3://ok
	261	term1 = 0;
	262	term2 = (-b)exp(-bt)prpt/py;
	263	term3 = (lamda4)(-pt)exp(t*lamda4)/py;
	264	termAll = term1 + term2 + term3;
	265	return termAll;
	266	break;
	267	}
	268	break;
	269	case 2:
	270	switch (j) {
	271	case 0://ok
	272	term1 = 0;
	273	term2 = (-b)exp(-bt)pypa/pr;
	274	term3 = lamda3(-pa)exp(t*lamda3)/pr;
	275	termAll = term1 + term2+term3;
	276	return termAll;
	277	break;
	278	case 1://ok
	279	termAll = bexp(-bt)*pc;
	280	return termAll;
	281	break;
	282	case 2://ok
	283	term1 = 0;
	284	term2 = (-b)exp(-bt)pypg/pr;
	285	term3 = lamda3paexp(t*lamda3)/pr;
	286	termAll = term1 + term2 + term3;
	287	return termAll;
	288	break;
	289	case 3://ok
	290	termAll = bexp(-bt)*pt;
	291	return termAll;
	292	break;
	293	}
	294	break;
	295	case 3:
	296	switch (j) {
	297	case 0://ok
	298	termAll = bexp(-bt)*pa;
	299	return termAll;
	300	break;
	301	case 1://ok
	302	term1 = 0;
	303	term2 = (-b)exp(-bt)prpc/py;
	304	term3 = lamda4(-pc)exp(t*lamda4)/py;
	305	termAll = term1 + term2+term3;
	306	return termAll;
	307	break;
	308	case 2://ok
	309	termAll = b* exp(-bt)pg;
	310	return termAll;
	311	break;
	312	case 3://ok
	313	term1 = 0;
	314	term2 = (-b)exp(-bt)(pr)pt/(py);
	315	term3 = (lamda4)pcexp(t*lamda4)/(py);
	316	termAll = term1 + term2 + term3;
	317	return termAll;
	318	break;
	319	}
	320	break;
	321	}
	322	return -1;
	323	}
	324
	325	const MDOUBLE hky::d2Pij_dt2(const int i,const int j, const MDOUBLE t) const {
	326	const MDOUBLE &pa = _freq[0];
	327	const MDOUBLE &pc = _freq[1];
	328	const MDOUBLE &pg = _freq[2];
	329	const MDOUBLE &pt = _freq[3];
	330	const MDOUBLE py = pc+pt;
	331	const MDOUBLE pr = pa+pg;
	332
	333	const MDOUBLE &b = _b;
	334	const MDOUBLE &a = _a;
	335	const MDOUBLE lamda3 = -(pyb+pra);
	336	const MDOUBLE lamda4 = -(pya+prb);
	337
	338	MDOUBLE term1, term2, term3,termAll;
	339
	340	switch (i) {
	341	case 0:
	342	switch (j) {
	343	case 0://ok2
	344	term1 = 0;
	345	term2 = bbexp(-bt)(py)*pa/pr;
	346	term3 = lamda3lamda3pgexp(tlamda3)/pr;
	347	termAll = term1 + term2+term3;
	348	return termAll;
	349
	350	break;
	351	case 1://ok2
	352	termAll = -bb exp(-bt)pc;
	353	return termAll;
	354	break;
	355	case 2://ok2
	356	term1 = 0;
	357	term2 = bbexp(-bt)py*pg/pr;
	358	term3 = lamda3lamda3(-pg)exp(tlamda3)/pr;
	359	termAll = term1 + term2+term3;
	360	return termAll;
	361	break;
	362	case 3://ok2
	363	termAll = -bbexp(-bt)pt;
	364	return termAll;
	365	break;
	366	}
	367	break;
	368	case 1:
	369	switch (j) {
	370	case 0://ok2
	371	termAll = -bbexp(-bt)pa;
	372	return termAll;
	373	break;
	374	case 1://ok2
	375	term1 = 0;
	376	term2 = bbexp(-bt)pr*pc/py;
	377	term3 = lamda4lamda4ptexp(tlamda4)/py;
	378	termAll = term1 + term2+term3;
	379	return termAll;
	380	break;
	381	case 2://ok2
	382	termAll = -bbexp(-bt)pg;
	383	return termAll;
	384	break;
	385	case 3://ok2
	386	term1 = 0;
	387	term2 = bbexp(-bt)pr*pt/py;
	388	term3 = lamda4lamda4(-pt)exp(tlamda4)/py;
	389	termAll = term1 + term2 + term3;
	390	return termAll;
	391	break;
	392	}
	393	break;
	394	case 2:
	395	switch (j) {
	396	case 0://ok2
	397	term1 = 0;
	398	term2 = bbexp(-bt)py*pa/pr;
	399	term3 = lamda3lamda3(-pa)exp(tlamda3)/pr;
	400	termAll = term1 + term2+term3;
	401	return termAll;
	402	break;
	403	case 1://ok2
	404	termAll = -bbexp(-bt)pc;
	405	return termAll;
	406	break;
	407	case 2://ok2
	408	term1 = 0;
	409	term2 = bbexp(-bt)py*pg/pr;
	410	term3 = lamda3lamda3paexp(tlamda3)/pr;
	411	termAll = term1 + term2 + term3;
	412	return termAll;
	413	break;
	414	case 3://ok2
	415	termAll = -bbexp(-bt)pt;
	416	return termAll;
	417	break;
	418	}
	419	break;
	420	case 3:
	421	switch (j) {
	422	case 0://ok2
	423	termAll = -bbexp(-bt)pa;
	424	return termAll;
	425	break;
	426	case 1://ok2
	427	term1 = 0;
	428	term2 = bbexp(-bt)pr*pc/py;
	429	term3 = lamda4lamda4(-pc)exp(tlamda4)/py;
	430	termAll = term1 + term2+term3;
	431	return termAll;
	432	break;
	433	case 2://ok2
	434	termAll = -bb exp(-bt)pg;
	435	return termAll;
	436	break;
	437	case 3://ok2
	438	term1 = 0;
	439	term2 = bbexp(-bt)(pr)*pt/(py);
	440	term3 = lamda4lamda4pcexp(tlamda4)/(py);
	441	termAll = term1 + term2 + term3;
	442	return termAll;
	443	break;
	444	}
	445	break;
	446	}
	447	return -1;
	448	}
	449
	450	const MDOUBLE hky::dPij_tdBeta(const int i, const int j, const MDOUBLE t) const {
	451	const MDOUBLE &pa = _freq[0];
	452	const MDOUBLE &pc = _freq[1];
	453	const MDOUBLE &pg = _freq[2];
	454	const MDOUBLE &pt = _freq[3];
	455	const MDOUBLE &py = pc+pt;
	456	const MDOUBLE &pr = pa+pg;
	457
	458	const MDOUBLE &b = _b;
	459	const MDOUBLE &a = _a;
	460	const MDOUBLE &lamda3 = -(pyb+pra);
	461	const MDOUBLE &lamda4 = -(pya+prb);
	462
	463	MDOUBLE term2, term3,termAll;
	464
	465	const MDOUBLE& dlamda3= -py+_y*pr/_c;
	466	const MDOUBLE& dlamda4= -pr+_y*py/_c;
	467
	468	switch (i) {
	469
	470	case 0:
	471	switch (j) {
	472	case 0:
	473	term2 = (-t)exp(-bt)(py)pa/pr;
	474	term3 = tdlamda3pgexp(tlamda3)/pr;
	475	termAll = term2+term3;
	476	return termAll;
	477
	478	break;
	479	case 1:
	480	termAll = t* exp(-bt)pc;
	481	return termAll;
	482
	483	break;
	484	case 2:
	485	term2 = (-t)exp(-bt)pypg/pr;
	486	term3 = tdlamda3(-pg)exp(tlamda3)/pr;
	487	termAll = term2+term3;
	488	return termAll;
	489
	490	break;
	491	case 3:
	492	termAll = t* exp(-bt)pt;
	493	return termAll;
	494
	495	break;
	496	}
	497	break;
	498
	499	case 1:
	500	switch (j) {
	501	case 0:
	502	termAll = t* exp(-bt)pa;
	503	return termAll;
	504	break;
	505	case 1:
	506	term2 = (-t)exp(-bt)prpc/py;
	507	term3 = tdlamda4ptexp(tlamda4)/py;
	508	termAll = term2+term3;
	509	return termAll;
	510
	511
	512	break;
	513	case 2:
	514	termAll = t* exp(-bt)pg;
	515	return termAll;
	516	break;
	517
	518	case 3:
	519	term2 = (-t)exp(-bt)prpt/py;
	520	term3 = tdlamda4(-pt)exp(tlamda4)/py;
	521	termAll = term2 + term3;
	522	return termAll;
	523
	524	break;
	525	}
	526	break;
	527
	528	case 2:
	529	switch (j) {
	530	case 0:
	531	term2 = (-t)exp(-bt)pypa/pr;
	532	term3 = tdlamda3(-pa)exp(tlamda3)/pr;
	533	termAll = term2+term3;
	534
	535	return termAll;
	536	break;
	537	case 1:
	538	termAll = texp(-bt)*pc;
	539	return termAll;
	540	break;
	541	case 2:
	542	term2 = (-t)exp(-bt)pypg/pr;
	543	term3 = tdlamda3paexp(tlamda3)/pr;
	544	termAll = term2 + term3;
	545
	546	return termAll;
	547	break;
	548
	549	case 3:
	550	termAll = t* exp(-bt)pt;
	551	return termAll;
	552	break;
	553	}
	554	break;
	555	case 3:
	556	switch (j) {
	557	case 0:
	558	termAll = t* exp(-bt)pa;
	559	return termAll;
	560	break;
	561	case 1:
	562	term2 = (-t)exp(-bt)prpc/py;
	563	term3 = tdlamda4(-pc)exp(tlamda4)/py;
	564	termAll = term2+term3;
	565	return termAll;
	566
	567
	568	break;
	569	case 2:
	570	termAll = t* exp(-bt)pg;
	571	return termAll;
	572	break;
	573
	574	case 3:
	575	term2 = (-t)exp(-bt)(pr)pt/(py);
	576	term3 = tdlamda4pcexp(tlamda4)/(py);
	577	termAll = term2 + term3;
	578	return termAll;
	579
	580	break;
	581	}
	582	break;
	583
	584	}
	585	return -1;
	586	}
	587
	588	//Q[0][1] = freq[1]*_b ; Q[0][2] = freq[2]*_a ; Q[0][3] = freq[3]*_b;
	589	//Q[1][0] = freq[0]*_b; ; Q[1][2] = freq[2]*_b ; Q[1][3] = freq[3]*_a;
	590	//Q[2][0] = freq[0]*_a; Q[2][1] = freq[1]*_b ; ; Q[2][3] = freq[3]*_b;
	591	//Q[3][0] = freq[0]*_b; Q[3][1] = freq[1]*_a ; Q[3][2] = freq[2]*_b;
	592

+46

-0

libs/phylogeny/hky.h less more

	0	// $Id: hky.h 4291 2008-06-23 10:23:10Z itaymay $
	1
	2	#ifndef ___HKY
	3	#define ___HKY
	4
	5	#include "replacementModel.h"
	6	#include <cmath>
	7
	8	class hky : public replacementModel {
	9	public:
	10	explicit hky(const MDOUBLE inProb_a,
	11	const MDOUBLE inProb_c,
	12	const MDOUBLE inProb_g,
	13	const MDOUBLE inProb_t,
	14	const MDOUBLE TrTv);
	15
	16	explicit hky(vector<MDOUBLE> inProbs, const MDOUBLE TrTv);
	17
	18	virtual replacementModel* clone() const { return new hky(*this); }
	19	// virtual nucJC* clone() const { return new nucJC(*this); } // see note down:
	20
	21	const int alphabetSize() const {return 4;}
	22
	23
	24	void changeTrTv(const MDOUBLE In_TrTv);
	25	MDOUBLE getTrTv() const;
	26	const MDOUBLE Pij_t(const int i,const int j, const MDOUBLE d) const;
	27	const MDOUBLE dPij_dt(const int i,const int j, const MDOUBLE d) const;
	28	const MDOUBLE freq(const int i) const {return _freq[i];};
	29	const MDOUBLE d2Pij_dt2(const int i,const int j, const MDOUBLE d) const;
	30
	31	const MDOUBLE dPij_tdBeta(const int i, const int j, const MDOUBLE t) const;
	32
	33	private:
	34	void initParams(MDOUBLE TrTv); // init _a, _b, _c, and _y by using _freq and TrTv
	35
	36	private:
	37	Vdouble _freq;
	38	MDOUBLE _a; //
	39	MDOUBLE _b; //
	40
	41	MDOUBLE _c,_y; // relationship between probA, probC, prob G, prob T.
	42	};
	43
	44	#endif
	45

+58

-0

libs/phylogeny/indel.cpp less more

	0	// $Id: indel.cpp 962 2006-11-07 15:13:34Z privmane $
	1
	2	#include "indel.h"
	3
	4	indel::indel() {}
	5
	6	int indel::fromChar(const char s) const{
	7	switch (s) {
	8	case 'x' : case'X' : return 0; break;
	9	case '-' : case'_' : return 1; break;
	10	default:
	11	vector<string> err;
	12	err.push_back(" The indel sequences contained the character: ");
	13	err[0]+=s;
	14	err.push_back(" Indel was not one of the following: ");
	15	err.push_back(" -, X");
	16	err.push_back(" _, x");
	17	errorMsg::reportError(err);
	18	}// end of switch
	19	return -99; // never suppose to be here.
	20	}// end of function
	21
	22	vector<int> indel::fromString(const string &str) const {
	23	vector<int> vec;
	24	for (int i=0;i<str.size();i++)
	25	vec.push_back(fromChar(str[i]));
	26	return vec;
	27	}
	28
	29	string indel::fromInt(const int in_id) const{
	30	char res = 0;
	31	switch (in_id) {
	32	case 0 : res = 'X' ; break;
	33	case 1 : res = '-' ; break;
	34	default:
	35	vector<string> err;
	36	err.push_back("unable to print indel_id. indel_id was not one of the following: ");
	37	err.push_back("X, -");
	38	err.push_back("x, _");
	39	errorMsg::reportError(err);
	40	}//end of switch
	41	string vRes;
	42	vRes.append(1,res);
	43	return vRes;
	44	}// end of function
	45
	46	// There are no relations here.
	47	int indel::relations(const int charInSeq, const int charToCheck) const{
	48	if (charInSeq == charToCheck)
	49	return 1;
	50	return 0;
	51	}
	52
	53	int indel::fromChar(const string& str, const int pos) const{
	54	return fromChar(str[pos]);
	55	}
	56
	57

+28

-0

libs/phylogeny/indel.h less more

	0	// $Id: indel.h 1901 2007-03-15 13:21:06Z nimrodru $
	1	#ifndef ____INDEL
	2	#define ____INDEL
	3
	4	#include "definitions.h"
	5	#include "errorMsg.h"
	6	#include "alphabet.h"
	7
	8
	9	class indel : public alphabet {
	10	public:
	11	explicit indel();
	12	virtual ~indel() {}
	13	virtual alphabet* clone() const { return new indel(*this); }
	14	int unknown() const {return -2;}
	15	int gap() const {errorMsg::reportError("The method indel::gap() is used"); return -1;} // What is it for ? I don't need this !!!
	16	int size() const {return 2;}
	17	int stringSize() const {return 1;} // one letter code.
	18	int relations(const int charInSeq, const int charToCheck) const;
	19	int fromChar(const string& str, const int pos) const;
	20	int fromChar(const char s) const;
	21	string fromInt(const int in_id) const;
	22	vector<int> fromString(const string& str) const;
	23	bool isSpecific(const int id) const {return (id>=0 && id < size());}
	24
	25	};//end of class
	26
	27	#endif

+15

-0

libs/phylogeny/indelModel.cpp less more

	0	// $Id: indelModel.cpp 962 2006-11-07 15:13:34Z privmane $
	1	#include "indelModel.h"
	2
	3
	4	void indelModel::setFreqX(const MDOUBLE freq_x)
	5	{
	6	_freq[0] =freq_x ;
	7	_alpha = 1/(2_freq[0]_freq[1]) ;
	8	}
	9
	10	void indelModel::setFreqG(const MDOUBLE freq_g)
	11	{
	12	_freq[0] =freq_g ;
	13	_alpha = 1/(2_freq[0]_freq[1]) ;
	14	}

+61

-0

libs/phylogeny/indelModel.h less more

	0	// $Id: indelModel.h 962 2006-11-07 15:13:34Z privmane $
	1	#ifndef ___INDEL_MODEL
	2	#define ___INDEL_MODEL
	3
	4	#include "replacementModel.h"
	5	#include <cmath>
	6	using namespace std;
	7
	8	class indelModel : public replacementModel
	9	{
	10	public:
	11	explicit indelModel(const MDOUBLE freq_x, const MDOUBLE freq_g)
	12	{
	13	_alpha = 1/(2freq_xfreq_g);
	14	_freq.push_back(freq_x);
	15	_freq.push_back(freq_g);
	16	}
	17
	18	virtual const MDOUBLE Pij_t(const int i, const int j, const MDOUBLE t) const
	19	{
	20	if (i==j)
	21	return exp(-t*_alpha);
	22	return (1-exp(-t*_alpha));
	23	}
	24
	25	virtual const MDOUBLE freq(const int i) const { return _freq[i];}
	26
	27	virtual const MDOUBLE dPij_dt(const int i, const int j, const MDOUBLE t) const
	28	{
	29	// [e^(-t/2PxPg)] / 2PxPg
	30	return (exp(-t_alpha)_alpha);
	31	}
	32	virtual const MDOUBLE d2Pij_dt2(const int i, const int j, const MDOUBLE t) const
	33	{
	34	// [-e^(-t/2PxPg)] / [(2PxPg)^2]
	35	return ( -exp(-t_alpha) _alpha * _alpha);
	36	}
	37
	38	virtual replacementModel* clone() const { return new indelModel(*this);}
	39
	40	virtual const int alphabetSize() const {return 2;};
	41
	42
	43	void setFreqX(const MDOUBLE freq_x);
	44	void setFreqG(const MDOUBLE freq_g);
	45
	46
	47	private:
	48	Vdouble _freq; // [0] X [1] -
	49	// save _alpha to make things faster. _alpha depends on _freq
	50	MDOUBLE _alpha;
	51	};
	52
	53
	54	#endif
	55
	56
	57
	58
	59
	60

+62

-0

libs/phylogeny/integerAlphabet.cpp less more

	0	#include "integerAlphabet.h"
	1	#include "logFile.h"
	2	#include "someUtil.h"
	3	#include <cctype>
	4	#include <cstdlib>
	5
	6	//return -99 if not succeeds.
	7	int integerAlphabet::fromChar(const string& s, const int pos) const {
	8	if (s.size() <= (pos + stringSize()-1)) {
	9	string textToPrint("integerAlphabet::fromChar: Trying to read a character past the end of the string. ");
	10	LOG(1,<<textToPrint<<endl);
	11	return -99;
	12	}
	13
	14	string s_sub=s.substr(pos,stringSize());
	15	int leftMostDigit(0);
	16	// find the left most digit. (s_sub can contain for example "0032" and so the left most digit is '3' and the number that should be returned is 32.
	17	for (leftMostDigit=0; leftMostDigit < s_sub.size(); ++leftMostDigit) {
	18	if (s_sub[leftMostDigit]!='0')
	19	break;
	20	}
	21	s_sub =s_sub.substr(leftMostDigit);
	22
	23	return (atoi(s_sub.c_str()));
	24	}
	25
	26	vector<int> integerAlphabet::fromString(const string &str) const {
	27	vector<int> vec;
	28	if (str.size()%stringSize()!=0) {
	29	errorMsg::reportError("error in integerAlphabet::fromString. String length should be a multiplication of stringSize");
	30	}
	31	for (int i=0;i<str.size();i+=stringSize())
	32	vec.push_back(fromChar(str,i));
	33	return vec;
	34	}
	35
	36
	37	int integerAlphabet::stringSize() const {
	38	int countDigits(1);
	39	int wholeNum = _size/10;
	40	while (wholeNum > 0) {
	41	countDigits++;
	42	wholeNum /=10;
	43	}
	44	return (countDigits);
	45	}
	46
	47
	48	string integerAlphabet::fromInt(const int in_id) const{
	49
	50	string res = int2string(in_id);
	51	while (res.size() <= stringSize()) {
	52	}
	53	return res;
	54	}
	55
	56	// There are no relations here.
	57	int integerAlphabet::relations(const int charInSeq, const int charToCheck) const{
	58	if (charInSeq == charToCheck)
	59	return 1;
	60	return 0;
	61	}

+29

-0

libs/phylogeny/integerAlphabet.h less more

	0	#ifndef ___INTEGER_ALPH
	1	#define ___INTEGER_ALPH
	2
	3	#include "alphabet.h"
	4	#include "errorMsg.h"
	5
	6
	7	class integerAlphabet : public alphabet {
	8	public:
	9	explicit integerAlphabet(int size): _size(size){};
	10	virtual ~integerAlphabet() {}
	11	virtual alphabet* clone() const { return new integerAlphabet(*this); }
	12	int unknown() const {return -2;}
	13	int gap() const {errorMsg::reportError("The method integerAlphabet::gap() is used"); return -1;}
	14	int size() const {return _size;}
	15	int stringSize() const; // one letter code.
	16	int relations(const int charInSeq, const int charToCheck) const;
	17	int fromChar(const string& str, const int pos) const;
	18	int fromChar(const char s) const;
	19	string fromInt(const int in_id) const;
	20	vector<int> fromString(const string& str) const;
	21	bool isSpecific(const int id) const {return true;}
	22
	23	private:
	24	int _size;
	25
	26	};
	27
	28	#endif

+141

-0

libs/phylogeny/jcDistance.h less more

	0	// $Id: jcDistance.h 1928 2007-04-04 16:46:12Z privmane $
	1
	2	#ifndef ___JC_DISTANCE
	3	#define ___JC_DISTANCE
	4
	5	#include "definitions.h"
	6	#include "distanceMethod.h"
	7	#include <typeinfo>
	8	#include <cmath>
	9	/*********************************************************
	10	Jukes-Cantor distance method.
	11	Assumes no constraints on replacement from one state to another.
	12	Receives size of alphabet in constructor, and this enables
	13	to have one class for JC-distance for nucleotides, a.a., and codons
	14	Weights are an input vector for giving additional weight to positions in the sequences.
	15	*******************************************************/
	16	class jcDistance : public distanceMethod {
	17
	18	public:
	19	explicit jcDistance() {}
	20	virtual jcDistance* clone() const{ return new jcDistance(*this);}
	21
	22	const MDOUBLE giveDistance( const sequence& s1,
	23	const sequence& s2,
	24	const vector<MDOUBLE> * weights,
	25	MDOUBLE* score=NULL) const {//score is not used here
	26
	27	if (typeid(s1.getAlphabet()) != typeid(s2.getAlphabet()))
	28	errorMsg::reportError("Error in jcDistance::giveDistance, s1 and s2 contain different type of alphabet");
	29
	30	// pS1Base and pS2Base are references to s1 and s2 respectively.
	31	// The method uses seq1 and seq2 and not s1 and s2, because when
	32	// the sequences contain mulAlphabet we must first convert them to the base alphabet
	33	const sequence* pS1Base(&s1);
	34	const sequence* pS2Base(&s2);
	35	const alphabet* alph = s1.getAlphabet();
	36	// if s1 and contains mulAlphabet
	37	const mulAlphabet* mulAlph = dynamic_cast<const mulAlphabet*>(alph);
	38	if (mulAlph!=NULL) {
	39	pS1Base = new sequence(s1,mulAlph->getBaseAlphabet());
	40	pS2Base = new sequence(s2,mulAlph->getBaseAlphabet());
	41	}
	42
	43	int alphabetSize = pS1Base->getAlphabet()->size();
	44
	45	// const MDOUBLE MAXDISTANCE=2.0;
	46	const MDOUBLE MAXDISTANCE=15;
	47
	48	MDOUBLE p =0;
	49	MDOUBLE len=0.0;
	50	if (weights == NULL) {
	51	for (int i = 0; i < pS1Base->seqLen() ; ++i) {
	52	if ((pS1Base)[i]<0 \|\| (pS2Base)[i]<0) continue; //gaps and missing data.
	53	len+=1.0;
	54	if ((pS1Base)[i] != (pS2Base)[i]) p++;
	55	}
	56	if (len==0) p=1;
	57	else p = p/len;
	58	} else {
	59	for (int i = 0; i < pS1Base->seqLen() ; ++i) {
	60	if ((pS1Base)[i]<0 \|\| (pS2Base)[i]<0) continue; //gaps and missing data.
	61	len += (*weights)[i];
	62	if ((pS1Base)[i] != (pS2Base)[i]) p+=((*weights)[i]);
	63	}
	64	if (len==0) p=1;
	65	else {
	66	p = p/len;
	67	}
	68	}
	69	if (pS1Base != &s1) {
	70	delete pS1Base;
	71	delete pS2Base;
	72	}
	73
	74	const MDOUBLE inLog = 1 - (MDOUBLE)alphabetSize*p/(alphabetSize-1.0);
	75	if (inLog<=0) {
	76	// LOG(6,<<" DISTANCES FOR JC DISTANCE ARE TOO BIG");
	77	// LOG(6,<<" p="<<p<<endl);
	78	return MAXDISTANCE;
	79	}
	80	MDOUBLE dis = -1.0 * (1.0 - 1.0/alphabetSize) * log (inLog);
	81	return dis;
	82	}
	83	};
	84
	85	class jcDistanceOLD : public distanceMethod {
	86	// in this version, if you have
	87	// a gap in front of a letter - it will be taken as a different
	88	// and also the length of the pairwise comparison will be increased.
	89	// in case of a gap-gap, it won't be a difference, but the length will
	90	// be increase.
	91
	92	private:
	93	const int _alphabetSize;
	94
	95	public:
	96	explicit jcDistanceOLD(const int alphabetSize) : _alphabetSize(alphabetSize) {
	97	}
	98	explicit jcDistanceOLD(const jcDistanceOLD& other) : _alphabetSize(other._alphabetSize) {
	99	}
	100	virtual jcDistanceOLD* clone() const{ return new jcDistanceOLD(*this);}
	101
	102	const MDOUBLE giveDistance( const sequence& s1,
	103	const sequence& s2,
	104	const vector<MDOUBLE> * weights,
	105	MDOUBLE* score=NULL) const {//score is not used here
	106	// const MDOUBLE MAXDISTANCE=2.0;
	107	const MDOUBLE MAXDISTANCE=15;
	108
	109	MDOUBLE p =0;
	110	MDOUBLE len=0.0;
	111	if (weights == NULL) {
	112	for (int i = 0; i < s1.seqLen() ; ++i) {
	113	//if (s1[i]<0 \|\| s2[i]<0) continue; //gaps and missing data.
	114	len+=1.0;
	115	if (s1[i] != s2[i]) p++;
	116	}
	117	if (len==0) p=1;
	118	else p = p/len;
	119	} else {
	120	for (int i = 0; i < s1.seqLen() ; ++i) {
	121	//if (s1[i]<0 \|\| s2[i]<0) continue; //gaps and missing data.
	122	len += (*weights)[i];
	123	if (s1[i] != s2[i]) p+=((*weights)[i]);
	124	}
	125	if (len==0) p=1;
	126	else {
	127	p = p/len;
	128	}
	129	}
	130	const MDOUBLE inLog = 1 - (MDOUBLE)_alphabetSize*p/(_alphabetSize-1.0);
	131	if (inLog<=0) {
	132	// LOG(6,<<" DISTANCES FOR JC DISTANCE ARE TOO BIG");
	133	// LOG(6,<<" p="<<p<<endl);
	134	return MAXDISTANCE;
	135	}
	136	MDOUBLE dis = -1.0 * (1.0 - 1.0/_alphabetSize) * log (inLog);
	137	return dis;
	138	}
	139	};
	140	#endif

+131

-0

libs/phylogeny/jones.dat.q less more

	0	" 58 "
	1	" 54 45 "
	2	" 81 16 528 "
	3	" 56 113 34 10 "
	4	" 57 310 86 49 9 "
	5	" 105 29 58 767 5 323 "
	6	" 179 137 81 130 59 26 119 "
	7	" 27 328 391 112 69 597 26 23 "
	8	" 36 22 47 11 17 9 12 6 16 "
	9	" 30 38 12 7 23 72 9 6 56 229 "
	10	" 35 646 263 26 7 292 181 27 45 21 14 "
	11	" 54 44 30 15 31 43 18 14 33 479 388 65 "
	12	" 15 5 10 4 78 4 5 5 40 89 248 4 43 "
	13	" 194 74 15 15 14 164 18 24 115 10 102 21 16 17 "
	14	" 378 101 503 59 223 53 30 201 73 40 59 47 29 92 285 "
	15	" 475 64 232 38 42 51 32 33 46 245 25 103 226 12 118 477 "
	16	" 9 126 8 4 115 18 10 55 8 9 52 10 24 53 6 35 12 "
	17	" 11 20 70 46 209 24 7 8 573 32 24 8 18 536 10 63 21 71 "
	18	" 298 17 16 31 62 20 45 47 11 961 180 14 323 62 23 38 112 25 16 "
	19	" 0.076748 0.051691 0.042645 0.051544 0.019803 0.040752 0.061830 "
	20	" 0.073152 0.022944 0.053761 0.091904 0.058676 0.023826 0.040126 "
	21	" 0.050901 0.068765 0.058565 0.014261 0.032102 0.066005 "
	22	" Ala Arg Asn Asp Cys Gln Glu Gly His Ile Leu Lys Met Phe Pro Ser Thr Trp Tyr Val "
	23	" S_ij = S_ji and PI_i for the Jones model based on the SWISSPROT "
	24	" Version 22 data. "
	25	" Rate Q_ij=S_ij*PI_j. "
	26	" The rest of the file is not used. "
	27	" Prepared by Z. Yang, March 1995. "
	28	" See the following reference for notation: "
	29	" Yang, Z., R. Nielsen and M. Hasegawa. 1998. Models of amino acid substitution and "
	30	" applications to mitochondrial protein evolution. Mol. Biol. Evol. 15:1600-1611. "
	31	" ----------------------------------------------------------------------- "
	32	" 426 "
	33	" 333 185 "
	34	" 596 80 2134 "
	35	" 159 214 54 20 "
	36	" 332 1203 277 192 14 "
	37	" 920 176 286 4497 11 1497 "
	38	" 1853 954 470 907 158 144 999 "
	39	" 88 716 704 244 58 1027 69 71 "
	40	" 286 114 198 59 34 37 72 44 37 "
	41	" 394 332 88 62 79 497 101 80 217 2086 "
	42	" 294 3606 1209 148 15 1289 1210 215 115 121 140 "
	43	" 185 100 56 34 27 78 50 47 33 1129 1567 167 "
	44	" 84 21 33 16 115 14 23 28 69 354 1690 17 76 "
	45	" 1395 360 64 74 27 629 106 171 249 54 882 117 36 66 "
	46	" 3664 661 2706 390 559 278 236 1861 214 274 691 351 89 468 1839 "
	47	" 3920 360 1069 216 91 227 217 266 116 1420 256 653 579 54 653 3527 "
	48	" 19 171 9 5 60 20 17 106 5 13 127 16 15 56 8 64 18 "
	49	" 49 62 178 142 246 59 26 34 777 102 131 30 25 1276 32 259 73 60 "
	50	" 2771 111 86 195 150 100 336 420 32 6260 2020 99 937 307 142 320 805 44 63 "
	51	" A R N D C Q E G H I L K M F P S T W Y V "
	52	" Ala Arg Asn Asp Cys Gln Glu Gly His Ile Leu Lys Met Phe Pro Ser Thr Trp Tyr Val "
	53	" Accepted point mutations (x10), similar to Figure 80 of Dayhoff et "
	54	" al. (1978). SwissProt version 22 data. "
	55	" ------------------------------------------------------------------------------ "
	56	" 256458 426 333 596 159 332 920 1853 88 286 394 294 185 84 1395 3664 3920 19 49 2771 "
	57	" 426 182302 185 80 214 1203 176 954 716 114 332 3606 100 21 360 661 360 171 62 111 "
	58	" 333 185 150772 2134 54 277 286 470 704 198 88 1209 56 33 64 2706 1069 9 178 86 "
	59	" 596 80 2134 178390 20 192 4497 907 244 59 62 148 34 16 74 390 216 5 142 195 "
	60	" 159 214 54 20 68120 14 11 158 58 34 79 15 27 115 27 559 91 60 246 150 "
	61	" 332 1203 277 192 14 139546 1497 144 1027 37 497 1289 78 14 629 278 227 20 59 100 "
	62	" 920 176 286 4497 11 1497 218432 999 69 72 101 1210 50 23 106 236 217 17 26 336 "
	63	" 1853 954 470 907 158 144 999 255274 71 44 80 215 47 28 171 1861 266 106 34 420 "
	64	" 88 716 704 244 58 1027 69 71 77124 37 217 115 33 69 249 214 116 5 777 32 "
	65	" 286 114 198 59 34 37 72 44 37 191018 2086 121 1129 354 54 274 1420 13 102 6260 "
	66	" 394 332 88 62 79 497 101 80 217 2086 319504 140 1567 1690 882 691 256 127 131 2020 "
	67	" 294 3606 1209 148 15 1289 1210 215 115 121 140 206568 167 17 117 351 653 16 30 99 "
	68	" 185 100 56 34 27 78 50 47 33 1129 1567 167 84670 76 36 89 579 15 25 937 "
	69	" 84 21 33 16 115 14 23 28 69 354 1690 17 76 143088 66 468 54 56 1276 307 "
	70	" 1395 360 64 74 27 629 106 171 249 54 882 117 36 66 175488 1839 653 8 32 142 "
	71	" 3664 661 2706 390 559 278 236 1861 214 274 691 351 89 468 1839 234536 3527 64 259 320 "
	72	" 3920 360 1069 216 91 227 217 266 116 1420 256 653 579 54 653 3527 203636 18 73 805 "
	73	" 19 171 9 5 60 20 17 106 5 13 127 16 15 56 8 64 18 50486 60 44 "
	74	" 49 62 178 142 246 59 26 34 777 102 131 30 25 1276 32 259 73 60 114728 63 "
	75	" 2771 111 86 195 150 100 336 420 32 6260 2020 99 937 307 142 320 805 44 63 223724 "
	76	" Observed difference counts from pairwise comparisons, with ancestral sequences "
	77	" constructed by parsimony. F(t) = PI*P(t). "
	78	" Based on the SwissProt 22 data, kindly provided by D. Jones (Jones et al. 1992) "
	79	" ------------------------------------------------------------------------------- "
	80	" Ala 0.98754 0.00030 0.00023 0.00042 0.00011 0.00023 0.00065 0.00130 0.00006 0.00020 0.00028 0.00021 0.00013 0.00006 0.00098 0.00257 0.00275 0.00001 0.00003 0.00194 "
	81	" Arg 0.00044 0.98974 0.00019 0.00008 0.00022 0.00125 0.00018 0.00099 0.00075 0.00012 0.00035 0.00376 0.00010 0.00002 0.00037 0.00069 0.00037 0.00018 0.00006 0.00012 "
	82	" Asn 0.00042 0.00023 0.98720 0.00269 0.00007 0.00035 0.00036 0.00059 0.00089 0.00025 0.00011 0.00153 0.00007 0.00004 0.00008 0.00342 0.00135 0.00001 0.00022 0.00011 "
	83	" Asp 0.00062 0.00008 0.00223 0.98954 0.00002 0.00020 0.00470 0.00095 0.00025 0.00006 0.00006 0.00015 0.00004 0.00002 0.00008 0.00041 0.00023 0.00001 0.00015 0.00020 "
	84	" Cys 0.00043 0.00058 0.00015 0.00005 0.99432 0.00004 0.00003 0.00043 0.00016 0.00009 0.00021 0.00004 0.00007 0.00031 0.00007 0.00152 0.00025 0.00016 0.00067 0.00041 "
	85	" Gln 0.00044 0.00159 0.00037 0.00025 0.00002 0.98955 0.00198 0.00019 0.00136 0.00005 0.00066 0.00170 0.00010 0.00002 0.00083 0.00037 0.00030 0.00003 0.00008 0.00013 "
	86	" Glu 0.00080 0.00015 0.00025 0.00392 0.00001 0.00130 0.99055 0.00087 0.00006 0.00006 0.00009 0.00105 0.00004 0.00002 0.00009 0.00021 0.00019 0.00001 0.00002 0.00029 "
	87	" Gly 0.00136 0.00070 0.00035 0.00067 0.00012 0.00011 0.00074 0.99350 0.00005 0.00003 0.00006 0.00016 0.00003 0.00002 0.00013 0.00137 0.00020 0.00008 0.00003 0.00031 "
	88	" His 0.00021 0.00168 0.00165 0.00057 0.00014 0.00241 0.00016 0.00017 0.98864 0.00009 0.00051 0.00027 0.00008 0.00016 0.00058 0.00050 0.00027 0.00001 0.00182 0.00008 "
	89	" Ile 0.00029 0.00011 0.00020 0.00006 0.00003 0.00004 0.00007 0.00004 0.00004 0.98729 0.00209 0.00012 0.00113 0.00035 0.00005 0.00027 0.00142 0.00001 0.00010 0.00627 "
	90	" Leu 0.00023 0.00019 0.00005 0.00004 0.00005 0.00029 0.00006 0.00005 0.00013 0.00122 0.99330 0.00008 0.00092 0.00099 0.00052 0.00040 0.00015 0.00007 0.00008 0.00118 "
	91	" Lys 0.00027 0.00331 0.00111 0.00014 0.00001 0.00118 0.00111 0.00020 0.00011 0.00011 0.00013 0.99100 0.00015 0.00002 0.00011 0.00032 0.00060 0.00001 0.00003 0.00009 "
	92	" Met 0.00042 0.00023 0.00013 0.00008 0.00006 0.00018 0.00011 0.00011 0.00007 0.00255 0.00354 0.00038 0.98818 0.00017 0.00008 0.00020 0.00131 0.00003 0.00006 0.00212 "
	93	" Phe 0.00011 0.00003 0.00004 0.00002 0.00015 0.00002 0.00003 0.00004 0.00009 0.00047 0.00227 0.00002 0.00010 0.99360 0.00009 0.00063 0.00007 0.00008 0.00171 0.00041 "
	94	" Pro 0.00148 0.00038 0.00007 0.00008 0.00003 0.00067 0.00011 0.00018 0.00026 0.00006 0.00093 0.00012 0.00004 0.00007 0.99270 0.00194 0.00069 0.00001 0.00003 0.00015 "
	95	" Ser 0.00287 0.00052 0.00212 0.00031 0.00044 0.00022 0.00018 0.00146 0.00017 0.00021 0.00054 0.00027 0.00007 0.00037 0.00144 0.98556 0.00276 0.00005 0.00020 0.00025 "
	96	" Thr 0.00360 0.00033 0.00098 0.00020 0.00008 0.00021 0.00020 0.00024 0.00011 0.00131 0.00024 0.00060 0.00053 0.00005 0.00060 0.00324 0.98665 0.00002 0.00007 0.00074 "
	97	" Trp 0.00007 0.00065 0.00003 0.00002 0.00023 0.00008 0.00006 0.00040 0.00002 0.00005 0.00048 0.00006 0.00006 0.00021 0.00003 0.00024 0.00007 0.99686 0.00023 0.00017 "
	98	" Tyr 0.00008 0.00010 0.00030 0.00024 0.00041 0.00010 0.00004 0.00006 0.00130 0.00017 0.00022 0.00005 0.00004 0.00214 0.00005 0.00043 0.00012 0.00010 0.99392 0.00011 "
	99	" Val 0.00226 0.00009 0.00007 0.00016 0.00012 0.00008 0.00027 0.00034 0.00003 0.00511 0.00165 0.00008 0.00076 0.00025 0.00012 0.00026 0.00066 0.00004 0.00005 0.98761 "
	100	" P(0.01), amino acid exchange data generated from SWISSPROT Release 22.0 "
	101	" Ref. Jones D.T., Taylor W.R. and Thornton J.M. (1992) CABIOS 8:275-282 "
	102	" Usable sequences: 23824 "
	103	" Final alignments: 5437 "
	104	" Accepted point mutations: 92883 "
	105	" A R N D C Q E G H I L K M F P S T W Y V "
	106	" 0.0767477 100 "
	107	" 0.0516907 82.3263 "
	108	" 0.0426448 102.697 "
	109	" 0.0515445 83.8924 "
	110	" 0.0198027 45.6097 "
	111	" 0.0407523 83.8825 "
	112	" 0.0618296 75.7914 "
	113	" 0.0731516 52.1273 "
	114	" 0.0229438 91.1374 "
	115	" 0.0537609 101.99 "
	116	" 0.0919042 53.7672 "
	117	" 0.0586762 72.2308 "
	118	" 0.0238262 94.8144 "
	119	" 0.0401265 51.3146 "
	120	" 0.0509007 58.5874 "
	121	" 0.0687652 115.899 "
	122	" 0.0585647 107.092 "
	123	" 0.0142613 25.2297 "
	124	" 0.0321015 48.7629 "
	125	" 0.0660051 99.4571 "
	126	" "
	127	" Normalized Relative "
	128	" frequency mutabilities "
	129	" (SUM m*f) = 80.240436 "
	130	" ------------------------------------------- "

+4

-0

libs/phylogeny/knownBugs less more

	0	tree:1031 - tree::rootToUnrootedTree
	1	the node-id numbers will end up with a "hole" where the removed root used to be, and this may cause problems later on.
	2	Wed May 31 11:32:03 IDT 2006, by Matan
	3	⏎

+380

-0

libs/phylogeny/likeDist.cpp less more

	0	// $Id: likeDist.cpp 9582 2011-06-21 11:31:21Z cohenofi $
	1
	2	#include "likeDist.h"
	3	#include "numRec.h"
	4	#include "someUtil.h"
	5
	6	stochasticProcess& likeDist::getNonConstStochasticProcess() {
	7	if (!_nonConstSpPtr) {
	8	errorMsg::reportError("likeDist::getNonConstStochasticProcess: Can't give non-const stochasticProcess because the stochasticProcess that was given to the constructor of this likeDist object was const");
	9	}
	10	return *_nonConstSpPtr;
	11	}
	12
	13	// ======================= functors needed for the computations =============
	14
	15	class C_evalLikeDistDirect{
	16	private:
	17	const stochasticProcess& _sp;
	18	const sequence& _s1;
	19	const sequence& _s2;
	20	const vector<MDOUBLE> * _weights;
	21	public:
	22	C_evalLikeDistDirect(const stochasticProcess& inS1,
	23	const sequence& s1,
	24	const sequence& s2,
	25	const vector<MDOUBLE> * weights): _sp(inS1),_s1(s1),_s2(s2),_weights(weights) {};
	26
	27	MDOUBLE operator() (MDOUBLE dist) const {
	28	return -likeDist::evalLikelihoodForDistance(_sp,_s1,_s2,dist,_weights);
	29	}
	30	};
	31
	32	MDOUBLE likeDist::evalLikelihoodForDistance(const stochasticProcess& sp,
	33	const sequence& s1,
	34	const sequence& s2,
	35	const MDOUBLE dist,
	36	const vector<MDOUBLE> * weights) {
	37	MDOUBLE sumL=0.0; // sum of log likelihoods
	38	MDOUBLE posLikelihood = 0.0; // likelihood of a specific position
	39	for (int pos=0; pos < s1.seqLen(); ++pos){
	40	if (s1.isUnknown(pos) && s2.isUnknown(pos)) continue; // the case of two unknowns
	41	posLikelihood = 0.0;
	42	if (s1.isUnknown(pos) && s2.isSpecific(pos)) {
	43	// this is the more complicated case, where s1 = ?, s2 = specific
	44	posLikelihood = sp.freq(s2[pos]);
	45	} else if (s2.isUnknown(pos) && s1.isSpecific(pos)) {
	46	posLikelihood = sp.freq(s1[pos]);
	47	} else {
	48	for (int rateCategor = 0; rateCategor<sp.categories(); ++rateCategor) {
	49	MDOUBLE rate = sp.rates(rateCategor);
	50	MDOUBLE pij= 0.0;
	51	if (s1.isSpecific(pos) && s2.isSpecific(pos)) {//simple case, where AA i is changing to AA j
	52	pij= sp.Pij_t(s1[pos],s2[pos],dist*rate);
	53	posLikelihood += pij * sp.freq(s1[pos])*sp.ratesProb(rateCategor);
	54	} else {// this is the most complicated case, when you have
	55	// combinations of letters, for example B in one
	56	// sequence and ? in the other.
	57	for (int iS1 =0; iS1< sp.alphabetSize(); ++iS1) {
	58	for (int iS2 =0; iS2< sp.alphabetSize(); ++iS2) {
	59	if ((s1.getAlphabet()->relations(s1[pos],iS1)) &&
	60	(s2.getAlphabet()->relations(s2[pos],iS2))) {
	61	posLikelihood += sp.freq(iS1)sp.Pij_t(iS1,iS2,distrate)*sp.ratesProb(rateCategor);
	62	}
	63	}
	64	}
	65	}
	66	} // end of for on the rates
	67	}
	68	assert(posLikelihood!=0.0);
	69	sumL += log(posLikelihood)(weights ? (weights)[pos]:1.0);
	70	}
	71	return sumL;
	72	};
	73
	74	class C_evalLikeDistDirect_d{ // derivative.
	75	private:
	76	const stochasticProcess& _sp;
	77	const sequence& _s1;
	78	const sequence& _s2;
	79	const vector<MDOUBLE> * _weights;
	80	public:
	81	C_evalLikeDistDirect_d(const stochasticProcess& sp,
	82	const sequence& s1,
	83	const sequence& s2,
	84	const vector<MDOUBLE> * weights): _sp(sp),_s1(s1),_s2(s2),_weights(weights) {};
	85
	86	MDOUBLE operator() (MDOUBLE dist) const {
	87	MDOUBLE sumL=0.0; // sum of log likelihoods
	88	MDOUBLE posLikelihood = 0.0; // likelihood of a specific position
	89	MDOUBLE posLikelihood_d = 0.0; // derivative of the likelihood at a specific position
	90	for (int pos=0; pos < _s1.seqLen(); ++pos){
	91	if (_s1.isUnknown(pos) && _s2.isUnknown(pos)) continue; // the case of two unknowns
	92	posLikelihood = 0.0;
	93	posLikelihood_d = 0.0;
	94	if (_s1.isUnknown(pos) && _s2.isSpecific(pos)) {
	95	// this is the more complicated case, where s1 = ?, s2 = specific
	96	posLikelihood = _sp.freq(_s2[pos]);
	97	posLikelihood_d =0.0;
	98	} else if (_s2.isUnknown(pos) && _s1.isSpecific(pos)) {
	99	posLikelihood = _sp.freq(_s1[pos]);
	100	posLikelihood_d =0.0;
	101	} else {
	102	for (int rateCategor = 0; rateCategor<_sp.categories(); ++rateCategor) {
	103	MDOUBLE rate = _sp.rates(rateCategor);
	104	MDOUBLE pij= 0.0;
	105	MDOUBLE dpij=0.0;
	106	if (_s1.isSpecific(pos) && _s2.isSpecific(pos)) {
	107	//simple case, where AA i is changing to AA j
	108	pij= _sp.Pij_t(_s1[pos],_s2[pos],dist*rate);
	109	dpij= _sp.dPij_dt(_s1[pos],_s2[pos],distrate)rate;
	110	MDOUBLE tmp = _sp.freq(_s1[pos])*_sp.ratesProb(rateCategor);
	111	posLikelihood += pij *tmp;
	112	posLikelihood_d += dpij*tmp;
	113	} else {// this is the most complicated case, when you have combinations of letters,
	114	// for example B in one sequence and ? in the other.
	115	for (int iS1 =0; iS1< _sp.alphabetSize(); ++iS1) {
	116	for (int iS2 =0; iS2< _sp.alphabetSize(); ++iS2) {
	117	if ((_s1.getAlphabet()->relations(_s1[pos],iS1)) &&
	118	(_s2.getAlphabet()->relations(_s2[pos],iS2))) {
	119	MDOUBLE exp = _sp.freq(iS1)*_sp.ratesProb(rateCategor);
	120	posLikelihood += exp* _sp.Pij_t(iS1,iS2,dist*rate);
	121	posLikelihood_d += exp * _sp.dPij_dt(iS1,iS2,distrate)rate;
	122	}
	123	}
	124	}
	125	}
	126	}// end of for rate categories
	127	}
	128	assert(posLikelihood>0.0);
	129	sumL += (posLikelihood_d/posLikelihood)(_weights ? (_weights)[pos]:1.0);
	130	}
	131	return -sumL;
	132	};
	133	};
	134
	135
	136	// THIS FUNCTION EVALUATES THE LIKELIHOOD GIVEN THE DISTANCE
	137	MDOUBLE likeDist::evalLogLikelihoodGivenDistance(const sequence& s1, const sequence& s2,
	138	const MDOUBLE dis2evaluate) {
	139	C_evalLikeDistDirect Cev(_sp,s1,s2,NULL);
	140	return -Cev.operator ()(dis2evaluate);
	141	}
	142
	143	MDOUBLE likeDist::giveDistanceThroughCTC( const sequence& s1,
	144	const sequence& s2,
	145	const vector<MDOUBLE> * weights,
	146	MDOUBLE* score) const {
	147	// only in the case of homogenous model - work through pairwise EM like
	148	countTableComponentGam ctc;
	149	if (_sp.categories() != 1) {
	150	errorMsg::reportError("this function only work for homogenous model.");
	151	}
	152	ctc.countTableComponentAllocatePlace(s1.getAlphabet()->size(),1);
	153	for (int i=0; i<s1.seqLen(); ++i) {
	154	ctc.addToCounts(s1[i],s2[i],0,weights?(*weights)[i]:1.0);
	155	}
	156	MDOUBLE resL =0;
	157	return giveDistance(ctc,resL);
	158	}
	159
	160	const MDOUBLE likeDist::giveDistance(const countTableComponentGam& ctc,
	161	MDOUBLE& resQ,
	162	const MDOUBLE initialGuess) const {
	163	//return giveDistanceNR(ctc,resL,initialGuess);
	164	return giveDistanceBrent(ctc,resQ,initialGuess);
	165	}
	166
	167	const MDOUBLE likeDist::giveDistanceBrent(const countTableComponentGam& ctc,
	168	MDOUBLE& resL,
	169	const MDOUBLE initialGuess) const {
	170	const MDOUBLE ax=_minPairwiseDistance,bx=initialGuess,cx=_maxPairwiseDistance,tol=_toll;
	171	MDOUBLE dist=-1.0;
	172	resL = -dbrent(ax,bx,cx,
	173	C_evalLikeDist(ctc,_sp,_unObservableData_p),
	174	C_evalLikeDist_d(ctc,_sp,_unObservableData_p),
	175	tol,
	176	&dist);
	177	return dist;
	178	}
	179
	180	template <typename regF, typename dF>
	181	MDOUBLE myNRmethod(MDOUBLE low, MDOUBLE current, MDOUBLE high, regF f,
	182	dF df, const MDOUBLE tol, const int max_it, int & zeroFound) { // finding zero of a function.
	183	zeroFound = 1;
	184	MDOUBLE currentF = f(current);
	185	if (fabs(currentF)<tol) return current;
	186	MDOUBLE lowF = f(low);
	187	MDOUBLE highF = f(high);
	188	if (((lowF>0) && (highF>0)) \|\| ((lowF<0) && (highF<0))) {// unable to find a zero
	189	zeroFound = 0;
	190	return 0;
	191	}
	192	if (lowF>0) {// fixing things to be in the right order.
	193	MDOUBLE tmp = low;
	194	low = high;
	195	high = tmp;
	196	tmp = lowF;
	197	lowF = highF;
	198	highF = tmp;
	199	}
	200	if (currentF>0) {
	201	high = current;
	202	highF = currentF;
	203	} else {
	204	low = current;
	205	lowF = currentF;
	206	} // now the zero is between current and either low or high.
	207
	208	MDOUBLE currentIntervalSize = fabs(low-high);
	209	MDOUBLE oldIntervalSize = currentIntervalSize;
	210
	211	// we have to decide if we do NR or devide the interval by two:
	212	// we want to check if the next NR step is within our interval
	213	// recall the the next NR guess is Xn+1 = Xn - f(Xn) / f(Xn+1)
	214	// So we want (current - currentF/currentDF) to be between low and high
	215	for (int i=0 ; i < max_it; ++i) {
	216	MDOUBLE currentDF = df(current);
	217	MDOUBLE newGuess = current - currentF/currentDF;
	218	if ((newGuess<low && newGuess> high) \|\| (newGuess>low && newGuess< high)) {
	219	// in this case we should do a NR step.
	220	current = newGuess;
	221	currentF = f(current);
	222	if (currentF > 0){
	223	high = current;
	224	highF = currentF;
	225	} else {
	226	low = current;
	227	lowF = currentF;
	228	}
	229
	230	oldIntervalSize = currentIntervalSize;
	231	currentIntervalSize =fabs (high-low);
	232	if (currentIntervalSize < tol) {
	233	return current;
	234	}
	235	//LOG(5,<<"NR: low= "<<low<<" high= "<<high<<endl);
	236	}
	237	else { // bisection
	238	oldIntervalSize = currentIntervalSize;
	239	currentIntervalSize /= 2.0;
	240	current = (low+high)/2.0;
	241	currentF = f(current);
	242	if (currentF > 0){
	243	high = current;
	244	highF = currentF;
	245	} else {
	246	low = current;
	247	lowF = currentF;
	248	}
	249	//LOG(5,<<"BIS: low= "<<low<<" high= "<<high<<endl);
	250	if (currentIntervalSize < tol) {
	251	return current;
	252	}
	253
	254	}
	255	}
	256	errorMsg::reportError("to many iterations in myNR function");
	257	return 0;
	258	}
	259
	260	const MDOUBLE likeDist::giveDistanceNR( const countTableComponentGam& ctc,
	261	MDOUBLE& resL,
	262	const MDOUBLE initialGuess) const {
	263	//change bx so that it will be the current branch length!
	264	const MDOUBLE ax=_minPairwiseDistance,bx=initialGuess,cx=_maxPairwiseDistance,tol=_toll;
	265	// LOG(5,<<"===================================================\n");
	266	MDOUBLE dist=-1.0;
	267	int zeroFound = 0;
	268	dist = myNRmethod(ax,bx,cx,
	269	C_evalLikeDist_d(ctc,_sp),
	270	C_evalLikeDist_d2(ctc,_sp),
	271	tol,
	272	100,
	273	zeroFound);// max it for NR;
	274	if (zeroFound == 0) {// there was an error finding a zero
	275	dist = bx;
	276	}
	277
	278	return dist;
	279	}
	280
	281
	282
	283
	284
	285
	286
	287
	288
	289
	290
	291	/*
	292
	293
	294
	295
	296	const MDOUBLE likeDist::giveDistance( // the NR version.
	297	const countTableComponentGam& ctc,
	298	MDOUBLE& resL) const {
	299	LOG(5,<<"=============="<<endl);
	300	MDOUBLE oldGuess=0.05; // move to parameters.
	301	if (oldGuess<0) oldGuess=0.05; // move up.
	302	int max_it = 100;
	303	MDOUBLE oldDist =0;
	304	MDOUBLE currentDist =oldGuess;
	305	MDOUBLE newDer =VERYBIG;
	306	MDOUBLE oldDer =VERYBIG;
	307	//const MDOUBLE ax=0,bx=1.0,cx=_maxPairwiseDistance,tol=_toll;
	308	for (int i=0; i < max_it; ++i){
	309	MDOUBLE sumDL=0.0;
	310	MDOUBLE sumDL2=0.0;
	311	for (int alph1=0; alph1 < ctc.alphabetSize(); ++alph1){
	312	for (int alph2=0; alph2 < ctc.alphabetSize(); ++alph2){
	313	for (int rateCategor = 0; rateCategor<_s1.categories(); ++rateCategor) {
	314	MDOUBLE rate = _s1.rates(rateCategor);
	315
	316	MDOUBLE pij= _s1.Pij_t(alph1,alph2,currentDist*rate);
	317	MDOUBLE dpij = _s1.dPij_dt(alph1,alph2,currentDist*rate);
	318	MDOUBLE dpij2 = _s1.d2Pij_dt2(alph1,alph2,currentDist*rate);
	319	if (pij==0) {
	320	pij = 0.000000001;
	321	dpij = 0.000000001;
	322	}
	323	sumDL+= ctc.getCounts(alph1,alph2,rateCategor)*dpij
	324	*rate/pij;
	325	sumDL2+= ctc.getCounts(alph1,alph2,rateCategor)*rate*(pij*dpij2-dpij *dpij)
	326	/(pij*pij);
	327	}
	328	}
	329	}
	330	oldDer = newDer;
	331	newDer = sumDL;
	332	LOG(5,<<"\ndistance = "<<currentDist<<endl);
	333	LOG(5,<<"derivation = "<<sumDL<<endl);
	334	LOG(5,<<"sec derivation = "<<sumDL2<<endl);
	335	oldDist = currentDist;
	336	if ((fabs(newDer) < fabs(oldDer)) && (sumDL2 < 0)) {
	337	currentDist = currentDist - newDer/sumDL2;
	338	}
	339	else {
	340	currentDist = currentDist / 2;
	341	}
	342	MDOUBLE epsilonForDeriv = 0.001;// move up
	343	if (fabs(newDer) < epsilonForDeriv) break;
	344
	345	}
	346
	347	return currentDist;
	348	}*/
	349
	350	const MDOUBLE likeDist::giveDistance(const sequence& s1,
	351	const sequence& s2,
	352	const vector<MDOUBLE> * weights,
	353	MDOUBLE* score) const {
	354
	355	const MDOUBLE ax=_minPairwiseDistance, cx=_maxPairwiseDistance,tol=_toll;
	356	MDOUBLE bx=_jcDist.giveDistance(s1,s2,weights,score)/=1.0/;
	357	if (!(bx==bx)) bx = 1.0; // safety check that the JC distance did not return nan (not a number)
	358	if (!(bx>0)) bx = 0.000001; // safety check that the JC distance returned a positive number
	359	MDOUBLE dist=-1.0;
	360	MDOUBLE resL = -dbrent(ax,bx,cx,
	361	C_evalLikeDistDirect(_sp,s1,s2,weights),
	362	C_evalLikeDistDirect_d(_sp,s1,s2,weights),
	363	tol,
	364	&dist);
	365	if (score) *score = resL;
	366	return dist;
	367	}
	368
	369	const MDOUBLE likeDist::giveLikelihood(const sequence& s1,
	370	const sequence& s2,
	371	MDOUBLE distance,
	372	const vector<MDOUBLE> * weights) const
	373	{
	374
	375
	376	C_evalLikeDistDirect evalDis(_sp,s1,s2,weights);
	377	return -evalDis(distance);
	378
	379	}

+208

-0

libs/phylogeny/likeDist.h less more

	0	// $Id: likeDist.h 9752 2011-08-05 20:27:25Z rubi $
	1
	2	#ifndef ___LIKE_DIST_H
	3	#define ___LIKE_DIST_H
	4
	5	#include "definitions.h"
	6	#include "countTableComponent.h"
	7	#include "distanceMethod.h"
	8	#include "stochasticProcess.h"
	9	#include "logFile.h"
	10	#include "jcDistance.h"
	11	#include "unObservableData.h"
	12	#include <cmath>
	13	using namespace std;
	14
	15	class likeDist : public distanceMethod {
	16	public:
	17	// WARNING: the stochasticProcess is NOT copied. The same object is used
	18	explicit likeDist(const stochasticProcess& sp,
	19	const MDOUBLE toll =0.0001,
	20	const MDOUBLE maxPairwiseDistance = 5.0,
	21	const MDOUBLE minPairwiseDistance = 0.0000001,
	22	unObservableData* unObservableData_p=NULL)
	23	: _sp(sp),_nonConstSpPtr(NULL),_toll(toll),_maxPairwiseDistance(maxPairwiseDistance),_minPairwiseDistance(minPairwiseDistance),_unObservableData_p(unObservableData_p) {}
	24
	25	likeDist(const likeDist& other)
	26	: _sp(other._sp),_nonConstSpPtr(other._nonConstSpPtr),_toll(other._toll),_maxPairwiseDistance(other._maxPairwiseDistance),_minPairwiseDistance(other._minPairwiseDistance),_jcDist(other._jcDist) {}
	27
	28	virtual likeDist* clone() const {return new likeDist(*this);}
	29	// This constructor allows non-const stochasticProcess so that likeDist will be able to change alpha, etc.
	30	explicit likeDist(stochasticProcess& sp,
	31	const MDOUBLE toll =0.0001,
	32	const MDOUBLE maxPairwiseDistance = 5.0,
	33	const MDOUBLE minPairwiseDistance = 0.0000001)
	34	: _sp(sp),_nonConstSpPtr(&sp),_toll(toll),_maxPairwiseDistance(maxPairwiseDistance),_minPairwiseDistance(minPairwiseDistance) {}
	35
	36	// THIS FUNCTION DOES NOT RETURN THE LOG LIKELIHOOD IN RESQ, BUT RATHER "Q", THE CONTRIBUTION of this edge
	37	// TO THE EXPECTED LOG-LIKELIHOOD (SEE SEMPHY PAPER).
	38	// NEVERTHELESS, THE t that optimizes Q is the same t that optimizes log-likelihood.
	39	const MDOUBLE giveDistance(const countTableComponentGam& ctc,
	40	MDOUBLE& resQ,
	41	const MDOUBLE initialGuess= 0.03) const; // initial guess
	42
	43	// given two sequences, it evaluates the log likelihood.
	44	MDOUBLE evalLogLikelihoodGivenDistance(const sequence& s1,
	45	const sequence& s2,
	46	const MDOUBLE dis2evaluate);
	47
	48	// returns the estimated ML distance between the 2 sequences.
	49	// if score is given, it will be the log-likelihood.
	50	const MDOUBLE giveDistance(const sequence& s1,
	51	const sequence& s2,
	52	const vector<MDOUBLE> * weights,
	53	MDOUBLE* score=NULL) const;
	54
	55	// this function creates a countTableComponent (ctc) from the two sequences.
	56	// it then computes the distance from this ctc.
	57	// THIS FUNCTION DOES NOT RETURN THE LOG LIKELIHOOD IN score, BUT RATHER "Q", THE CONTRIBUTION of this edge
	58	// TO THE EXPECTED LOG-LIKELIHOOD (SEE SEMPHY PAPER).
	59	// NEVERTHELESS, THE t that optimizes Q is the same t that optimizes log-likelihood.
	60	MDOUBLE giveDistanceThroughCTC(const sequence& s1,
	61	const sequence& s2,
	62	const vector<MDOUBLE> * weights,
	63	MDOUBLE* score=NULL) const;
	64
	65	const MDOUBLE giveLikelihood(const sequence& s1,
	66	const sequence& s2,
	67	MDOUBLE distance,
	68	const vector<MDOUBLE> * weights=NULL) const;
	69
	70	// return the stochasticProcess
	71	const stochasticProcess& getStochasticProcess() const {return _sp;}
	72	stochasticProcess& getNonConstStochasticProcess();
	73	bool isTheInternalStochasticProcessConst() const {return !_nonConstSpPtr;}
	74	MDOUBLE getToll() const {return _toll;}
	75	MDOUBLE getMaxPairwiseDistance() const {return _maxPairwiseDistance;}
	76
	77	protected:
	78	const stochasticProcess &_sp;
	79	stochasticProcess *_nonConstSpPtr;
	80	const MDOUBLE _toll;
	81	const MDOUBLE _maxPairwiseDistance;
	82	const MDOUBLE _minPairwiseDistance;
	83	jcDistance _jcDist;
	84	unObservableData* _unObservableData_p;
	85
	86	private:
	87	const MDOUBLE giveDistanceBrent( const countTableComponentGam& ctc,
	88	MDOUBLE& resL,
	89	const MDOUBLE initialGuess= 0.03) const; // initial guess
	90	const MDOUBLE giveDistanceNR( const countTableComponentGam& ctc,
	91	MDOUBLE& resL,
	92	const MDOUBLE initialGuess= 0.03) const; // initial guess
	93
	94
	95
	96	public:
	97	static MDOUBLE evalLikelihoodForDistance(const stochasticProcess& sp,
	98	const sequence& s1,
	99	const sequence& s2,
	100	const MDOUBLE dist,
	101	const vector<MDOUBLE> * weights=NULL);
	102
	103	};
	104
	105	//////////////////////////////////////////////////////////////////////////
	106	class C_evalLikeDist{
	107	private:
	108	const countTableComponentGam& _ctc;
	109	const stochasticProcess& _sp;
	110	unObservableData* _unObservableData_p;
	111
	112	public:
	113	C_evalLikeDist(const countTableComponentGam& ctc,
	114	const stochasticProcess& inS1,unObservableData* unObservableData_p=NULL)
	115	:_ctc(ctc), _sp(inS1),_unObservableData_p(unObservableData_p) {};
	116
	117	MDOUBLE operator() (MDOUBLE dist) {
	118	const MDOUBLE epsilonPIJ = 1e-10;
	119	MDOUBLE sumL=0.0;
	120	for (int alph1=0; alph1 < _ctc.alphabetSize(); ++alph1){
	121	for (int alph2=0; alph2 < _ctc.alphabetSize(); ++alph2){
	122	for (int rateCategor = 0; rateCategor<_sp.categories(); ++rateCategor) {
	123	MDOUBLE rate = _sp.rates(rateCategor);
	124	MDOUBLE pij= _sp.Pij_t(alph1,alph2,dist*rate);
	125	if (pij<epsilonPIJ) pij = epsilonPIJ;//SEE REMARK (1) FOR EXPLANATION
	126	sumL+= _ctc.getCounts(alph1,alph2,rateCategor)(log(pij)-log(_sp.freq(alph2)));//_sp.ratesProb(rateCategor);// removed.
	127	}
	128	}
	129	}
	130	//if(_unObservableData_p)
	131	// sumL = sumL/(1- exp(_unObservableData_p->getlogLforMissingData())); // need to find an efficient way to update LofMissingData with dist
	132	LOG(8,<<"check bl="<<dist<<" gives sumL "<<sumL<<endl);
	133	return -sumL;
	134	};
	135	};
	136
	137	// REMARK 1: THE LINE if (pij<epsilonPIJ) pij = epsilonPIJ
	138	// There are cases when i != j, and t!=0, and yet pij =0, because of numerical problems
	139	// For these cases, it is easier to assume pij is very small, so that log-pij don't fly...
	140
	141	class C_evalLikeDist_d{ // derivative.
	142	public:
	143	C_evalLikeDist_d(const countTableComponentGam& ctc,
	144	const stochasticProcess& inS1,unObservableData* unObservableData_p=NULL): _ctc(ctc), _sp(inS1),_unObservableData_p(unObservableData_p) {};
	145	private:
	146	const countTableComponentGam& _ctc;
	147	const stochasticProcess& _sp;
	148	unObservableData* _unObservableData_p;
	149
	150	public:
	151	MDOUBLE operator() (MDOUBLE dist) {
	152	const MDOUBLE epsilonPIJ = 1e-10;
	153	MDOUBLE sumDL=0.0;
	154	for (int alph1=0; alph1 < _ctc.alphabetSize(); ++alph1){
	155	for (int alph2=0; alph2 < _ctc.alphabetSize(); ++alph2){
	156	for (int rateCategor = 0; rateCategor<_sp.categories(); ++rateCategor) {
	157	MDOUBLE rate = _sp.rates(rateCategor);
	158	MDOUBLE pij= _sp.Pij_t(alph1,alph2,dist*rate);
	159	if (pij<epsilonPIJ) pij = epsilonPIJ;//SEE REMARK (1) FOR EXPLANATION
	160	MDOUBLE dpij = _sp.dPij_dt(alph1,alph2,dist*rate);
	161	sumDL+= _ctc.getCounts(alph1,alph2,rateCategor)dpij //_sp.ratesProb(rateCategor) : removed CODE_RED
	162	*rate/pij;
	163	}
	164	}
	165	}
	166	//cerr<<"derivation = "<<-sumDL<<endl;
	167	//if(_unObservableData_p)
	168	// sumDL = sumDL/(1- exp(_unObservableData_p->getlogLforMissingData())); // 1. need to find an efficient way to update LofMissingData with dist 2. correct the derivative?
	169	LOG(12,<<"check bl="<<dist<<" gives sumDL "<<sumDL<<endl);
	170	return -sumDL;
	171	};
	172	};
	173
	174
	175
	176
	177
	178	//////////////////////////////////////////////////////////////////////////
	179	class C_evalLikeDist_d2{ // second derivative.
	180	public:
	181	C_evalLikeDist_d2(const countTableComponentGam& ctc,
	182	const stochasticProcess& inS1) : _ctc(ctc), _sp(inS1) {};
	183	private:
	184	const countTableComponentGam& _ctc;
	185	const stochasticProcess& _sp;
	186	public:
	187	MDOUBLE operator() (MDOUBLE dist) {
	188	MDOUBLE sumDL=0.0;
	189	for (int alph1=0; alph1 < _ctc.alphabetSize(); ++alph1){
	190	for (int alph2=0; alph2 < _ctc.alphabetSize(); ++alph2){
	191	for (int rateCategor = 0; rateCategor<_sp.categories(); ++rateCategor) {
	192	MDOUBLE rate = _sp.rates(rateCategor);
	193
	194	MDOUBLE pij= _sp.Pij_t(alph1,alph2,dist*rate);
	195	MDOUBLE dpij = _sp.dPij_dt(alph1,alph2,dist*rate);
	196	MDOUBLE d2pij = _sp.d2Pij_dt2(alph1,alph2,dist*rate);
	197	sumDL+= rate_ctc.getCounts(alph1,alph2,rateCategor)
	198	(pijd2pij - dpij dpij )/(pij*pij);
	199	}
	200	}
	201	}
	202	return -sumDL;
	203	};
	204	};
	205
	206	#endif
	207

+25

-0

libs/phylogeny/likeDist2Codon.cpp less more

	0	// $RCSfile$ $Revision: 4699 $ $Date: 2008-08-14 17:19:46 +0300 (Thu, 14 Aug 2008) $
	1
	2	#include "likeDist2Codon.h"
	3	#include "numRec.h"
	4
	5
	6	const MDOUBLE likeDist2Codon::giveDistance( const countTableComponentGam& ctc,
	7	MDOUBLE& resQ,
	8	const MDOUBLE initialGuess) const {
	9	//return giveDistanceNR(ctc,resL,initialGuess);
	10	return giveDistanceBrent(ctc,resQ,initialGuess);
	11	}
	12
	13	const MDOUBLE likeDist2Codon::giveDistanceBrent( const countTableComponentGam& ctc,
	14	MDOUBLE& resL,
	15	const MDOUBLE initialGuess) const {
	16	const MDOUBLE ax=0,bx=initialGuess,cx=_maxPairwiseDistance,tol=_toll;
	17	MDOUBLE dist=-1.0;
	18	resL = -dbrent(ax,bx,cx,
	19	C_evalLikeDist2Codon(ctc,_spVec),
	20	C_evalLikeDist_d_2Codon(ctc,_spVec),
	21	tol,
	22	&dist);
	23	return dist;
	24	}

+110

-0

libs/phylogeny/likeDist2Codon.h less more

	0	// $Id: likeDist2Codon.h 4699 2008-08-14 14:19:46Z privmane $
	1
	2	#ifndef ___LIKE_DIST_2_CODON_H
	3	#define ___LIKE_DIST_2_CODON_H
	4
	5	#include "definitions.h"
	6	#include "countTableComponent.h"
	7	#include "distanceMethod.h"
	8	#include "stochasticProcess.h"
	9	#include "logFile.h"
	10	#include "wYangModel.h"
	11	#include <cmath>
	12	using namespace std;
	13
	14	class likeDist2Codon : public distanceMethod {
	15	public:
	16	explicit likeDist2Codon(const vector<stochasticProcess>& spVec,
	17	const MDOUBLE toll =0.0001,
	18	const MDOUBLE maxPairwiseDistance = 2.0) : _spVec(spVec) ,_toll(toll),_maxPairwiseDistance(maxPairwiseDistance) {
	19	}
	20
	21	likeDist2Codon (const likeDist2Codon& other): _spVec(other._spVec) ,_toll(other._toll),_maxPairwiseDistance(other._maxPairwiseDistance) {};
	22	virtual likeDist2Codon* clone() const {return new likeDist2Codon(*this);}
	23
	24	// THIS FUNCTION DOES NOT RETURN THE LOG LIKELIHOOD IN RESQ, BUT RATHER "Q", THE CONTRIBUTION of this edge
	25	// TO THE EXPECTED LOG-LIKELIHOOD (SEE SEMPHY PAPER).
	26	// NEVERTHELESS, THE t that optimizes Q is the same t that optimizes log-likelihood.
	27	const MDOUBLE giveDistance( const countTableComponentGam& ctc,
	28	MDOUBLE& resQ,
	29	const MDOUBLE initialGuess= 0.03) const; // initial guess
	30
	31
	32	// returns the estimated ML distance between the 2 sequences.
	33	// if score is given, it will be the log-likelihood.
	34	//!!!!!!!!!!!!!!TO DO
	35	const MDOUBLE giveDistance(const sequence& s1,
	36	const sequence& s2,
	37	const vector<MDOUBLE> * weights,
	38	MDOUBLE* score=NULL) const { return 1;}
	39
	40	const MDOUBLE giveDistanceBrent( const countTableComponentGam& ctc,
	41	MDOUBLE& resL,
	42	const MDOUBLE initialGuess) const;
	43
	44	private:
	45	const vector<stochasticProcess>& _spVec;
	46	const MDOUBLE _toll;
	47	const MDOUBLE _maxPairwiseDistance;
	48
	49	};
	50
	51
	52	class C_evalLikeDist2Codon{
	53	private:
	54	const countTableComponentGam& _ctc;
	55	const vector<stochasticProcess>& _spVec;
	56	public:
	57	C_evalLikeDist2Codon(const countTableComponentGam& ctc,
	58	const vector<stochasticProcess>& inS1):_ctc(ctc), _spVec(inS1) {};
	59
	60	MDOUBLE operator() (MDOUBLE dist) {
	61	const MDOUBLE epsilonPIJ = 1e-10;
	62	MDOUBLE sumL=0.0;
	63	for (int alph1=0; alph1 < _ctc.alphabetSize(); ++alph1){
	64	for (int alph2=0; alph2 < _ctc.alphabetSize(); ++alph2){
	65	for (int categor = 0; categor<_spVec.size(); ++categor) {
	66	MDOUBLE pij= _spVec[categor].Pij_t(alph1,alph2,dist);
	67	if (pij<epsilonPIJ) pij = epsilonPIJ;//SEE REMARK (1) FOR EXPLANATION
	68	sumL += _ctc.getCounts(alph1,alph2,categor)(log(pij)-log(_spVec[categor].freq(alph2)));//_sp.ratesProb(rateCategor);// removed.
	69	}
	70	}
	71	}
	72	// LOG(5,<<"check bl="<<dist<<" gives "<<sumL<<endl);
	73
	74	return -sumL;
	75	};
	76	};
	77
	78	// REMARK 1: THE LINE if (pij<epsilonPIJ) pij = epsilonPIJ
	79	// There are cases when i != j, and t!=0, and yet pij =0, because of numerical problems
	80	// For these cases, it is easier to assume pij is very small, so that log-pij don't fly...
	81
	82	class C_evalLikeDist_d_2Codon{ // derivative.
	83	public:
	84	C_evalLikeDist_d_2Codon(const countTableComponentGam& ctc,
	85	const vector<stochasticProcess>& inS1) : _ctc(ctc), _spVec(inS1) {};
	86	private:
	87	const countTableComponentGam& _ctc;
	88	const vector<stochasticProcess>& _spVec;
	89	public:
	90	MDOUBLE operator() (MDOUBLE dist) {
	91	MDOUBLE sumDL=0.0;
	92	for (int alph1=0; alph1 < _ctc.alphabetSize(); ++alph1){
	93	for (int alph2=0; alph2 < _ctc.alphabetSize(); ++alph2){
	94	for (int categor = 0; categor<_spVec.size(); ++categor) {
	95	MDOUBLE selection = static_cast<wYangModel*>(_spVec[categor].getPijAccelerator()->getReplacementModel())->getW();
	96	MDOUBLE pij= _spVec[categor].Pij_t(alph1,alph2,dist);
	97	MDOUBLE dpij = _spVec[categor].dPij_dt(alph1,alph2,dist);
	98	sumDL+= _ctc.getCounts(alph1,alph2,categor)dpij //_sp.ratesProb(rateCategor) : removed CODE_RED
	99	*selection/pij;
	100	}
	101	}
	102	}
	103	//LOG(5,<<"derivation = "<<-sumDL<<endl);
	104	return -sumDL;
	105	};
	106	};
	107
	108	#endif
	109

+65

-0

libs/phylogeny/likeDist2USSRV.cpp less more

	0	// $Id: likeDist2USSRV.cpp 962 2006-11-07 15:13:34Z privmane $
	1
	2
	3	#include "likeDist2USSRV.h"
	4	#include "numRec.h"
	5
	6
	7	const MDOUBLE likeDist2USSRV::giveDistance( const countTableComponentGam& ctcBase,
	8	const countTableComponentHom& ctcSSRV,
	9	MDOUBLE& resQ,
	10	const MDOUBLE initialGuess) const {
	11	return giveDistanceBrent(ctcBase,ctcSSRV,resQ,initialGuess);
	12	}
	13
	14
	15	const MDOUBLE likeDist2USSRV::giveDistanceBrent(const countTableComponentGam& ctcBase,
	16	const countTableComponentHom& ctcSSRV,
	17	MDOUBLE& resL,
	18	const MDOUBLE initialGuess) const {
	19	const MDOUBLE ax=0,bx=initialGuess,cx=_maxPairwiseDistance,tol=_toll;
	20	LOG(12,<<"ax: " << ax << " bx: " << bx << " cx: " << cx << endl);
	21	MDOUBLE dist=-1.0;
	22	resL = -brent(ax,bx,cx,
	23	C_evalLikeDist2USSRV(ctcBase,ctcSSRV,_model),
	24	tol,
	25	&dist);
	26
	27
	28	LOG(9, <<"brent: resL = " << resL << " dist = " << dist << endl);
	29
	30	return dist;
	31	}
	32
	33	// @@@@dbrent doesn't work. I should try fix this
	34	//const MDOUBLE likeDist2USSRV::giveDistanceBrent(const countTableComponentGam& ctcBase,
	35	// const countTableComponentHom& ctcSSRV,
	36	// MDOUBLE& resL,
	37	// const MDOUBLE initialGuess) const {
	38	// const MDOUBLE ax=0,bx=initialGuess,cx=_maxPairwiseDistance,tol=_toll;
	39	// const MDOUBLE ax_debug=0,bx_debug=initialGuess,cx_debug=_maxPairwiseDistance,tol_debug=_toll;
	40	// MDOUBLE dist=-1.0;
	41	// // @@@@ debug OZ
	42	// MDOUBLE dist_debug=-1.0;
	43	// MDOUBLE resL_debug = -brent(ax_debug,bx_debug,cx_debug,
	44	// C_evalLikeDist2USSRV(ctcBase,ctcSSRV,_model),
	45	// tol_debug,
	46	// &dist_debug);
	47	//
	48	// resL = -dbrent(ax,bx,cx,
	49	// C_evalLikeDist2USSRV(ctcBase,ctcSSRV,_model),
	50	// C_evalLikeDist_d_2USSRV(ctcBase,ctcSSRV,_model),
	51	// tol,
	52	// &dist);
	53	//
	54	// MDOUBLE small = 0.001;
	55	// if ((resL < resL_debug - small) || (resL_debug < resL-small) ||
	56	// (dist < dist_debug - small) || (dist_debug < dist-small))
	57	// {
	58	// LOG(8,<<"likeDist2USSRV::giveDistanceBrent, different results when using brent and dbrent" << endl);
	59	// LOG(8,<<"dbrent resL = " << resL << " , brent resL = " << resL_debug << endl);
	60	// LOG(8,<<"dbrent dist = " << dist << " , brent dist = " << dist_debug << endl);
	61	// }
	62	// // end of debug OZ
	63	// return dist;
	64	//}

+152

-0

libs/phylogeny/likeDist2USSRV.h less more

	0	// $Id: likeDist2USSRV.h 962 2006-11-07 15:13:34Z privmane $
	1	#ifndef ___LIKE_DIST_2_USSRV_H
	2	#define ___LIKE_DIST_2_USSRV_H
	3
	4	#include "definitions.h"
	5	#include "countTableComponent.h"
	6	#include "distanceMethod.h"
	7	#include "stochasticProcess.h"
	8	#include "logFile.h"
	9	#include "ussrvModel.h"
	10	#include <cmath>
	11	using namespace std;
	12
	13	class likeDist2USSRV : public distanceMethod {
	14	public:
	15	explicit likeDist2USSRV(const ussrvModel& model,
	16	const MDOUBLE toll =0.0001,
	17	const MDOUBLE maxPairwiseDistance = 5.0) : _model(model) ,_toll(toll),_maxPairwiseDistance(maxPairwiseDistance)
	18	{}
	19
	20	likeDist2USSRV (const likeDist2USSRV& other): _model(other._model) ,_toll(other._toll),_maxPairwiseDistance(other._maxPairwiseDistance) {};
	21	virtual likeDist2USSRV* clone() const {return new likeDist2USSRV(*this);}
	22
	23	// THIS FUNCTION DOES NOT RETURN THE LOG LIKELIHOOD IN RESQ, BUT RATHER "Q", THE CONTRIBUTION of this edge
	24	// TO THE EXPECTED LOG-LIKELIHOOD (SEE SEMPHY PAPER).
	25	// NEVERTHELESS, THE t that optimizes Q is the same t that optimizes log-likelihood.
	26	const MDOUBLE giveDistance( const countTableComponentGam& ctcBase,
	27	const countTableComponentHom& ctcSSRV,
	28	MDOUBLE& resQ,
	29	const MDOUBLE initialGuess= 0.03) const; // initial guess
	30
	31
	32	// returns the estimated ML distance between the 2 sequences.
	33	// if score is given, it will be the log-likelihood.
	34	//!!!!!!!!!!!!!!TO DO @@@@
	35	const MDOUBLE giveDistance(const sequence& s1,
	36	const sequence& s2,
	37	const vector<MDOUBLE> * weights,
	38	MDOUBLE* score=NULL) const {
	39	LOG(4,<<"likeDist2USSRV:giveDistance : This method should never be used" << endl);
	40	return 1;}
	41
	42	const MDOUBLE giveDistanceBrent(const countTableComponentGam& ctcBase,
	43	const countTableComponentHom& ctcSSRV,
	44	MDOUBLE& resL,
	45	MDOUBLE initialGuess) const;
	46
	47	private:
	48	const ussrvModel& _model;
	49	const MDOUBLE _toll;
	50	const MDOUBLE _maxPairwiseDistance;
	51
	52	};
	53
	54
	55	class C_evalLikeDist2USSRV{
	56	private:
	57	const countTableComponentGam& _ctcBase;
	58	const countTableComponentHom& _ctcSSRV;
	59	const ussrvModel& _model;
	60	public:
	61	C_evalLikeDist2USSRV(const countTableComponentGam& ctcBase,
	62	const countTableComponentHom& ctcSSRV,
	63	const ussrvModel& model):_ctcBase(ctcBase),_ctcSSRV(ctcSSRV), _model(model) {};
	64
	65	MDOUBLE operator() (MDOUBLE dist) {
	66	const MDOUBLE epsilonPIJ = 1e-10;
	67	MDOUBLE sumL=0.0;
	68	MDOUBLE pij;
	69	int categor, alph1,alph2;
	70	// base model
	71	const stochasticProcess& baseSp = _model.getBaseModel();
	72
	73	for (alph1=0; alph1 < _ctcBase.alphabetSize(); ++alph1){
	74	for (alph2=0; alph2 < _ctcBase.alphabetSize(); ++alph2){
	75	for (categor = 0; categor < baseSp.categories(); ++categor) {
	76	MDOUBLE rate = baseSp.rates(categor);
	77	pij= baseSp.Pij_t(alph1,alph2,dist*rate);
	78	if (pij<epsilonPIJ) pij = epsilonPIJ;//SEE REMARK (1) FOR EXPLANATION
	79	sumL += _ctcBase.getCounts(alph1,alph2,categor)(log(pij)-log(baseSp.freq(alph2)));//_sp.ratesProb(rateCategor);// removed.
	80
	81	}
	82	}
	83	}
	84
	85	// ssrv model
	86	const stochasticProcessSSRV& ssrvSp = _model.getSSRVmodel();
	87	for (alph1=0; alph1 < _ctcSSRV.alphabetSize(); ++alph1){
	88	for (alph2=0; alph2 < _ctcSSRV.alphabetSize(); ++alph2){
	89	pij = ssrvSp.Pij_t(alph1,alph2,dist);
	90	if (pij<epsilonPIJ) pij = epsilonPIJ;
	91	sumL+=_ctcSSRV.getCounts(alph1,alph2)(log(pij)-log(ssrvSp.freq(alph2)));//_sp.ratesProb(rateCategor);// removed.
	92	}
	93	}
	94	LOG(12,<<"check bl="<<dist<<" gives "<<sumL<<endl);
	95
	96	return -sumL;
	97	}
	98	};
	99
	100	// REMARK 1: THE LINE if (pij<epsilonPIJ) pij = epsilonPIJ
	101	// There are cases when i != j and t != 0, and yet pij = 0 because of numerical problems.
	102	// For these cases it is easier to assume pij is very small, so that log(pij) does not diverge to -infinity.
	103
	104	// @@@@ doesn't work
	105	class C_evalLikeDist_d_2USSRV{ // derivative.
	106	public:
	107	C_evalLikeDist_d_2USSRV(const countTableComponentGam& ctcBase,
	108	const countTableComponentHom& ctcSSRV,
	109	const ussrvModel& model) : _ctcBase(ctcBase), _ctcSSRV(ctcSSRV),_model(model) {};
	110
	111	private:
	112	const countTableComponentGam& _ctcBase;
	113	const countTableComponentHom& _ctcSSRV;
	114	const ussrvModel& _model;
	115
	116	public:
	117	MDOUBLE operator() (MDOUBLE dist) {
	118	MDOUBLE sumDL=0.0;
	119	MDOUBLE pij, dpij;
	120	int categor, alph1,alph2;
	121	// Base model
	122	const stochasticProcess& spBase = _model.getBaseModel();
	123	for (alph1=0; alph1 < _ctcBase.alphabetSize(); ++alph1){
	124	for (alph2=0; alph2 < _ctcBase.alphabetSize(); ++alph2){
	125	for (categor = 0; categor<_model.noOfCategor(); ++categor) {
	126	MDOUBLE rate = spBase.rates(categor);
	127	MDOUBLE pij= spBase.Pij_t(alph1,alph2,dist);
	128	MDOUBLE dpij= spBase.dPij_dt(alph1,alph2,dist);
	129
	130	sumDL+= _ctcBase.getCounts(alph1,alph2,categor)*dpij
	131	*rate/pij;
	132	}
	133	}
	134	}
	135	// SSRV model
	136	const stochasticProcessSSRV& spSSRV = _model.getSSRVmodel();
	137	for (alph1=0; alph1 < _ctcSSRV.alphabetSize(); ++alph1){
	138	for (alph2=0; alph2 < _ctcSSRV.alphabetSize(); ++alph2){
	139	pij= spSSRV.Pij_t(alph1,alph2,dist);
	140	dpij= spSSRV.dPij_dt(alph1,alph2,dist);
	141	sumDL+= _ctcSSRV.getCounts(alph1,alph2)*dpij/pij; //rate=1;
	142	}
	143	}
	144
	145	LOG(8,<<"derivation = "<<-sumDL<<endl);
	146	return -sumDL;
	147	};
	148	};
	149
	150	#endif // ___LIKE_DIST_2_USSRV_H
	151

+21

-0

libs/phylogeny/likeDistProp.cpp less more

	0	// $Id: likeDistProp.cpp 962 2006-11-07 15:13:34Z privmane $
	1
	2	#include "likeDistProp.h"
	3	#include "numRec.h"
	4
	5	const MDOUBLE likeDistProp::giveDistance( const vector<countTableComponentGam>& ctc,
	6	MDOUBLE& resL) const {
	7	const MDOUBLE MAXDISTANCE=2.0;
	8	// const MDOUBLE PRECISION_TOLL=0.001;
	9	const MDOUBLE ax=0,bx=1.0,cx=MAXDISTANCE,tol=_toll;
	10	MDOUBLE dist=-1.0;
	11	resL = -dbrent(ax,bx,cx,
	12	C_evallikeDistProp(ctc,_s1),
	13	C_evallikeDistProp_d(ctc,_s1),
	14	tol,
	15	&dist);
	16	return dist;
	17	}
	18
	19	// the minus resL = -dbrent because C_evalDist return - value, because it is computing the min not the max...
	20

+91

-0

libs/phylogeny/likeDistProp.h less more

	0	// $Id: likeDistProp.h 962 2006-11-07 15:13:34Z privmane $
	1
	2	#ifndef ___LIKE_DIST_PROP
	3	#define ___LIKE_DIST_PROP
	4
	5	#include "definitions.h"
	6	#include "countTableComponent.h"
	7	#include "stochasticProcess.h"
	8	#include <cmath>
	9
	10	class likeDistProp {
	11	private:
	12	const int _alphabetSize;
	13	const vector<stochasticProcess>& _s1;
	14	const MDOUBLE _toll;
	15	public:
	16	const MDOUBLE giveDistance( const vector<countTableComponentGam>& ctc,
	17	MDOUBLE& resL) const;
	18	explicit likeDistProp(const int alphabetSize,
	19	const vector<stochasticProcess>& s1,
	20	const MDOUBLE toll) : _alphabetSize(alphabetSize), _s1(s1) ,_toll(toll){
	21	}
	22	};
	23
	24
	25
	26	class C_evallikeDistProp_d{ // derivative.
	27	public:
	28	C_evallikeDistProp_d(const vector<countTableComponentGam>& ctc,
	29	const vector<stochasticProcess>& inS1) : _ctc(ctc), _sp(inS1) {};
	30	private:
	31	const vector<countTableComponentGam>& _ctc;
	32	const vector<stochasticProcess>& _sp;
	33	public:
	34	MDOUBLE operator() (MDOUBLE dist) {
	35	MDOUBLE sumDL=0.0;
	36	const MDOUBLE epsilonPIJ = 1e-10;
	37	for (int gene=0; gene < _ctc.size(); ++ gene) {
	38	for (int alph1=0; alph1 < _ctc[gene].alphabetSize(); ++alph1){
	39	for (int alph2=0; alph2 < _ctc[gene].alphabetSize(); ++alph2){
	40	for (int rateCategor = 0; rateCategor<_sp[gene].categories(); ++rateCategor) {
	41	MDOUBLE rate = _sp[gene].rates(rateCategor);
	42	MDOUBLE pij= _sp[gene].Pij_t(alph1,alph2,dist*rate);
	43	MDOUBLE dpij = _sp[gene].dPij_dt(alph1,alph2,dist*rate);
	44	if (pij<epsilonPIJ) {
	45	pij = epsilonPIJ;
	46	dpij = epsilonPIJ;
	47	}
	48	sumDL+= _ctc[gene].getCounts(alph1,alph2,rateCategor)dpij_sp[gene].ratesProb(rateCategor)
	49	*rate/pij;
	50	}
	51	}
	52	}
	53	}
	54	return -sumDL;
	55	}
	56	};
	57
	58
	59
	60	class C_evallikeDistProp{
	61	private:
	62	const vector<countTableComponentGam>& _ctc;
	63	const vector<stochasticProcess>& _sp;
	64	public:
	65	C_evallikeDistProp(const vector<countTableComponentGam>& ctc,
	66	const vector<stochasticProcess>& inS1):_ctc(ctc), _sp(inS1) {};
	67
	68	MDOUBLE operator() (MDOUBLE dist) {
	69	const MDOUBLE epsilonPIJ = 1e-10;
	70	MDOUBLE sumL=0.0;
	71	for (int gene=0; gene < _ctc.size(); ++ gene) {
	72	for (int alph1=0; alph1 < _ctc[gene].alphabetSize(); ++alph1){
	73	for (int alph2=0; alph2 < _ctc[gene].alphabetSize(); ++alph2){
	74	for (int rateCategor = 0; rateCategor<_sp[gene].categories(); ++rateCategor) {
	75	MDOUBLE rate = _sp[gene].rates(rateCategor);
	76	MDOUBLE pij= _sp[gene].Pij_t(alph1,alph2,dist*rate);
	77	if (pij<0) {
	78	pij = epsilonPIJ;
	79	}
	80	sumL += _ctc[gene].getCounts(alph1,alph2,rateCategor)(log(pij)-log(_sp[gene].freq(alph2)))_sp[gene].ratesProb(rateCategor);
	81	}
	82	}
	83	}
	84	}
	85	return -sumL;
	86	}
	87	};
	88
	89	#endif
	90

+21

-0

libs/phylogeny/likeDistPropEB.cpp less more

	0	// $Id: likeDistProp.cpp 962 2006-11-07 15:13:34Z privmane $
	1
	2	#include "likeDistPropEB.h"
	3	#include "numRec.h"
	4
	5	const MDOUBLE likeDistPropEB::giveDistance( const vector< vector<countTableComponentGamProportional> >& ctc,const int nodeID,
	6	MDOUBLE& resL,const MDOUBLE initialGuess) const {
	7	const MDOUBLE ax = _minPairwiseDistance;
	8	const MDOUBLE bx = initialGuess;
	9	const MDOUBLE cx = _maxPairwiseDistance;
	10	const MDOUBLE tol = _toll;
	11	MDOUBLE dist=-1.0;
	12	resL = -dbrent(ax,bx,cx,
	13	C_evallikeDistPropEB(ctc,_msp,_pProportionDist,nodeID),
	14	C_evallikeDistPropEB_d(ctc,_msp,_pProportionDist,nodeID),
	15	tol,&dist);
	16	return dist;
	17	}
	18
	19	// the minus resL = -dbrent because C_evalDist return - value, because it is computing the min not the max...
	20

+115

-0

libs/phylogeny/likeDistPropEB.h less more

	0	// $Id: likeDistProp.h 962 2006-11-07 15:13:34Z privmane $
	1
	2	#ifndef ___LIKE_DIST_PROP_EB
	3	#define ___LIKE_DIST_PROP_EB
	4
	5	#include "definitions.h"
	6	#include "countTableComponent.h"
	7	#include "multipleStochasticProcess.h"
	8	#include "gammaDistribution.h"
	9	#include "logFile.h"
	10	#include <cmath>
	11
	12	class likeDistPropEB {
	13	private:
	14	multipleStochasticProcess * _msp;
	15	const gammaDistribution* _pProportionDist;
	16	const MDOUBLE _maxPairwiseDistance;
	17	const MDOUBLE _minPairwiseDistance;
	18	const MDOUBLE _toll;
	19	public:
	20	const MDOUBLE giveDistance( const vector< vector<countTableComponentGamProportional> >& ctc,const int nodeID,
	21	MDOUBLE& resL,const MDOUBLE initialGuess= 0.03) const;
	22	explicit likeDistPropEB(multipleStochasticProcess * msp,
	23	const gammaDistribution* pProportionDist,
	24	const MDOUBLE toll =0.0001,
	25	const MDOUBLE maxPairwiseDistance = 5.0,
	26	const MDOUBLE minPairwiseDistance = 0.0000001) : _msp(msp) ,_pProportionDist(pProportionDist), _maxPairwiseDistance(maxPairwiseDistance), _minPairwiseDistance(minPairwiseDistance),_toll(toll){
	27	}
	28	likeDistPropEB(const likeDistPropEB & other)
	29	: _msp(other._msp),_pProportionDist(other._pProportionDist),_maxPairwiseDistance(other._maxPairwiseDistance),_minPairwiseDistance(other._minPairwiseDistance),_toll(other._toll){}
	30	virtual likeDistPropEB* clone() const {return new likeDistPropEB(*this);}
	31	};
	32
	33
	34
	35	class C_evallikeDistPropEB_d{ // derivative.
	36	public:
	37	C_evallikeDistPropEB_d(const vector< vector<countTableComponentGamProportional> >& ctc,
	38	multipleStochasticProcess* msp,const gammaDistribution* pProportionDist,const int nodeID) : _ctc(ctc), _msp(msp), _pProportionDist(pProportionDist), _nodeID(nodeID) {};
	39	private:
	40	const vector< vector<countTableComponentGamProportional> >& _ctc;
	41	multipleStochasticProcess* _msp;
	42	const gammaDistribution* _pProportionDist;
	43	const int _nodeID;
	44	public:
	45	MDOUBLE operator() (MDOUBLE dist) {
	46	const MDOUBLE epsilonPIJ = 1e-10;
	47	MDOUBLE sumDL = 0.0;
	48	for (int gene=0; gene < _msp->getSPVecSize(); ++gene) {
	49	for (int alph1=0; alph1 < _ctc[gene][_nodeID].alphabetSize(); ++alph1){
	50	for (int alph2=0; alph2 < _ctc[gene][_nodeID].alphabetSize(); ++alph2){
	51	for(int globalRateCategor = 0;globalRateCategor < _pProportionDist->categories();++globalRateCategor){
	52	_msp->getSp(gene)->setGlobalRate(_pProportionDist->rates(globalRateCategor));
	53	MDOUBLE globalRate = _pProportionDist->rates(globalRateCategor);
	54	for (int localRateCategor = 0; localRateCategor < _msp->getSp(gene)->categories(); ++localRateCategor) {
	55	MDOUBLE localRate = _msp->getSp(gene)->rates(localRateCategor);
	56	MDOUBLE pij= _msp->getSp(gene)->Pij_t(alph1,alph2,distglobalRatelocalRate);
	57	if (pij<epsilonPIJ) {
	58	pij = epsilonPIJ;
	59	}
	60	MDOUBLE dpij = _msp->getSp(gene)->dPij_dt(alph1,alph2,distglobalRatelocalRate);
	61
	62	//sumDL+= _ctc[gene][_nodeID].getCounts(alph1,alph2,globalRateCategor,localRateCategor)dpij_pProportionDist->ratesProb(globalRateCategor)*sp->ratesProb(localRateCategor)
	63	// globalRatelocalRate/pij;
	64	sumDL+= _ctc[gene][_nodeID].getCounts(alph1,alph2,globalRateCategor,localRateCategor)dpijglobalRate*localRate/pij;
	65	}
	66	}
	67	}
	68	}
	69	}
	70	LOG(12,<<"check bl="<<dist<<" gives sumDL "<<sumDL<<endl);
	71	return -sumDL;
	72	};
	73	};
	74
	75
	76
	77	class C_evallikeDistPropEB{
	78	private:
	79	const vector< vector<countTableComponentGamProportional> >& _ctc;
	80	multipleStochasticProcess* _msp;
	81	const gammaDistribution* _pProportionDist;
	82	const int _nodeID;
	83	public:
	84	C_evallikeDistPropEB(const vector< vector<countTableComponentGamProportional> >& ctc,
	85	multipleStochasticProcess* msp,const gammaDistribution* pProportionDist,const int nodeID):_ctc(ctc), _msp(msp), _pProportionDist(pProportionDist), _nodeID(nodeID) {};
	86
	87	MDOUBLE operator() (MDOUBLE dist) {
	88	const MDOUBLE epsilonPIJ = 1e-10;
	89	MDOUBLE sumL = 0.0;
	90	for (int gene=0; gene < _msp->getSPVecSize(); ++gene) {
	91	for (int alph1=0; alph1 < _ctc[gene][_nodeID].alphabetSize(); ++alph1){
	92	for (int alph2=0; alph2 < _ctc[gene][_nodeID].alphabetSize(); ++alph2){
	93	for(int globalRateCategor = 0;globalRateCategor < _pProportionDist->categories();++globalRateCategor){
	94	_msp->getSp(gene)->setGlobalRate(_pProportionDist->rates(globalRateCategor));
	95	MDOUBLE globalRate = _pProportionDist->rates(globalRateCategor);
	96	for (int localRateCategor = 0; localRateCategor < _msp->getSp(gene)->categories(); ++localRateCategor) {
	97	MDOUBLE localRate = _msp->getSp(gene)->rates(localRateCategor);
	98	MDOUBLE pij= _msp->getSp(gene)->Pij_t(alph1,alph2,distglobalRatelocalRate);
	99	if (pij<epsilonPIJ) {
	100	pij = epsilonPIJ;
	101	}
	102	sumL += _ctc[gene][_nodeID].getCounts(alph1,alph2,globalRateCategor,localRateCategor)(log(pij)-log(_msp->getSp(gene)->freq(alph2)));//_pProportionDist->ratesProb(globalRateCategor)*sp->ratesProb(localRateCategor);
	103	}
	104	}
	105	}
	106	}
	107	}
	108	LOG(8,<<"check bl="<<dist<<" gives sumL "<<sumL<<endl);
	109	return -sumL;
	110	};
	111	};
	112
	113	#endif
	114

+378

-0

libs/phylogeny/likeDistfixRoot.cpp less more

	0	// $Id: likeDistfixRoot.cpp 4470 2008-07-17 15:37:40Z cohenofi $
	1
	2	#include "likeDistfixRoot.h"
	3	#include "numRec.h"
	4	#include "someUtil.h"
	5
	6	stochasticProcess& likeDistfixRoot::getNonConstStochasticProcess() {
	7	if (!_nonConstSpPtr) {
	8	errorMsg::reportError("likeDistfixRoot::getNonConstStochasticProcess: Can't give non-const stochasticProcess because the stochasticProcess that was given to the constructor of this likeDistfixRoot object was const");
	9	}
	10	return *_nonConstSpPtr;
	11	}
	12
	13	// ======================= functors needed for the computations =============
	14
	15	class C_evalLikeDistDirect{
	16	private:
	17	const stochasticProcess& _sp;
	18	const sequence& _s1;
	19	const sequence& _s2;
	20	const vector<MDOUBLE> * _weights;
	21	public:
	22	C_evalLikeDistDirect(const stochasticProcess& inS1,
	23	const sequence& s1,
	24	const sequence& s2,
	25	const vector<MDOUBLE> * weights): _sp(inS1),_s1(s1),_s2(s2),_weights(weights) {};
	26
	27	MDOUBLE operator() (MDOUBLE dist) const {
	28	return -likeDistfixRoot::evalLikelihoodForDistance(_sp,_s1,_s2,dist,_weights);
	29	}
	30	};
	31
	32	MDOUBLE likeDistfixRoot::evalLikelihoodForDistance(const stochasticProcess& sp,
	33	const sequence& s1,
	34	const sequence& s2,
	35	const MDOUBLE dist,
	36	const vector<MDOUBLE> * weights) {
	37	MDOUBLE sumL=0.0; // sum of log likelihoods
	38	MDOUBLE posLikelihood = 0.0; // likelihood of a specific position
	39	for (int pos=0; pos < s1.seqLen(); ++pos){
	40	if (s1.isUnknown(pos) && s2.isUnknown(pos)) continue; // the case of two unknowns
	41	posLikelihood = 0.0;
	42	if (s1.isUnknown(pos) && s2.isSpecific(pos)) {
	43	// this is the more complicated case, where s1 = ?, s2 = specific
	44	posLikelihood = sp.freq(s2[pos]);
	45	} else if (s2.isUnknown(pos) && s1.isSpecific(pos)) {
	46	posLikelihood = sp.freq(s1[pos]);
	47	} else {
	48	for (int rateCategor = 0; rateCategor<sp.categories(); ++rateCategor) {
	49	MDOUBLE rate = sp.rates(rateCategor);
	50	MDOUBLE pij= 0.0;
	51	if (s1.isSpecific(pos) && s2.isSpecific(pos)) {//simple case, where AA i is changing to AA j
	52	pij= sp.Pij_t(s1[pos],s2[pos],dist*rate);
	53	posLikelihood += pij * sp.freq(s1[pos])*sp.ratesProb(rateCategor);
	54	} else {// this is the most complicated case, when you have
	55	// combinations of letters, for example B in one
	56	// sequence and ? in the other.
	57	for (int iS1 =0; iS1< sp.alphabetSize(); ++iS1) {
	58	for (int iS2 =0; iS2< sp.alphabetSize(); ++iS2) {
	59	if ((s1.getAlphabet()->relations(s1[pos],iS1)) &&
	60	(s2.getAlphabet()->relations(s2[pos],iS2))) {
	61	posLikelihood += sp.freq(iS1)sp.Pij_t(iS1,iS2,distrate)*sp.ratesProb(rateCategor);
	62	}
	63	}
	64	}
	65	}
	66	} // end of for on the rates
	67	}
	68	assert(posLikelihood!=0.0);
	69	sumL += log(posLikelihood)(weights ? (weights)[pos]:1.0);
	70	}
	71	return sumL;
	72	};
	73
	74	class C_evalLikeDistDirect_d{ // derivative.
	75	private:
	76	const stochasticProcess& _sp;
	77	const sequence& _s1;
	78	const sequence& _s2;
	79	const vector<MDOUBLE> * _weights;
	80	public:
	81	C_evalLikeDistDirect_d(const stochasticProcess& sp,
	82	const sequence& s1,
	83	const sequence& s2,
	84	const vector<MDOUBLE> * weights): _sp(sp),_s1(s1),_s2(s2),_weights(weights) {};
	85
	86	MDOUBLE operator() (MDOUBLE dist) const {
	87	MDOUBLE sumL=0.0; // sum of log likelihoods
	88	MDOUBLE posLikelihood = 0.0; // likelihood of a specific position
	89	MDOUBLE posLikelihood_d = 0.0; // derivative of the likelihood at a specific position
	90	for (int pos=0; pos < _s1.seqLen(); ++pos){
	91	if (_s1.isUnknown(pos) && _s2.isUnknown(pos)) continue; // the case of two unknowns
	92	posLikelihood = 0.0;
	93	posLikelihood_d = 0.0;
	94	if (_s1.isUnknown(pos) && _s2.isSpecific(pos)) {
	95	// this is the more complicated case, where s1 = ?, s2 = specific
	96	posLikelihood = _sp.freq(_s2[pos]);
	97	posLikelihood_d =0.0;
	98	} else if (_s2.isUnknown(pos) && _s1.isSpecific(pos)) {
	99	posLikelihood = _sp.freq(_s1[pos]);
	100	posLikelihood_d =0.0;
	101	} else {
	102	for (int rateCategor = 0; rateCategor<_sp.categories(); ++rateCategor) {
	103	MDOUBLE rate = _sp.rates(rateCategor);
	104	MDOUBLE pij= 0.0;
	105	MDOUBLE dpij=0.0;
	106	if (_s1.isSpecific(pos) && _s2.isSpecific(pos)) {
	107	//simple case, where AA i is changing to AA j
	108	pij= _sp.Pij_t(_s1[pos],_s2[pos],dist*rate);
	109	dpij= _sp.dPij_dt(_s1[pos],_s2[pos],distrate)rate;
	110	MDOUBLE tmp = _sp.freq(_s1[pos])*_sp.ratesProb(rateCategor);
	111	posLikelihood += pij *tmp;
	112	posLikelihood_d += dpij*tmp;
	113	} else {// this is the most complicated case, when you have combinations of letters,
	114	// for example B in one sequence and ? in the other.
	115	for (int iS1 =0; iS1< _sp.alphabetSize(); ++iS1) {
	116	for (int iS2 =0; iS2< _sp.alphabetSize(); ++iS2) {
	117	if ((_s1.getAlphabet()->relations(_s1[pos],iS1)) &&
	118	(_s2.getAlphabet()->relations(_s2[pos],iS2))) {
	119	MDOUBLE exp = _sp.freq(iS1)*_sp.ratesProb(rateCategor);
	120	posLikelihood += exp* _sp.Pij_t(iS1,iS2,dist*rate);
	121	posLikelihood_d += exp * _sp.dPij_dt(iS1,iS2,distrate)rate;
	122	}
	123	}
	124	}
	125	}
	126	}// end of for rate categories
	127	}
	128	assert(posLikelihood>0.0);
	129	sumL += (posLikelihood_d/posLikelihood)(_weights ? (_weights)[pos]:1.0);
	130	}
	131	return -sumL;
	132	};
	133	};
	134
	135
	136	// THIS FUNCTION EVALUATES THE LIKELIHOOD GIVEN THE DISTANCE
	137	MDOUBLE likeDistfixRoot::evalLogLikelihoodGivenDistance(const sequence& s1, const sequence& s2,
	138	const MDOUBLE dis2evaluate) {
	139	C_evalLikeDistDirect Cev(_sp,s1,s2,NULL);
	140	return -Cev.operator ()(dis2evaluate);
	141	}
	142
	143	//MDOUBLE likeDistfixRoot::giveDistanceThroughCTC( const sequence& s1,
	144	// const sequence& s2,
	145	// const vector<MDOUBLE> * weights,
	146	// MDOUBLE* score) const {
	147	// // only in the case of homogenous model - work through pairwise EM like
	148	// countTableComponentGam ctc;
	149	// if (_sp.categories() != 1) {
	150	// errorMsg::reportError("this function only work for homogenous model.");
	151	// }
	152	// ctc.countTableComponentAllocatePlace(s1.getAlphabet()->size(),1);
	153	// for (int i=0; i<s1.seqLen(); ++i) {
	154	// ctc.addToCounts(s1[i],s2[i],0,weights?(*weights)[i]:1.0);
	155	// }
	156	// MDOUBLE resL =0;
	157	// return giveDistance(ctc,resL);
	158	//}
	159
	160	const MDOUBLE likeDistfixRoot::giveDistance(const vector<countTableComponentGam>& ctc,
	161	MDOUBLE& resQ,
	162	const MDOUBLE initialGuess) const {
	163	//return giveDistanceNR(ctc,resL,initialGuess);
	164	return giveDistanceBrent(ctc,resQ,initialGuess);
	165	}
	166
	167	const MDOUBLE likeDistfixRoot::giveDistanceBrent(const vector<countTableComponentGam>& ctc,
	168	MDOUBLE& resL,
	169	const MDOUBLE initialGuess) const {
	170	const MDOUBLE ax=_minPairwiseDistance,bx=initialGuess,cx=_maxPairwiseDistance,tol=_toll;
	171	MDOUBLE dist=-1.0;
	172	resL = -dbrent(ax,bx,cx,
	173	C_evallikeDistfixRoot(ctc,_sp,_unObservableData_p),
	174	C_evalLikeDist_dfixRoot(ctc,_sp),
	175	tol,
	176	&dist);
	177	return dist;
	178	}
	179
	180	template <typename regF, typename dF>
	181	MDOUBLE myNRmethod(MDOUBLE low, MDOUBLE current, MDOUBLE high, regF f,
	182	dF df, const MDOUBLE tol, const int max_it, int & zeroFound) { // finding zero of a function.
	183	zeroFound = 1;
	184	MDOUBLE currentF = f(current);
	185	if (fabs(currentF)<tol) return current;
	186	MDOUBLE lowF = f(low);
	187	MDOUBLE highF = f(high);
	188	if (((lowF>0) && (highF>0)) \|\| ((lowF<0) && (highF<0))) {// unable to find a zero
	189	zeroFound = 0;
	190	return 0;
	191	}
	192	if (lowF>0) {// fixing things to be in the right order.
	193	MDOUBLE tmp = low;
	194	low = high;
	195	high = tmp;
	196	tmp = lowF;
	197	lowF = highF;
	198	highF = tmp;
	199	}
	200	if (currentF>0) {
	201	high = current;
	202	highF = currentF;
	203	} else {
	204	low = current;
	205	lowF = currentF;
	206	} // now the zero is between current and either low or high.
	207
	208	MDOUBLE currentIntervalSize = fabs(low-high);
	209	MDOUBLE oldIntervalSize = currentIntervalSize;
	210
	211	// we have to decide if we do NR or devide the interval by two:
	212	// we want to check if the next NR step is within our interval
	213	// recall the the next NR guess is Xn+1 = Xn - f(Xn) / f(Xn+1)
	214	// So we want (current - currentF/currentDF) to be between low and high
	215	for (int i=0 ; i < max_it; ++i) {
	216	MDOUBLE currentDF = df(current);
	217	MDOUBLE newGuess = current - currentF/currentDF;
	218	if ((newGuess<low && newGuess> high) \|\| (newGuess>low && newGuess< high)) {
	219	// in this case we should do a NR step.
	220	current = newGuess;
	221	currentF = f(current);
	222	if (currentF > 0){
	223	high = current;
	224	highF = currentF;
	225	} else {
	226	low = current;
	227	lowF = currentF;
	228	}
	229
	230	oldIntervalSize = currentIntervalSize;
	231	currentIntervalSize =fabs (high-low);
	232	if (currentIntervalSize < tol) {
	233	return current;
	234	}
	235	//LOG(5,<<"NR: low= "<<low<<" high= "<<high<<endl);
	236	}
	237	else { // bisection
	238	oldIntervalSize = currentIntervalSize;
	239	currentIntervalSize /= 2.0;
	240	current = (low+high)/2.0;
	241	currentF = f(current);
	242	if (currentF > 0){
	243	high = current;
	244	highF = currentF;
	245	} else {
	246	low = current;
	247	lowF = currentF;
	248	}
	249	//LOG(5,<<"BIS: low= "<<low<<" high= "<<high<<endl);
	250	if (currentIntervalSize < tol) {
	251	return current;
	252	}
	253
	254	}
	255	}
	256	errorMsg::reportError("to many iterations in myNR function");
	257	return 0;
	258	}
	259
	260	//const MDOUBLE likeDistfixRoot::giveDistanceNR( const countTableComponentGam& ctc,
	261	// MDOUBLE& resL,
	262	// const MDOUBLE initialGuess) const {
	263	// //change bx so that it will be the current branch length!
	264	// const MDOUBLE ax=0,bx=initialGuess,cx=_maxPairwiseDistance,tol=_toll;
	265	// // LOG(5,<<"===================================================\n");
	266	// MDOUBLE dist=-1.0;
	267	// int zeroFound = 0;
	268	// dist = myNRmethod(ax,bx,cx,
	269	// C_evalLikeDist_dGL(ctc,_sp),
	270	// C_evalLikeDist_d2GL(ctc,_sp),
	271	// tol,
	272	// 100,
	273	// zeroFound);// max it for NR;
	274	// if (zeroFound == 0) {// there was an error finding a zero
	275	// dist = bx;
	276	// }
	277	//
	278	// return dist;
	279	//}
	280
	281
	282
	283
	284
	285
	286
	287
	288
	289
	290
	291	/*
	292
	293
	294
	295
	296	const MDOUBLE likeDistfixRoot::giveDistance( // the NR version.
	297	const countTableComponentGam& ctc,
	298	MDOUBLE& resL) const {
	299	LOG(5,<<"=============="<<endl);
	300	MDOUBLE oldGuess=0.05; // move to parameters.
	301	if (oldGuess<0) oldGuess=0.05; // move up.
	302	int max_it = 100;
	303	MDOUBLE oldDist =0;
	304	MDOUBLE currentDist =oldGuess;
	305	MDOUBLE newDer =VERYBIG;
	306	MDOUBLE oldDer =VERYBIG;
	307	//const MDOUBLE ax=0,bx=1.0,cx=_maxPairwiseDistance,tol=_toll;
	308	for (int i=0; i < max_it; ++i){
	309	MDOUBLE sumDL=0.0;
	310	MDOUBLE sumDL2=0.0;
	311	for (int alph1=0; alph1 < ctc.alphabetSize(); ++alph1){
	312	for (int alph2=0; alph2 < ctc.alphabetSize(); ++alph2){
	313	for (int rateCategor = 0; rateCategor<_s1.categories(); ++rateCategor) {
	314	MDOUBLE rate = _s1.rates(rateCategor);
	315
	316	MDOUBLE pij= _s1.Pij_t(alph1,alph2,currentDist*rate);
	317	MDOUBLE dpij = _s1.dPij_dt(alph1,alph2,currentDist*rate);
	318	MDOUBLE dpij2 = _s1.d2Pij_dt2(alph1,alph2,currentDist*rate);
	319	if (pij==0) {
	320	pij = 0.000000001;
	321	dpij = 0.000000001;
	322	}
	323	sumDL+= ctc.getCounts(alph1,alph2,rateCategor)*dpij
	324	*rate/pij;
	325	sumDL2+= ctc.getCounts(alph1,alph2,rateCategor)*rate*(pij*dpij2-dpij *dpij)
	326	/(pij*pij);
	327	}
	328	}
	329	}
	330	oldDer = newDer;
	331	newDer = sumDL;
	332	LOG(5,<<"\ndistance = "<<currentDist<<endl);
	333	LOG(5,<<"derivation = "<<sumDL<<endl);
	334	LOG(5,<<"sec derivation = "<<sumDL2<<endl);
	335	oldDist = currentDist;
	336	if ((fabs(newDer) < fabs(oldDer)) && (sumDL2 < 0)) {
	337	currentDist = currentDist - newDer/sumDL2;
	338	}
	339	else {
	340	currentDist = currentDist / 2;
	341	}
	342	MDOUBLE epsilonForDeriv = 0.001;// move up
	343	if (fabs(newDer) < epsilonForDeriv) break;
	344
	345	}
	346
	347	return currentDist;
	348	}*/
	349
	350	const MDOUBLE likeDistfixRoot::giveDistance(const sequence& s1,
	351	const sequence& s2,
	352	const vector<MDOUBLE> * weights,
	353	MDOUBLE* score) const {
	354	const MDOUBLE ax=_minPairwiseDistance, cx=_maxPairwiseDistance,tol=_toll;
	355	MDOUBLE bx=_jcDist.giveDistance(s1,s2,weights,score)/=1.0/;
	356	if (!(bx==bx)) bx = 1.0; // safety check that the JC distance did not return nan (not a number)
	357	MDOUBLE dist=-1.0;
	358	MDOUBLE resL = -dbrent(ax,bx,cx,
	359	C_evalLikeDistDirect(_sp,s1,s2,weights),
	360	C_evalLikeDistDirect_d(_sp,s1,s2,weights),
	361	tol,
	362	&dist);
	363	if (score) *score = resL;
	364	return dist;
	365	}
	366
	367	const MDOUBLE likeDistfixRoot::giveLikelihood(const sequence& s1,
	368	const sequence& s2,
	369	MDOUBLE distance,
	370	const vector<MDOUBLE> * weights) const
	371	{
	372
	373
	374	C_evalLikeDistDirect evalDis(_sp,s1,s2,weights);
	375	return -evalDis(distance);
	376
	377	}

+228

-0

libs/phylogeny/likeDistfixRoot.h less more

	0	// $Id: likeDistfixRoot.h 4470 2008-07-17 15:37:40Z cohenofi $
	1
	2	#ifndef ___LIKE_DIST_H_GL_FIX_ROOT
	3	#define ___LIKE_DIST_H_GL_FIX_ROOT
	4
	5	#include "definitions.h"
	6	#include "countTableComponent.h"
	7	#include "distanceMethod.h"
	8	#include "stochasticProcess.h"
	9	#include "logFile.h"
	10	#include "jcDistance.h"
	11	#include "sequenceContainer.h"
	12	#include "unObservableData.h"
	13	#include <cmath>
	14	using namespace std;
	15
	16	class likeDistfixRoot : public distanceMethod {
	17	public:
	18	// WARNING: the stochasticProcess is NOT copied. The same object is used
	19	explicit likeDistfixRoot(const stochasticProcess& sp,
	20	const MDOUBLE toll =0.0001,
	21	const MDOUBLE maxPairwiseDistance = 5.0,
	22	const MDOUBLE minPairwiseDistance = 0.0000001,
	23	unObservableData* unObservableData_p=NULL)
	24	: _sp(sp),_nonConstSpPtr(NULL),_toll(toll),_maxPairwiseDistance(maxPairwiseDistance),_minPairwiseDistance(minPairwiseDistance),_unObservableData_p(unObservableData_p) {}
	25
	26	likeDistfixRoot(const likeDistfixRoot& other)
	27	: _sp(other._sp),_nonConstSpPtr(other._nonConstSpPtr),_toll(other._toll),_maxPairwiseDistance(other._maxPairwiseDistance),_minPairwiseDistance(other._minPairwiseDistance),_jcDist(other._jcDist) {}
	28
	29	virtual likeDistfixRoot* clone() const {return new likeDistfixRoot(*this);}
	30	// This constructor allows non-const stochasticProcess so that likeDistfixRoot will be able to change alpha, etc.
	31	explicit likeDistfixRoot(stochasticProcess& sp,
	32	const MDOUBLE toll =0.0001,
	33	const MDOUBLE maxPairwiseDistance = 5.0,
	34	const MDOUBLE minPairwiseDistance = 0.0000001)
	35	: _sp(sp),_nonConstSpPtr(&sp),_toll(toll),_maxPairwiseDistance(maxPairwiseDistance),_minPairwiseDistance(minPairwiseDistance) {}
	36
	37	// THIS FUNCTION DOES NOT RETURN THE LOG LIKELIHOOD IN RESQ, BUT RATHER "Q", THE CONTRIBUTION of this edge
	38	// TO THE EXPECTED LOG-LIKELIHOOD (SEE SEMPHY PAPER).
	39	// NEVERTHELESS, THE t that optimizes Q is the same t that optimizes log-likelihood.
	40	const MDOUBLE giveDistance(const vector<countTableComponentGam>& ctc,
	41	MDOUBLE& resQ,
	42	const MDOUBLE initialGuess= 0.03) const; // initial guess
	43
	44	// given two sequences, it evaluates the log likelihood.
	45	MDOUBLE evalLogLikelihoodGivenDistance(const sequence& s1,
	46	const sequence& s2,
	47	const MDOUBLE dis2evaluate);
	48
	49	// returns the estimated ML distance between the 2 sequences.
	50	// if score is given, it will be the log-likelihood.
	51	const MDOUBLE giveDistance(const sequence& s1,
	52	const sequence& s2,
	53	const vector<MDOUBLE> * weights,
	54	MDOUBLE* score=NULL) const;
	55
	56	// this function creates a countTableComponent (ctc) from the two sequences.
	57	// it then computes the distance from this ctc.
	58	// THIS FUNCTION DOES NOT RETURN THE LOG LIKELIHOOD IN score, BUT RATHER "Q", THE CONTRIBUTION of this edge
	59	// TO THE EXPECTED LOG-LIKELIHOOD (SEE SEMPHY PAPER).
	60	// NEVERTHELESS, THE t that optimizes Q is the same t that optimizes log-likelihood.
	61	//MDOUBLE giveDistanceThroughCTC(const sequence& s1,
	62	// const sequence& s2,
	63	// const vector<MDOUBLE> * weights,
	64	// MDOUBLE* score=NULL) const;
	65
	66	const MDOUBLE giveLikelihood(const sequence& s1,
	67	const sequence& s2,
	68	MDOUBLE distance,
	69	const vector<MDOUBLE> * weights=NULL) const;
	70
	71	// return the stochasticProcess
	72	const stochasticProcess& getStochasticProcess() const {return _sp;}
	73	stochasticProcess& getNonConstStochasticProcess();
	74	bool isTheInternalStochasticProcessConst() const {return !_nonConstSpPtr;}
	75	MDOUBLE getToll() const {return _toll;}
	76	MDOUBLE getMaxPairwiseDistance() const {return _maxPairwiseDistance;}
	77	MDOUBLE getMinPairwiseDistance() const {return _minPairwiseDistance;}
	78
	79	protected:
	80	const stochasticProcess &_sp;
	81	stochasticProcess *_nonConstSpPtr;
	82	const MDOUBLE _toll;
	83	const MDOUBLE _maxPairwiseDistance;
	84	const MDOUBLE _minPairwiseDistance;
	85	jcDistance _jcDist;
	86	unObservableData* _unObservableData_p;
	87
	88	private:
	89	const MDOUBLE giveDistanceBrent( const vector<countTableComponentGam>& ctc,
	90	MDOUBLE& resL,
	91	const MDOUBLE initialGuess= 0.03) const; // initial guess
	92	const MDOUBLE giveDistanceNR( const countTableComponentGam& ctc,
	93	MDOUBLE& resL,
	94	const MDOUBLE initialGuess= 0.03) const; // initial guess
	95
	96
	97
	98	public:
	99	static MDOUBLE evalLikelihoodForDistance(const stochasticProcess& sp,
	100	const sequence& s1,
	101	const sequence& s2,
	102	const MDOUBLE dist,
	103	const vector<MDOUBLE> * weights=NULL);
	104
	105	};
	106
	107
	108	class C_evallikeDistfixRoot{
	109	private:
	110	const vector<countTableComponentGam>& _ctc;
	111	const stochasticProcess& _sp;
	112	unObservableData* _unObservableData_p;
	113	public:
	114	C_evallikeDistfixRoot(const vector<countTableComponentGam>& ctc, // ctc[letterAtRoot][rate][alph][alph]
	115	const stochasticProcess& inS1, unObservableData* unObservableData_p=NULL)
	116	:_ctc(ctc), _sp(inS1),_unObservableData_p(unObservableData_p) {};
	117
	118	MDOUBLE operator() (MDOUBLE dist)
	119	{
	120	//if(_plogLforMissingData){
	121	// sequenceContainer scZero;
	122	// gainLossAlphabet alph;
	123	// scZero.startZeroSequenceContainerGL(_sc, alph);
	124	// _plogLforMissingData = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(_et,scZero,_sp);
	125	//}
	126	const MDOUBLE epsilonPIJ = 1e-10;
	127	MDOUBLE sumL=0.0;
	128	for (int letterAtRoot = 0; letterAtRoot < _sp.alphabetSize(); ++letterAtRoot){
	129	for (int alph1=0; alph1 < _sp.alphabetSize(); ++alph1){
	130	for (int alph2=0; alph2 < _sp.alphabetSize(); ++alph2){
	131	for (int rateCategor = 0; rateCategor<_sp.categories(); ++rateCategor) {
	132	MDOUBLE rate = _sp.rates(rateCategor);
	133
	134	MDOUBLE pij= _sp.Pij_t(alph1,alph2,dist*rate);
	135	if (pij<epsilonPIJ) pij = epsilonPIJ;//SEE REMARK (1) FOR EXPLANATION
	136	sumL += _ctc[letterAtRoot].getCounts(alph1,alph2,rateCategor)
	137	//*_sp.freq(letterAtRoot)
	138	//*(log(pij)-log(_sp.freq(letterAtRoot))) ;
	139
	140	//*_sp.freq(letterAtRoot)
	141	*(log(pij)-log(_sp.freq(alph2))) ;
	142	}
	143	}
	144	}
	145	}
	146	//if(_unObservableData_p)
	147	// sumL = sumL/(1- exp(_unObservableData_p->getlogLforMissingData())); // need to find an efficient way to update LofMissingData with dist
	148	LOG(8,<<"check bl="<<dist<<" gives sumL "<<sumL<<endl);
	149	return -sumL;
	150	};
	151	};
	152
	153	// REMARK 1: THE LINE if (pij<epsilonPIJ) pij = epsilonPIJ
	154	// There are cases when i != j and t != 0, and yet pij = 0, because of numerical problems.
	155	// For these cases, it is easier to assume pij is very small, so that log(pij) does not diverge to -infinity.
	156
	157	class C_evalLikeDist_dfixRoot{ // derivative.
	158	public:
	159	C_evalLikeDist_dfixRoot(const vector<countTableComponentGam>& ctc,
	160	const stochasticProcess& inS1) : _ctc(ctc), _sp(inS1) {};
	161	private:
	162	const vector<countTableComponentGam>& _ctc;
	163	const stochasticProcess& _sp;
	164	public:
	165	MDOUBLE operator() (MDOUBLE dist) {
	166	MDOUBLE sumDL=0.0;
	167	for (int letterAtRoot = 0; letterAtRoot < _sp.alphabetSize(); ++letterAtRoot){
	168	for (int alph1=0; alph1 < _ctc[letterAtRoot].alphabetSize(); ++alph1){
	169	for (int alph2=0; alph2 < _ctc[letterAtRoot][alph1].alphabetSize(); ++alph2){
	170	for (int rateCategor = 0; rateCategor<_sp.categories(); ++rateCategor) {
	171	MDOUBLE rate = _sp.rates(rateCategor);
	172
	173	MDOUBLE pij= _sp.Pij_t(alph1,alph2,dist*rate);
	174	MDOUBLE dpij = _sp.dPij_dt(alph1,alph2,dist*rate);
	175	//cout<<letterAtRoot<<"\n";
	176	//cout<<alph1<<"\n";
	177	//cout<<alph2<<"\n";
	178	//cout<<rateCategor<<"\n";
	179	//cout<<rate<<"\n";
	180	//cout<<_ctc[letterAtRoot].getCounts(alph1,alph2,rateCategor)<<"\n";
	181	sumDL+= _ctc[letterAtRoot].getCounts(alph1,alph2,rateCategor)*dpij
	182	//*_sp.freq(letterAtRoot)
	183	*rate/pij ;
	184	}
	185	}
	186	}//cerr<<"derivation = "<<-sumDL<<endl;
	187	}
	188	LOG(8,<<"check bl="<<dist<<" gives sumDL "<<sumDL<<endl);
	189	return -sumDL;
	190	};
	191	};
	192
	193
	194
	195
	196
	197
	198	//////////////////////////////////////////////////////////////////////////
	199	class C_evalLikeDist_d2GLfixRoot{ // second derivative.
	200	public:
	201	C_evalLikeDist_d2GLfixRoot(const countTableComponentGam& ctc,
	202	const stochasticProcess& inS1) : _ctc(ctc), _sp(inS1) {};
	203	private:
	204	const countTableComponentGam& _ctc;
	205	const stochasticProcess& _sp;
	206	public:
	207	MDOUBLE operator() (MDOUBLE dist) {
	208	MDOUBLE sumDL=0.0;
	209	for (int alph1=0; alph1 < _ctc.alphabetSize(); ++alph1){
	210	for (int alph2=0; alph2 < _ctc.alphabetSize(); ++alph2){
	211	for (int rateCategor = 0; rateCategor<_sp.categories(); ++rateCategor) {
	212	MDOUBLE rate = _sp.rates(rateCategor);
	213
	214	MDOUBLE pij= _sp.Pij_t(alph1,alph2,dist*rate);
	215	MDOUBLE dpij = _sp.dPij_dt(alph1,alph2,dist*rate);
	216	MDOUBLE d2pij = _sp.d2Pij_dt2(alph1,alph2,dist*rate);
	217	sumDL+= rate_ctc.getCounts(alph1,alph2,rateCategor)
	218	(pijd2pij - dpij dpij )/(pij*pij);
	219	}
	220	}
	221	}
	222	return -sumDL;
	223	};
	224	};
	225
	226	#endif
	227

+612

-0

libs/phylogeny/likelihoodComputation.cpp less more

	0	// $Id: likelihoodComputation.cpp 9899 2011-10-11 19:56:48Z rubi $
	1
	2	#include "definitions.h"
	3	#include "tree.h"
	4	#include "computeUpAlg.h"
	5	#include "likelihoodComputation.h"
	6	#include "gammaUtilities.h"
	7	#include <cmath>
	8	#include <cassert>
	9
	10
	11	using namespace likelihoodComputation;
	12
	13	/********************************************************************************************
	14	likelihood computation - full data (1)
	15	*********************************************************************************************/
	16	MDOUBLE likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(const tree& et,
	17	const sequenceContainer& sc,
	18	const stochasticProcess& sp,
	19	const Vdouble * const weights,
	20	unObservableData *unObservableData_p)
	21	{
	22	computePijGam pi;
	23	pi.fillPij(et,sp);
	24
	25	MDOUBLE logLforMissingData;
	26	MDOUBLE LforMissingData;
	27	if(unObservableData_p){
	28	logLforMissingData = unObservableData_p->getlogLforMissingData();
	29	LforMissingData = exp(logLforMissingData);
	30	}
	31	MDOUBLE res =0;
	32	doubleRep LofPos;
	33	int k;
	34	for (k=0; k < sc.seqLen(); ++k) {
	35	LofPos = likelihoodComputation::getLofPos(k,//pos,
	36	et, //const tree&
	37	sc, // sequenceContainer& sc,
	38	pi, //const computePijGam& ,
	39	sp,
	40	NULL);
	41	if(unObservableData_p){ // conditioning on observability for all rateCat.
	42	LofPos = LofPos / (1- LforMissingData);
	43	}
	44	res += log(LofPos) * (weights?(*weights)[k]:1);//const stochasticProcess& );
	45	}
	46	//if(unObservableData_p){ // conditioning on observability for allPos & allRateCat
	47	// res = res - sc.seqLen()*log(1- exp(unObservableData_p->getlogLforMissingData()));
	48	//}
	49	return res;
	50	}
	51
	52	/********************************************************************************************
	53	likelihood computation - per pos (1.1)
	54	*********************************************************************************************/
	55	doubleRep likelihoodComputation::getLofPos(const int pos,
	56	const tree& et,
	57	const sequenceContainer& sc,
	58	const computePijGam& pi,
	59	const stochasticProcess& sp,
	60	unObservableData *unObservableData_p)
	61	{
	62	// with the pi already computed.
	63	doubleRep tmp=0;
	64	int numOfCat = sp.categories();
	65	VdoubleRep tmpPerCat;
	66	tmpPerCat.resize(numOfCat);
	67
	68	for (int i=0; i < sp.categories();++i) {
	69	tmpPerCat[i] = getLofPos(pos,et,sc,pi[i],sp);
	70	// ver1 - fix likelihoodForEachCat by LforMissingDataPerCat - Wrong version...
	71	//if(pLforMissingDataPerCat){
	72	// tmpPerCat[i] = tmpPerCat[i]/(1- (*pLforMissingDataPerCat)[i]);
	73	//}
	74	tmp += tmpPerCat[i]*sp.ratesProb(i);
	75	}
	76	// ver2 - fix likelihoodForEachCat by LforMissingDataAll
	77	if(unObservableData_p){ // conditioning on observability for all rateCat.
	78	tmp = tmp / (1- exp(unObservableData_p->getlogLforMissingData()));
	79	}
	80	return tmp;
	81	}
	82
	83	/********************************************************************************************
	84	likelihood computation - per pos, per cat (1.1.1)
	85	*********************************************************************************************/
	86	doubleRep likelihoodComputation::getLofPos(const int pos,
	87	const tree& et,
	88	const sequenceContainer& sc,
	89	const computePijHom& pi,
	90	const stochasticProcess& sp,
	91	unObservableData *unObservableData_p)
	92	{
	93	computeUpAlg cup;
	94	suffStatGlobalHomPos ssc;
	95	cup.fillComputeUp(et,sc,pos,pi,ssc);
	96
	97	doubleRep tmp = 0.0;
	98	for (int let = 0; let < sp.alphabetSize(); ++let) {
	99	doubleRep tmpLcat=
	100	ssc.get(et.getRoot()->id(),let)*
	101	sp.freq(let);
	102	if (!DBIG_EQUAL(convert(tmpLcat), 0.0))
	103	{
	104	cerr<<"tmpLcat = "<<tmpLcat<<endl;
	105	errorMsg::reportError("error in likelihoodComputation::getLofPos. likelihood is smaller than zero");
	106	}
	107	//assert(tmpLcat>=0.0);
	108	tmp+=tmpLcat;
	109	}
	110	// cout<<"likelihoodComputation::getLofPos: tmp = "; tmp.outputn(cout); // DEBUG EP
	111	if (!DBIG_EQUAL(convert(tmp), 0.0)){
	112	LOG(5,<<"likelihoodComputation::getLofPos: "<< tmp<<endl;);
	113	LOG(5,<<"pos = "<< pos <<endl;);
	114	tmp = EPSILON;
	115	//errorMsg::reportError("likelihoodComputation::getLofPos: likelihood of pos was zero!",1);
	116	}
	117
	118	if(unObservableData_p){ // conditioning on observability
	119	tmp = tmp / (1- exp(unObservableData_p->getlogLforMissingData()));
	120	}
	121	return tmp;
	122	}
	123
	124	//r4s_proportional
	125	/********************************************************************************************
	126	likelihood computation - full data (1)
	127	*********************************************************************************************/
	128	Vdouble likelihoodComputation::getTreeLikelihoodProportionalAllPosAlphTheSame(const tree& et,
	129	const vector<sequenceContainer>& sc,
	130	multipleStochasticProcess* msp,
	131	const gammaDistribution* pProportionDist,
	132	const Vdouble * const weights)
	133	{
	134	Vdouble geneLikelihoodVec;
	135	//geneRateLikelihoodVec[geneN][globalRateCateg] will hold the LL of the gene given the global rate
	136	VVdouble geneRateLikelihoodVec;
	137	geneLikelihoodVec.resize(sc.size(),0.0);
	138	geneRateLikelihoodVec.resize(sc.size());
	139	for(int geneN = 0;geneN < sc.size();++geneN){
	140	geneRateLikelihoodVec[geneN].resize(pProportionDist->categories(),0.0);
	141	for(int globalRateCateg = 0;globalRateCateg < pProportionDist->categories();++globalRateCateg){
	142	msp->getSp(geneN)->setGlobalRate(pProportionDist->rates(globalRateCateg));
	143	computePijGam pi;
	144	pi.fillPij(et,*msp->getSp(geneN));
	145	doubleRep LofPos;
	146	for (int k=0; k < sc[geneN].seqLen(); ++k) {
	147	//LofPos is sum LofPos_LocalRateCat_i*p(LocalRateCat_i)
	148	LofPos = likelihoodComputation::getLofPosProportional(k,//pos,
	149	et, //const tree&
	150	sc[geneN], // sequenceContainer& sc,
	151	pi, //const computePijGam& ,
	152	*msp->getSp(geneN)); //removed the prior of the globar rate categ cause it is multiplied below
	153	geneRateLikelihoodVec[geneN][globalRateCateg] += log(LofPos)(weights?(weights)[k]:1);
	154	}
	155	}
	156	//Once we are finished iterating over all globalRateCategs we need to sum the log likelihood for this gene
	157	//which is: log(prior(globalRateCateg_i)exp(geneRateLikelihoodVec[geneN][globalRateCateg_i]+prior(globalRateCateg_j)exp(geneRateLikelihoodVec[geneN][globalRateCateg_j]..)
	158	//assuming a flat prior this equals: log(prior(globalRateCateg))+log(exp(geneRateLikelihoodVec[geneN][globalRateCateg_i]+exp(geneRateLikelihoodVec[geneN][globalRateCateg_j]..)
	159	//which can be written as:log(prior(globalRateCateg))+log(exp(geneRateLikelihoodVec[geneN][globalRateCateg_i]))(1+exp(geneRateLikelihoodVec[geneN][globalRateCateg_j]-geneRateLikelihoodVec[geneN][globalRateCateg_i]..)
	160	geneLikelihoodVec[geneN] = log(pProportionDist->ratesProb(0))+exponentResolver(geneRateLikelihoodVec[geneN]);//Strictly assumes a flat prior distribution
	161	}
	162	return geneLikelihoodVec;
	163	}
	164
	165	/********************************************************************************************
	166	likelihood computation - per pos (1.1)
	167	*********************************************************************************************/
	168	//Old - remove when QA is done
	169	doubleRep likelihoodComputation::getLofPosProportional(const int pos,
	170	const tree& et,
	171	const sequenceContainer& sc,
	172	const computePijGam& pi,
	173	const stochasticProcess& sp,
	174	const MDOUBLE globalRateProb)
	175	{
	176	// with the pi already computed.
	177	doubleRep tmp=0;
	178	int numOfCat = sp.categories();
	179	VdoubleRep tmpPerCat;
	180	tmpPerCat.resize(numOfCat);
	181
	182	for (int i=0; i < sp.categories();++i) {
	183	tmpPerCat[i] = getLofPos(pos,et,sc,pi[i],sp);
	184	tmp += tmpPerCat[i]sp.ratesProb(i)globalRateProb; //old - now globalRateProb is multipled outside
	185	}
	186	return tmp;
	187	}
	188
	189	/********************************************************************************************
	190	likelihood computation - per pos (1.1)
	191	*********************************************************************************************/
	192	doubleRep likelihoodComputation::getLofPosProportional(const int pos,
	193	const tree& et,
	194	const sequenceContainer& sc,
	195	const computePijGam& pi,
	196	const stochasticProcess& sp)
	197	{
	198	// with the pi already computed.
	199	doubleRep tmp=0;
	200	int numOfCat = sp.categories();
	201	VdoubleRep tmpPerCat;
	202	tmpPerCat.resize(numOfCat);
	203
	204	for (int i=0; i < sp.categories();++i) {
	205	tmpPerCat[i] = getLofPos(pos,et,sc,pi[i],sp);
	206	tmp += tmpPerCat[i]*sp.ratesProb(i);
	207	}
	208	return tmp;
	209	}
	210
	211	//r4s_proportional
	212
	213
	214	/********************************************************************************************
	215	*********************************************************************************************/
	216	doubleRep likelihoodComputation::getProbOfPosWhenUpIsFilledHom(const int pos,
	217	const tree& et,
	218	const sequenceContainer& sc,
	219	const stochasticProcess& sp,
	220	const suffStatGlobalHomPos& ssc){
	221	// using the pij of stochastic process rather than pre computed pij's...
	222	if (ssc.size()==0) {errorMsg::reportError("error in function likelihoodComputation::getLofPosWhenUpIsFilled");}
	223	doubleRep tmp = 0.0;
	224	for (int let = 0; let < sp.alphabetSize(); ++let) {
	225	doubleRep tmpLcat=
	226	ssc.get(et.getRoot()->id(),let)*
	227	sp.freq(let);
	228	tmp+=tmpLcat;
	229	}
	230	return tmp;
	231	}
	232
	233	/********************************************************************************************
	234	*********************************************************************************************/
	235	doubleRep likelihoodComputation::getLofPosHomModelEachSiteDifferentRate(const int pos,
	236	const tree& et,
	237	const sequenceContainer& sc,
	238	const stochasticProcess& sp){
	239	// using the pij of stochastic process rather than pre computed pij's...
	240	if (sp.categories()!=1) {
	241	errorMsg::reportError("num of categories in function getLofPosHomModel must be one");
	242	}
	243	computeUpAlg cup;
	244	suffStatGlobalHomPos ssc;
	245	computePijHom cpij;
	246	cpij.fillPij(et,sp);
	247	cup.fillComputeUp(et,sc,pos,cpij,ssc);
	248	return getProbOfPosWhenUpIsFilledHom(pos,et,sc,sp,ssc);
	249	}
	250	/********************************************************************************************
	251	*********************************************************************************************/
	252	doubleRep likelihoodComputation::getLofPosGamModelEachSiteDifferentRate(const int pos,
	253	const tree& et,
	254	const sequenceContainer& sc,
	255	const stochasticProcess& sp){
	256	computePijGam pi;
	257	pi.fillPij(et,sp);
	258	return getLofPos(pos,et,sc,pi,sp);
	259	}
	260	/********************************************************************************************
	261	*********************************************************************************************/
	262	doubleRep likelihoodComputation::getLofPos(const int pos,
	263	const tree& et,
	264	const sequenceContainer& sc,
	265	const stochasticProcess& sp,
	266	const MDOUBLE gRate){ // when there is a global rate for this position
	267	// using the pij of stochastic process rather than pre computed pij's...
	268	computeUpAlg cup;
	269	suffStatGlobalHomPos ssc;
	270	cup.fillComputeUpSpecificGlobalRate(et,sc,pos,sp,ssc,gRate);
	271
	272	doubleRep tmp = 0.0;
	273	for (int let = 0; let < sp.alphabetSize(); ++let) {
	274	doubleRep tmpLcat=
	275	ssc.get(et.getRoot()->id(),let)*
	276	sp.freq(let);;
	277	assert(tmpLcat>=0.0);
	278	tmp+=tmpLcat;
	279	}
	280	return tmp;
	281	}
	282
	283	/********************************************************************************************
	284	*********************************************************************************************/
	285	doubleRep likelihoodComputation::getLofPosAndPosteriorOfRates(const int pos,
	286	const tree& et,
	287	const sequenceContainer& sc,
	288	const computePijGam& pi,
	289	const stochasticProcess& sp,
	290	VdoubleRep& postrior){
	291	// with the pi already computed.
	292	doubleRep tmp=0;
	293	for (int i=0; i < sp.categories();++i) {
	294	postrior[i]=getLofPos(pos,et,sc,pi[i],sp)*sp.ratesProb(i);
	295	tmp += postrior[i];
	296	}
	297	for (int i=0; i < sp.categories();++i)
	298	postrior[i] /= tmp;
	299	return tmp;
	300	}
	301	/********************************************************************************************
	302	*********************************************************************************************/
	303	MDOUBLE likelihoodComputation::getTreeLikelihoodFromUp(const tree& et,
	304	const sequenceContainer& sc,
	305	const stochasticProcess& sp,
	306	const suffStatGlobalGam& cup,
	307	const Vdouble * weights) {
	308	MDOUBLE like = 0;
	309	//computing the likelihood from up:
	310	for (int pos = 0; pos < sc.seqLen(); ++pos) {
	311	doubleRep tmp=0;
	312	for (int categor = 0; categor < sp.categories(); ++categor) {
	313	doubleRep veryTmp =0;
	314	for (int let =0; let < sc.getAlphabet()->size(); ++let) {
	315	veryTmp+=cup.get(pos,categor,et.getRoot()->id(),let) * sp.freq(let);
	316	}
	317	tmp += veryTmp*sp.ratesProb(categor);
	318	}
	319	like += log(tmp) * (weights?(*weights)[pos]:1);
	320	}
	321	return like;
	322	}
	323	/********************************************************************************************
	324	*********************************************************************************************/
	325	MDOUBLE likelihoodComputation::getTreeLikelihoodFromUp2(const tree& et,
	326	const sequenceContainer& sc,
	327	const stochasticProcess& sp,
	328	const suffStatGlobalGam& cup,
	329	VdoubleRep& posLike, // fill this vector with each position likelihood but without the weights.
	330	const Vdouble * weights,
	331	unObservableData* unObservableData_p) {
	332	posLike.clear();
	333	MDOUBLE like = 0;
	334	//computing the likelihood from up:
	335	for (int pos = 0; pos < sc.seqLen(); ++pos) {
	336	doubleRep tmp=0;
	337	for (int categor = 0; categor < sp.categories(); ++categor) {
	338	doubleRep veryTmp =0;
	339	for (int let =0; let < sc.alphabetSize(); ++let) {
	340	veryTmp+=cup.get(pos,categor,et.getRoot()->id(),let) * sp.freq(let);
	341	}
	342	tmp += veryTmp*sp.ratesProb(categor);
	343	}
	344	assert(tmp>0.0);
	345	if(unObservableData_p){
	346	tmp = tmp/(1- exp(unObservableData_p->getlogLforMissingData()));
	347	}
	348	like += log(tmp) * (weights?(*weights)[pos]:1);
	349	posLike.push_back(tmp);
	350	}
	351	return like;
	352	}
	353	/********************************************************************************************
	354	*********************************************************************************************/
	355	//old
	356	MDOUBLE likelihoodComputation::getTreeLikelihoodFromUp2(const tree& et,
	357	const sequenceContainer& sc,
	358	stochasticProcess& sp,
	359	const suffStatGlobalGamProportional& cup,
	360	const gammaDistribution* pProportionDist,
	361	VdoubleRep& posLike, // fill this vector with each position likelihood but without the weights.
	362	const Vdouble * weights) {
	363	posLike.clear();
	364	MDOUBLE like = 0.0;
	365	//computing the likelihood from up:
	366	for (int pos = 0; pos < sc.seqLen(); ++pos) {
	367	doubleRep tmp(0.0);
	368	for(int globalRateCategor = 0;globalRateCategor < pProportionDist->categories();++globalRateCategor){
	369	for (int localRateCategor = 0; localRateCategor < sp.categories(); ++localRateCategor) {
	370	doubleRep veryTmp =0;
	371	for (int let =0; let < sc.alphabetSize(); ++let) {
	372	veryTmp+=cup.get(pos,globalRateCategor,localRateCategor,et.getRoot()->id(),let) * sp.freq(let);
	373	}
	374	tmp += veryTmppProportionDist->ratesProb(globalRateCategor)sp.ratesProb(localRateCategor);
	375	}
	376	}
	377	assert(tmp>0.0);
	378	like += log(tmp) * (weights?(*weights)[pos]:1);
	379	posLike.push_back(tmp);
	380	}
	381	return like;
	382	}
	383
	384	//new
	385	MDOUBLE likelihoodComputation::getTreeLikelihoodFromUp2(const tree& et,
	386	const sequenceContainer& sc,
	387	stochasticProcess& sp,
	388	const suffStatGlobalGamProportional& cup,
	389	const gammaDistribution* pProportionDist,
	390	VVdoubleRep& posLike,
	391	const Vdouble * weights) {
	392	for(int pos = 0;pos < sc.seqLen();++pos){
	393	posLike[pos].resize(pProportionDist->categories(),0.0);
	394	}
	395	Vdouble geneRateLikelihoodVec;
	396	geneRateLikelihoodVec.resize(pProportionDist->categories(),0.0);
	397	MDOUBLE like = 0.0;
	398	//computing the likelihood from up:
	399	for (int pos = 0; pos < sc.seqLen(); ++pos) {
	400	VdoubleRep tmpVec; //hold the LofPos for each global rate category
	401	tmpVec.resize(pProportionDist->categories(),0.0);//This would sum for every global rate category
	402	for(int globalRateCategor = 0;globalRateCategor < pProportionDist->categories();++globalRateCategor){
	403	doubleRep tmp1(0.0);
	404	doubleRep tmp2(0.0);
	405	for (int localRateCategor = 0; localRateCategor < sp.categories(); ++localRateCategor) {
	406	doubleRep veryTmp(0.0);
	407	for (int let =0; let < sc.alphabetSize(); ++let) {
	408	veryTmp+=cup.get(pos,globalRateCategor,localRateCategor,et.getRoot()->id(),let) * sp.freq(let);
	409	}
	410	tmp1 += veryTmp;
	411	tmp2 += veryTmp*sp.ratesProb(localRateCategor);
	412	}
	413	tmpVec[globalRateCategor] += tmp2;
	414	posLike[pos][globalRateCategor] = tmp1;
	415	}
	416	for(int globalRateCategor = 0;globalRateCategor < pProportionDist->categories();++globalRateCategor){
	417	assert(tmpVec[globalRateCategor]>0.0);
	418	geneRateLikelihoodVec[globalRateCategor] += log(tmpVec[globalRateCategor])(weights?(weights)[pos]:1);
	419	}
	420	}
	421	like = log(pProportionDist->ratesProb(0))+exponentResolver(geneRateLikelihoodVec);
	422	return like;
	423	}
	424
	425	/********************************************************************************************
	426	fill the posteriorLike matrix with each position posterior rate (p(r|D))
	427	but without the weights.
	428	*********************************************************************************************/
	429	MDOUBLE likelihoodComputation::getPosteriorOfRates(const tree& et,
	430	const sequenceContainer& sc,
	431	const stochasticProcess& sp,
	432	VVdoubleRep& posteriorLike,
	433	const Vdouble * weights) {
	434	suffStatGlobalGam cup;
	435	computeUpAlg cupAlg;
	436	computePijGam cpGam;
	437	cpGam.fillPij(et,sp);
	438	cupAlg.fillComputeUp(et,sc,cpGam,cup);
	439	return getPosteriorOfRates(et,sc,sp,cup,posteriorLike,weights);
	440	}
	441
	442	// fill the posteriorLike matrix with each position posterior rate (p(r\|D))
	443	// but without the weights.
	444	MDOUBLE likelihoodComputation::getPosteriorOfRates(const tree& et,
	445	const sequenceContainer& sc,
	446	const stochasticProcess& sp,
	447	const suffStatGlobalGam& cup,
	448	VVdoubleRep& posteriorLike,
	449	const Vdouble * weights) {
	450	posteriorLike.clear();
	451	posteriorLike.resize(sc.seqLen());
	452	for (int z=0; z < posteriorLike.size(); ++z) posteriorLike[z].resize(sp.categories());
	453	MDOUBLE like = 0;
	454	//computing the likelihood from up:
	455	for (int pos = 0; pos < sc.seqLen(); ++pos) {
	456	doubleRep posProb=0;
	457	for (int categor = 0; categor < sp.categories(); ++categor) {
	458	doubleRep veryTmp =0;
	459	for (int let =0; let < sc.getAlphabet()->size(); ++let) {
	460	veryTmp+=cup.get(pos,categor,et.getRoot()->id(),let) * sp.freq(let);
	461	}
	462	posProb += veryTmp*sp.ratesProb(categor);
	463	posteriorLike[pos][categor] += veryTmp*sp.ratesProb(categor);
	464	}
	465	like += log(posProb) * (weights?(*weights)[pos]:1);
	466	for (int categor1 = 0; categor1 < sp.categories(); ++categor1) {
	467	posteriorLike[pos][categor1] /= posProb;
	468	}
	469	}
	470
	471	return like;
	472	}
	473
	474
	475	// fill the posteriorLike matrix with each position posterior rate (p(r\|D))
	476	// and the LLPP, but without the weights.
	477	MDOUBLE likelihoodComputation::getPosteriorOfRatesAndLLPP(const tree& et,
	478	const sequenceContainer& sc,
	479	const stochasticProcess& sp,
	480	const suffStatGlobalGam& cup,
	481	VVdoubleRep& posteriorLike,
	482	VdoubleRep& LLPerPos,
	483	const Vdouble * weights) {
	484	posteriorLike.clear();
	485	posteriorLike.resize(sc.seqLen());
	486	for (int z=0; z < posteriorLike.size(); ++z) posteriorLike[z].resize(sp.categories());
	487	MDOUBLE like = 0;
	488	//computing the likelihood from up:
	489	for (int pos = 0; pos < sc.seqLen(); ++pos) {
	490	LLPerPos[pos] = 0.0;
	491	for (int categor = 0; categor < sp.categories(); ++categor) {
	492	doubleRep veryTmp =0;
	493	for (int let =0; let < sc.getAlphabet()->size(); ++let) {
	494	veryTmp+=cup.get(pos,categor,et.getRoot()->id(),let) * sp.freq(let);
	495	}
	496	LLPerPos[pos] += veryTmp*sp.ratesProb(categor);
	497	posteriorLike[pos][categor] += veryTmp*sp.ratesProb(categor);
	498	}
	499	like += log(LLPerPos[pos]) * (weights?(*weights)[pos]:1);
	500	for (int categor1 = 0; categor1 < sp.categories(); ++categor1) {
	501	posteriorLike[pos][categor1] /= LLPerPos[pos];
	502	}
	503	}
	504
	505	return like;
	506	}
	507
	508	// this function forces non gamma computation of likelihoods from up.
	509	// i.e., even if the stochastic process is really gamma - the likelihood is computed as if there's no gamma.
	510	MDOUBLE likelihoodComputation::getTreeLikelihoodFromUpSpecifcRates(const tree& et,
	511	const sequenceContainer& sc,
	512	const stochasticProcess& sp,
	513	const suffStatGlobalHom& cup,
	514	VdoubleRep& posLike, // fill this vector with each position likelihood but without the weights.
	515	const Vdouble * weights)
	516	{
	517	posLike.clear();
	518	MDOUBLE like = 0;
	519	//computing the likelihood from up:
	520	for (int pos = 0; pos < sc.seqLen(); ++pos)
	521	{
	522	doubleRep tmp=0;
	523	for (int let =0; let < sc.getAlphabet()->size(); ++let) {
	524	tmp += cup.get(pos, et.getRoot()->id(), let) * sp.freq(let);
	525	}
	526
	527	assert(tmp > 0);
	528	like += log(tmp) * (weights?(*weights)[pos]:1);
	529	posLike.push_back(tmp);
	530	}
	531	return like;
	532	}
	533	/********************************************************************************************
	534	*********************************************************************************************/
	535	doubleRep likelihoodComputation::getProbOfPosWhenUpIsFilledGam(const int pos,
	536	const tree& et,
	537	const sequenceContainer& sc,
	538	const stochasticProcess& sp,
	539	const suffStatGlobalGamPos& cup) {
	540	doubleRep tmp=0;
	541	for (int categor = 0; categor < sp.categories(); ++categor) {
	542	doubleRep veryTmp =0;
	543	for (int let =0; let < sc.alphabetSize(); ++let) {
	544	veryTmp+=cup.get(categor,et.getRoot()->id(),let) * sp.freq(let);
	545	}
	546	tmp += veryTmp*sp.ratesProb(categor);
	547	}
	548	assert(tmp>0.0);
	549	return tmp;
	550	}
	551	/********************************************************************************************
	552	*********************************************************************************************/
	553	MDOUBLE likelihoodComputation::computeLikelihoodAndLikelihoodPerPosition(const sequenceContainer &sc, const tree &et,
	554	const stochasticProcess &sp, Vdouble &LLPerPos) {
	555	MDOUBLE treeLogLikelihood = 0.0;
	556	computePijGam cpij;
	557	cpij.fillPij(et, sp);
	558	LLPerPos.resize(sc.seqLen());
	559	doubleRep LofPos;
	560	for (int pos=0; pos < sc.seqLen() ;++pos) {
	561	LofPos = likelihoodComputation::getLofPos(pos, et, sc, cpij, sp);
	562	MDOUBLE tmpLL = log(LofPos);
	563	treeLogLikelihood += tmpLL;
	564	LLPerPos[pos] = tmpLL;
	565	}
	566	return treeLogLikelihood;
	567	}
	568	/********************************************************************************************
	569	likelihood for each category - used for unObservableData
	570	*********************************************************************************************/
	571	Vdouble likelihoodComputation::getLofPosPerCat(const int pos,
	572	const tree& et,
	573	const sequenceContainer& sc,
	574	const computePijGam& pi,
	575	const stochasticProcess& sp)
	576	{
	577	// with the pi already computed.
	578	int numOfCat = sp.categories();
	579	Vdouble tmp;
	580	tmp.resize(numOfCat);
	581	for (int i=0; i < numOfCat;++i) {
	582	tmp[i] = convert(getLofPos(pos,et,sc,pi[i],sp))*sp.ratesProb(i);
	583	}
	584	return tmp;
	585	}
	586
	587	//doubleRep likelihoodComputation::getLofPos(const int pos,
	588	// const tree& et,
	589	// const sequenceContainer& sc,
	590	// const computePijGam& pi,
	591	// const stochasticProcess& sp){
	592	//// with the pi already computed.
	593	// doubleRep tmp=0;
	594	// for (int i=0; i < sp.categories();++i) {
	595	// tmp += getLofPos(pos,et,sc,pi[i],sp)*sp.ratesProb(i);
	596	// }
	597	// return tmp;
	598	//}
	599
	600	// MDOUBLE likelihoodComputation::getTreeLikelihoodFromPosteriorAndAlpha(const MDOUBLE alpha,
	601	// const Vdouble originalBounderi,
	602	// const VVdouble& posteriorLike,
	603	// const VdoubleRep& LLPP,
	604	// const Vdouble* weights)
	605	// {
	606	// int nCategories = originalBounderi.size()-1;
	607	// Vdouble rateWeights; rateWeights.resize(nCategories);
	608	// for (int i=0; i<n; ++i)
	609	// rateWeights[i]=(gammp(alpha, originalBounderi[i+1]*alpha)-gammp(alpha, originalBounderi[i]*alpha))*nCategories;
	610
	611	// }

+208

-0

libs/phylogeny/likelihoodComputation.h less more

	0	// $Id: likelihoodComputation.h 9899 2011-10-11 19:56:48Z rubi $
	1
	2	#ifndef ___LIKELIHOOD_COMPUTATION
	3	#define ___LIKELIHOOD_COMPUTATION
	4
	5	#include "definitions.h"
	6	#include "computePijComponent.h"
	7	#include "sequenceContainer.h"
	8	#include "suffStatComponent.h"
	9	#include "unObservableData.h"
	10	#include "multipleStochasticProcess.h"
	11	#include "gammaDistribution.h"
	12	#include "distribution.h"
	13
	14
	15	namespace likelihoodComputation {
	16	// likelihood computation - full data (1)
	17	MDOUBLE getTreeLikelihoodAllPosAlphTheSame(const tree& et,
	18	const sequenceContainer& sc,
	19	const stochasticProcess& sp,
	20	const Vdouble * const weights = NULL,
	21	unObservableData *unObservableData_p=NULL);
	22	// likelihood computation - per pos (1.1)
	23	doubleRep getLofPos(const int pos, // this function is used
	24	const tree& et, // when gamma, and the br-len
	25	const sequenceContainer& sc, // are the same for all pos.
	26	const computePijGam& pi,
	27	const stochasticProcess& sp,
	28	unObservableData *unObservableData_p=NULL);
	29	// likelihood computation - per pos, per cat (1.1.1)
	30	doubleRep getLofPos(const int pos, // this function is used
	31	const tree& et, // when the br-len
	32	const sequenceContainer& sc, // are the same for all
	33	const computePijHom& pi, // positions.
	34	const stochasticProcess& sp,
	35	unObservableData *unObservableData_p=NULL);
	36
	37	//r4s_Proportional
	38	// likelihood computation - full data (1)
	39	Vdouble getTreeLikelihoodProportionalAllPosAlphTheSame(const tree& et,
	40	const vector<sequenceContainer>& sc,
	41	multipleStochasticProcess* msp,
	42	const gammaDistribution* pProportionDist,
	43	const Vdouble * const weights = NULL);
	44	// likelihood computation - per pos (1.1)
	45	//Old - remove when QA is done
	46	doubleRep getLofPosProportional(const int pos, // this function is used
	47	const tree& et, // when gamma, and the br-len
	48	const sequenceContainer& sc, // are the same for all pos.
	49	const computePijGam& pi,
	50	const stochasticProcess& sp,
	51	const MDOUBLE globalRateProb);
	52	doubleRep getLofPosProportional(const int pos, // this function is used
	53	const tree& et, // when gamma, and the br-len
	54	const sequenceContainer& sc, // are the same for all pos.
	55	const computePijGam& pi,
	56	const stochasticProcess& sp);
	57	//r4s_Proportional
	58
	59
	60
	61	// used when the likelihood given each category is needed, not only the sum
	62	Vdouble getLofPosPerCat(const int pos,
	63	const tree& et,
	64	const sequenceContainer& sc,
	65	const computePijGam& pi,
	66	const stochasticProcess& sp);
	67	// used to fill the likelihood for the unobservable for each category
	68	doubleRep getLofPos(const int pos,
	69	const tree& et,
	70	const sequenceContainer& sc,
	71	const computePijGam& pi,
	72	const stochasticProcess& sp,
	73	Vdouble& likePerCat); // all the likdelhoodsPerCat and rateProb are filled
	74
	75
	76
	77
	78	// --------------------------------------------------------------------------------
	79	// this function should be used only when the branch lengths are not the same for
	80	// all positions. Otherwise, computePijHom should be calculated once,
	81	// and be used for all calls. In this function, computePijHom is being computed for
	82	// each position.
	83	doubleRep getLofPosHomModelEachSiteDifferentRate(const int pos,
	84	const tree& et,
	85	const sequenceContainer& sc,
	86	const stochasticProcess& sp);
	87	// ---------------------------------------------------------------------------------
	88
	89
	90	// --------------------------------------------------------------------------------
	91	// this function should be used only when the branch lengths are not the same for
	92	// all positions. Otherwise, computePijHom should be calculated once,
	93	// and be used for all calls. In this function, computePijHom is being computed for
	94	// each position.
	95	doubleRep getLofPosGamModelEachSiteDifferentRate(const int pos,
	96	const tree& et,
	97	const sequenceContainer& sc,
	98	const stochasticProcess& sp);
	99	// --------------------------------------------------------------------------------
	100
	101
	102	doubleRep getLofPos(const int pos, // with a site specific rate.
	103	const tree& et,
	104	const sequenceContainer& sc,
	105	const stochasticProcess& sp,
	106	const MDOUBLE gRate);
	107	doubleRep getProbOfPosWhenUpIsFilledHom(const int pos, // to be used for homogenous model
	108	const tree& et,
	109	const sequenceContainer& sc,
	110	const stochasticProcess& sp,
	111	const suffStatGlobalHomPos& ssc);
	112	doubleRep getProbOfPosWhenUpIsFilledGam(const int pos, // to be used for Gamma model.
	113	const tree& et,
	114	const sequenceContainer& sc,
	115	const stochasticProcess& sp,
	116	const suffStatGlobalGamPos& cup);
	117
	118	doubleRep getLofPosAndPosteriorOfRates(const int pos,
	119	const tree& et,
	120	const sequenceContainer& sc,
	121	const computePijGam& pi,
	122	const stochasticProcess& sp,
	123	VdoubleRep& postrior);
	124
	125	MDOUBLE getTreeLikelihoodFromUp(const tree& et,
	126	const sequenceContainer& sc,
	127	const stochasticProcess& sp,
	128	const suffStatGlobalGam& cup,
	129	const Vdouble * weights =0 );
	130
	131	MDOUBLE getTreeLikelihoodFromUp2(const tree& et,
	132	const sequenceContainer& sc,
	133	const stochasticProcess& sp,
	134	const suffStatGlobalGam& cup,
	135	VdoubleRep& posLike, // fill this vector with each position likelihood but without the weights.
	136	const Vdouble * weights=0,
	137	unObservableData* unObservableData_p=NULL);
	138	//old
	139	MDOUBLE getTreeLikelihoodFromUp2(const tree& et,
	140	const sequenceContainer& sc,
	141	stochasticProcess& sp,
	142	const suffStatGlobalGamProportional& cup,
	143	const gammaDistribution* pProportionDist,
	144	VdoubleRep& posLike, // fill this vector with each position likelihood but without the weights.
	145	const Vdouble * weights=0);
	146	//new
	147	MDOUBLE getTreeLikelihoodFromUp2(const tree& et,
	148	const sequenceContainer& sc,
	149	stochasticProcess& sp,
	150	const suffStatGlobalGamProportional& cup,
	151	const gammaDistribution* pProportionDist,
	152	VVdoubleRep& posLike, // fill this vector with each position likelihood but without the weights.
	153	const Vdouble * weights=0);
	154
	155	// fill this vector with each position posterior rate (p(r\|D))
	156	// but without the weights.
	157	// the weights are used only because we return the likelihood
	158	// (this takes these weights into account).
	159	MDOUBLE getPosteriorOfRates(const tree& et,
	160	const sequenceContainer& sc,
	161	const stochasticProcess& sp,
	162	const suffStatGlobalGam& cup,
	163	VVdoubleRep& posteriorLike,
	164	const Vdouble * weights = NULL);
	165
	166	MDOUBLE getPosteriorOfRates(const tree& et,
	167	const sequenceContainer& sc,
	168	const stochasticProcess& sp,
	169	VVdoubleRep& posteriorLike,
	170	const Vdouble * weights = NULL);
	171
	172	// fill the posteriorLike matrix with each position posterior rate (p(r\|D))
	173	// and the LLPP, but without the weights.
	174	MDOUBLE getPosteriorOfRatesAndLLPP(const tree& et,
	175	const sequenceContainer& sc,
	176	const stochasticProcess& sp,
	177	const suffStatGlobalGam& cup,
	178	VVdoubleRep& posteriorLike,
	179	VdoubleRep& LLPerPos,
	180	const Vdouble * weights=NULL);
	181	// From Itay M.
	182	// this function forces non gamma computation of likelihoods from up.
	183	// i.e., even if the stochastic process is really gamma - the likelihood is computed as if there's no gamma.
	184	MDOUBLE getTreeLikelihoodFromUpSpecifcRates(const tree& et,
	185	const sequenceContainer& sc,
	186	const stochasticProcess& sp,
	187	const suffStatGlobalHom& cup,
	188	VdoubleRep& posLike, // fill this vector with each position likelihood but without the weights.
	189	const Vdouble * weights = NULL);
	190
	191	// added from main semphy on 23.5.2005 (eyal privman + matan ninio).
	192	MDOUBLE computeLikelihoodAndLikelihoodPerPosition(const sequenceContainer &sc, const tree &et,
	193	const stochasticProcess &sp, Vdouble &LLPerPos);
	194	MDOUBLE getTreeLikelihoodFromPosteriorAndAlpha(const MDOUBLE alpha,
	195	const Vdouble originalBounderi,
	196	const VVdouble& posteriorLike,
	197	const VdoubleRep& LLPP,
	198	const Vdouble* weights);
	199
	200
	201
	202	};
	203
	204
	205
	206	#endif
	207

+94

-0

libs/phylogeny/likelihoodComputation2Codon.cpp less more

	0	#include "likelihoodComputation2Codon.h"
	1
	2	#include "wYangModel.h"
	3	#include "definitions.h"
	4	#include "tree.h"
	5	#include "computeUpAlg.h"
	6	#include "likelihoodComputation.h"
	7
	8	#include <cmath>
	9	#include <cassert>
	10
	11	using namespace likelihoodComputation2Codon;
	12
	13
	14
	15	MDOUBLE likelihoodComputation2Codon::getTreeLikelihoodAllPosAlphTheSame(const tree& et,
	16	const sequenceContainer& sc,
	17	const vector<stochasticProcess>& spVec,const distribution * distr){
	18	computePijGam pi;
	19	pi._V.resize(distr->categories());
	20	for (int i=0; i < spVec.size(); ++i) {
	21	pi._V[i].fillPij(et,spVec[i]);
	22	}
	23
	24	suffStatGlobalGam ssc;
	25	computeUpAlg cup;
	26	cup.fillComputeUp(et,sc,pi,ssc);
	27
	28	MDOUBLE res = 0.0;
	29	int k;
	30	for (k=0; k < sc.seqLen(); ++k) {
	31	MDOUBLE lnL = log(likelihoodComputation2Codon::getProbOfPosUpIsFilledSelectionGam(k,//pos,
	32	et,//const tree&
	33	sc,// sequenceContainer& sc,
	34	spVec[0],
	35	ssc[k],//const computePijGam& ,
	36	distr)); //W distribution ,
	37	LOG(20,<<"pos= "<<k<<" lnL= "<<lnL<<endl);
	38	res += lnL;
	39	//if (k==5) exit(0);
	40
	41	}
	42	return res;
	43
	44
	45
	46	}
	47
	48
	49	MDOUBLE likelihoodComputation2Codon::getProbOfPosUpIsFilledSelectionGam(const int pos,const tree& et,
	50	const sequenceContainer& sc,
	51	const stochasticProcess& sp,
	52	const suffStatGlobalGamPos& cup,const distribution * distr){
	53
	54	doubleRep tmp=0.0;
	55	for (int categor = 0; categor < distr->categories(); ++categor) {
	56	doubleRep veryTmp =0;
	57	for (int let =0; let < sc.alphabetSize(); ++let) {
	58	veryTmp+=cup.get(categor,et.getRoot()->id(),let) * sp.freq(let);
	59
	60	}
	61	//cout<<"category= "<<categor<<" fh= "<<veryTmp<<" freqCategor= "<<distr->ratesProb(categor)<<endl;
	62	tmp += veryTmp*distr->ratesProb(categor);
	63	}
	64	assert(tmp>0.0);
	65	return convert(tmp);
	66	}
	67
	68	MDOUBLE likelihoodComputation2Codon::getTreeLikelihoodFromUp2(const tree& et,
	69	const sequenceContainer& sc,
	70	const stochasticProcess& sp,
	71	const suffStatGlobalGam& cup,
	72	Vdouble& posLike, // fill this vector with each position likelihood but without the weights.
	73	const distribution * distr,
	74	const Vdouble * weights) {
	75	posLike.clear();
	76	MDOUBLE like = 0;
	77	//computing the likelihood from up:
	78	for (int pos = 0; pos < sc.seqLen(); ++pos) {
	79	doubleRep tmp=0;
	80	for (int categor = 0; categor < distr->categories(); ++categor) {
	81	doubleRep veryTmp =0;
	82	for (int let =0; let < sc.alphabetSize(); ++let) {
	83	veryTmp+=cup.get(pos,categor,et.getRoot()->id(),let) * sp.freq(let);
	84	}
	85	tmp += veryTmp*distr->ratesProb(categor);
	86	}
	87	assert(tmp>0.0);
	88	like += log(tmp) * (weights?(*weights)[pos]:1);
	89	posLike.push_back(convert(tmp));
	90	}
	91	return like;
	92
	93	}

+35

-0

libs/phylogeny/likelihoodComputation2Codon.h less more

	0	// $Id: likelihoodComputation2Codon.h 4699 2008-08-14 14:19:46Z privmane $
	1
	2	#ifndef ___LIKELIHOOD_COMPUTATION_2_CODON
	3	#define ___LIKELIHOOD_COMPUTATION_2_CODON
	4
	5	#include "definitions.h"
	6	#include "computePijComponent.h"
	7	#include "sequenceContainer.h"
	8	#include "suffStatComponent.h"
	9
	10	namespace likelihoodComputation2Codon {
	11
	12	MDOUBLE getTreeLikelihoodAllPosAlphTheSame(const tree& et,
	13	const sequenceContainer& sc,
	14	const vector<stochasticProcess>& spVec,
	15	const distribution * distr);
	16
	17	MDOUBLE getProbOfPosUpIsFilledSelectionGam(const int pos,const tree& et, //used for gamma model
	18	const sequenceContainer& sc,
	19	const stochasticProcess& sp,
	20	const suffStatGlobalGamPos& cup,
	21	const distribution * distr);
	22
	23	MDOUBLE getTreeLikelihoodFromUp2(const tree& et,
	24	const sequenceContainer& sc,
	25	const stochasticProcess& sp,
	26	const suffStatGlobalGam& cup,
	27	Vdouble& posLike, // fill this vector with each position likelihood but without the weights.
	28	const distribution * distr,
	29	const Vdouble * weights=0);
	30	};
	31
	32
	33
	34	#endif

+82

-0

libs/phylogeny/likelihoodComputation2USSRV.cpp less more

	0	// $Id: likelihoodComputation2USSRV.cpp 962 2006-11-07 15:13:34Z privmane $
	1	#include "likelihoodComputation2USSRV.h"
	2
	3
	4	using namespace likelihoodComputation2USSRV;
	5
	6	//compute likelihood for the ssrv model and the base model.
	7
	8	MDOUBLE likelihoodComputation2USSRV::getTreeLikelihoodAllPosAlphTheSame(const tree& et,
	9	const sequenceContainer& sc, const sequenceContainer& baseSc,
	10	const ussrvModel& model,const Vdouble * const weights){
	11
	12
	13	computePijHom piSSRV;
	14	piSSRV.fillPij(et,model.getSSRVmodel());
	15
	16	computePijGam piBase;
	17	piBase.fillPij(et,model.getBaseModel());
	18
	19	MDOUBLE res =0.0;
	20	MDOUBLE f = model.getF();
	21	doubleRep LofPosSSRV(0.0),LofPosBase(0.0);
	22	MDOUBLE lnL(0.);
	23	int k;
	24	for (k=0; k < sc.seqLen(); ++k) {
	25	if (f<1.0)
	26	LofPosBase = likelihoodComputation::getLofPos(k,et,baseSc,piBase,model.getBaseModel());
	27	if (f>0.0) {
	28	LofPosSSRV = likelihoodComputation::getLofPos(k,et,sc,piSSRV,model.getSSRVmodel());
	29	if (f<1.0)
	30	lnL = log(LofPosSSRVf+(1-f)LofPosBase);
	31	else // f == 1.0
	32	lnL = log(LofPosSSRV);
	33	}
	34	else // f == 0.0
	35	lnL = log(LofPosBase);
	36
	37	LOG(9,<<"pos= "<<k<<" lnL= "<<lnL<<endl);
	38	LOG(10,<<"logLofPosBase= "<< log(LofPosBase) << " logLofPosSSRV= " << log(LofPosSSRV) << " f= " << f <<endl);
	39	res += lnL * (weights?(*weights)[k]:1);
	40	}
	41	return res;
	42	}
	43
	44
	45
	46	MDOUBLE likelihoodComputation2USSRV::getTreeLikelihoodFromUp2(const tree& et,
	47	const sequenceContainer& sc,
	48	const sequenceContainer& baseSc,
	49	const ussrvModel & model,
	50	const suffStatGlobalGam& cupBase,
	51	const suffStatGlobalHom& cupSSRV,
	52	VdoubleRep& posLike, // fill this vector with each position likelihood but without the weights.
	53	const Vdouble * weights) {
	54	posLike.clear();
	55	MDOUBLE like = 0;
	56	MDOUBLE f = model.getF();
	57	//computing the likelihood from up:
	58	for (int pos = 0; pos < sc.seqLen(); ++pos) {
	59	doubleRep tmp=0;
	60
	61	doubleRep tmp2 = 0; //like for the SSRV part
	62	// SSRV
	63	for (int let =0; let < model.getSSRVmodel().alphabetSize(); ++let) {
	64	tmp2+=cupSSRV.get(pos,et.getRoot()->id(),let) * model.getSSRVmodel().freq(let);
	65	}
	66	// Base model
	67	for (int categor = 0; categor < model.noOfCategor(); ++categor) {
	68	doubleRep veryTmp =0;
	69	for (int let =0; let < model.getBaseModel().alphabetSize(); ++let) {
	70	veryTmp+=cupBase.get(pos,categor,et.getRoot()->id(),let) * model.getBaseModel().freq(let);
	71	}
	72	tmp += veryTmp*model.getCategorProb(categor);
	73	}
	74
	75	if(tmp<0.0) errorMsg::reportError("like< 0 in likelihoodComputation2USSRV::getTreeLikelihoodFromUp2");
	76
	77	like += log((1-f)tmp+ftmp2) * (weights?(*weights)[pos]:1);
	78	posLike.push_back((1-f)tmp+ftmp2);
	79	}
	80	return like;
	81	}

+36

-0

libs/phylogeny/likelihoodComputation2USSRV.h less more

	0	// $Id: likelihoodComputation2USSRV.h 962 2006-11-07 15:13:34Z privmane $
	1	#ifndef ___LIKELIHOOD_COMPUTATION_2_USSRV
	2	#define ___LIKELIHOOD_COMPUTATION_2_USSRV
	3
	4	#include "definitions.h"
	5	#include "computePijComponent.h"
	6	#include "sequenceContainer.h"
	7	#include "suffStatComponent.h"
	8	#include "ussrvModel.h"
	9	#include "tree.h"
	10	#include "computeUpAlg.h"
	11	#include "likelihoodComputation.h"
	12	#include <cmath>
	13	#include <cassert>
	14
	15
	16	namespace likelihoodComputation2USSRV {
	17
	18	MDOUBLE getTreeLikelihoodAllPosAlphTheSame(const tree& et,
	19	const sequenceContainer& sc,const sequenceContainer& baseSc,
	20	const ussrvModel& model,const Vdouble * const weights=0);
	21
	22	MDOUBLE getTreeLikelihoodFromUp2(const tree& et,
	23	const sequenceContainer& sc,
	24	const sequenceContainer& baseSc,
	25	const ussrvModel & model,
	26	const suffStatGlobalGam& cupBase,
	27	const suffStatGlobalHom& cupSSRV,
	28	VdoubleRep& posLike, // fill this vector with each position likelihood but without the weights.
	29	const Vdouble * weights=0);
	30
	31	};
	32
	33
	34
	35	#endif // ___LIKELIHOOD_COMPUTATION_2_USSRV

+33

-0

libs/phylogeny/likelihoodComputationFactors.cpp less more

	0	// $Id: likelihoodComputationFactors.cpp 962 2006-11-07 15:13:34Z privmane $
	1
	2	#include "definitions.h"
	3	#include "tree.h"
	4	#include "computeUpAlg.h"
	5	#include "likelihoodComputationFactors.h"
	6	#include <cmath>
	7	#include <cassert>
	8
	9	using namespace likelihoodComputation;
	10
	11	MDOUBLE likelihoodComputation::getLOG_LofPos(const int pos,
	12	const tree& et,
	13	const sequenceContainer& sc,
	14	const stochasticProcess& sp,
	15	const MDOUBLE gRate){ // when there is a global rate for this position
	16	// using the pij of stochastic process rather than pre computed pij's...
	17	vector<MDOUBLE> factors;
	18	computeUpAlg cup;
	19	suffStatGlobalHomPos ssc;
	20	cup.fillComputeUpSpecificGlobalRateFactors(et,sc,pos,sp,ssc,gRate,factors);
	21
	22	doubleRep tmp = 0.0;
	23	for (int let = 0; let < sp.alphabetSize(); ++let) {
	24	doubleRep tmpLcat=
	25	ssc.get(et.getRoot()->id(),let)*
	26	sp.freq(let);;
	27	assert(tmpLcat>=0);
	28	tmp+=tmpLcat;
	29	}
	30	return log(tmp)-factors[et.getRoot()->id()]*log(10.0);
	31	}
	32

+28

-0

libs/phylogeny/likelihoodComputationFactors.h less more

	0	// $Id: likelihoodComputationFactors.h 962 2006-11-07 15:13:34Z privmane $
	1
	2	#ifndef ___LIKELIHOOD_COMPUTATION_FACTORS
	3	#define ___LIKELIHOOD_COMPUTATION_FACTORS
	4
	5	#include "definitions.h"
	6	#include "tree.h"
	7	#include "computePijComponent.h"
	8	#include "sequenceContainer.h"
	9	#include "suffStatComponent.h"
	10
	11	namespace likelihoodComputation {
	12
	13	MDOUBLE getLOG_LofPos(const int pos, // with a site specific rate.
	14	const tree& et,
	15	const sequenceContainer& sc,
	16	const stochasticProcess& sp,
	17	const MDOUBLE gRate);
	18
	19	// add all the other functions to use factors...
	20
	21
	22	};
	23
	24
	25
	26	#endif
	27

+337

-0

libs/phylogeny/likelihoodComputationGL.cpp less more

	0	#include "likelihoodComputationGL.h"
	1
	2	#include "definitions.h"
	3	#include "tree.h"
	4	#include "likelihoodComputation.h"
	5	#include <cmath>
	6	#include <cassert>
	7
	8	using namespace likelihoodComputationGL;
	9
	10	// account for RateCat, GainCat,LossCat
	11	// - For each RateCat an "external" multiplication is conducted - copy_et.multipleAllBranchesByFactor
	12	// - the GainCat*LossCat SPs are covered by the "internal" mechanism of PijGam
	13
	14	/********************************************************************************************
	15	*********************************************************************************************/
	16	MDOUBLE likelihoodComputationGL::getTreeLikelihoodAllPosAlphTheSame(const tree& tr,
	17	const sequenceContainer& sc,
	18	const vector<vector<stochasticProcess*> >& spVVec,
	19	const distribution * distGain, const distribution * distLoss,
	20	const Vdouble * const weights,
	21	unObservableData *unObservableData_p)
	22	{
	23	int numOfRateCategories = spVVec[0][0]->categories();
	24	vector<computePijGam> pi_vec(numOfRateCategories);
	25	vector<suffStatGlobalGam> ssc_vec(numOfRateCategories);
	26	vector<computeUpAlg> cup_vec(numOfRateCategories);
	27
	28	likelihoodComputationGL::fillPijAndUp(tr,sc,spVVec,distGain,distLoss,pi_vec,ssc_vec,cup_vec);
	29	MDOUBLE logLforMissingData;
	30	MDOUBLE LforMissingData = 0;
	31	if(unObservableData_p){
	32	logLforMissingData = unObservableData_p->getlogLforMissingData();
	33	LforMissingData = exp(logLforMissingData);
	34	}
	35	MDOUBLE res = 0.0;
	36	for (int k=0; k < sc.seqLen(); ++k) {
	37	MDOUBLE lnL = 0;
	38	MDOUBLE resGivenRate = 0.0;
	39	for(int rateIndex=0 ; rateIndex<numOfRateCategories; ++rateIndex){
	40	lnL = likelihoodComputationGL::getProbOfPosUpIsFilledSelectionGam(k,//pos,
	41	tr,//const tree&
	42	sc,// sequenceContainer& sc,
	43	spVVec, // only needed for sp.freq(let)
	44	ssc_vec[rateIndex][k],//const computePijGam& ,
	45	distGain, distLoss); // distributions ,
	46	resGivenRate += lnL * spVVec[0][0]->ratesProb(rateIndex);
	47	}
	48	if(unObservableData_p){ // conditioning on observability for all rateCat.
	49	resGivenRate = resGivenRate / (1- LforMissingData);
	50	}
	51
	52	LOG(20,<<"pos= "<<k+1<<" resGivenRate= "<<resGivenRate<<endl);
	53	//res += lnL;
	54	res += log(resGivenRate) * (weights?(*weights)[k]:1);
	55	}
	56	//if(unObservableData_p){
	57	// res = res - sc.seqLen()*log(1- exp(unObservableData_p->getlogLforMissingData()));
	58	//}
	59	return res;
	60	}
	61	/********************************************************************************************
	62	*********************************************************************************************/
	63	void likelihoodComputationGL::fillPijAndUp(const tree& tr,
	64	const sequenceContainer& sc,
	65	const vector<vector<stochasticProcess*> >& spVVec,
	66	const distribution * distGain, const distribution * distLoss,
	67	vector<computePijGam>& pi_vec,
	68	vector<suffStatGlobalGam>& ssc_vec, // info filled into suffStat
	69	vector<computeUpAlg>& cup_vec)
	70	{
	71	int numOfSPs = distGain->categories()*distLoss->categories();
	72	int numOfRateCategories = spVVec[0][0]->categories();
	73	for (int rateIndex=0 ; rateIndex<numOfRateCategories; ++rateIndex){
	74	tree copy_et = tr;
	75	copy_et.multipleAllBranchesByFactor(spVVec[0][0]->rates(rateIndex));
	76	pi_vec[rateIndex]._V.resize(numOfSPs);
	77	//Pij
	78	for (int i=0; i < numOfSPs; ++i) {
	79	int gainIndex =fromIndex2gainIndex(i,distGain->categories(),distLoss->categories());
	80	int lossIndex =fromIndex2lossIndex(i,distGain->categories(),distLoss->categories());
	81	pi_vec[rateIndex]._V[i].fillPij(copy_et,*spVVec[gainIndex][lossIndex]);
	82	}
	83	//ComputeUp
	84	cup_vec[rateIndex].fillComputeUp(copy_et,sc,pi_vec[rateIndex],ssc_vec[rateIndex]);
	85	}
	86	}
	87
	88	/********************************************************************************************
	89	*********************************************************************************************/
	90	MDOUBLE likelihoodComputationGL::getProbOfPosUpIsFilledSelectionGam(const int pos,const tree& tr,
	91	const sequenceContainer& sc,
	92	const vector<vector<stochasticProcess*> >& spVVec,// only needed for sp.freq(let)
	93	const suffStatGlobalGamPos& cup,
	94	const distribution * distGain, const distribution * distLoss)
	95	{
	96
	97	doubleRep res =0;
	98	int numOfSPs = distGain->categories()*distLoss->categories();
	99	for (int categor = 0; categor < numOfSPs; ++categor) {
	100	doubleRep veryTmp =0.0;
	101	int gainCategor = fromIndex2gainIndex(categor,distGain->categories(),distLoss->categories());
	102	int lossCategor = fromIndex2lossIndex(categor,distGain->categories(),distLoss->categories());
	103	for (int let =0; let < sc.alphabetSize(); ++let) {
	104	veryTmp+=cup.get(categor,tr.getRoot()->id(),let) * spVVec[gainCategor][lossCategor]->freq(let); // Root character freq
	105	}
	106	res += veryTmp(distGain->ratesProb(gainCategor)distLoss->ratesProb(lossCategor));
	107	}
	108	if ((res<-EPSILON)){
	109	string err = "Error in likelihoodComputationGL::getProbOfPosUpIsFilledSelectionGam, non probability value (<0) Res=";
	110	err+=double2string(convert(res));
	111	errorMsg::reportError(err);
	112	};
	113	return convert(res);
	114	}
	115	/********************************************************************************************
	116	*********************************************************************************************/
	117	MDOUBLE likelihoodComputationGL::getTreeLikelihoodFromUp2(const tree& tr,
	118	const sequenceContainer& sc,
	119	const vector<vector<stochasticProcess*> >& spVVec,// only needed for sp.freq(let)
	120	const suffStatGlobalGam& cup, //computing the likelihood from up:
	121	const distribution * distGain, const distribution * distLoss,
	122	const Vdouble * weights,
	123	unObservableData *unObservableData_p,
	124	Vdouble* posLike)
	125	{
	126	if(posLike)
	127	posLike->clear();
	128	MDOUBLE like = 0;
	129
	130	int numOfSPs = distGain->categories()*distLoss->categories();
	131	for (int pos = 0; pos < sc.seqLen(); ++pos) {
	132	doubleRep tmp=0;
	133	for (int categor = 0; categor < numOfSPs; ++categor) {
	134	doubleRep veryTmp =0;
	135	int gainCategor = fromIndex2gainIndex(categor,distGain->categories(),distLoss->categories());
	136	int lossCategor = fromIndex2lossIndex(categor,distGain->categories(),distLoss->categories());
	137	for (int let =0; let < sc.alphabetSize(); ++let) {
	138	veryTmp+=cup.get(pos,categor,tr.getRoot()->id(),let) * spVVec[gainCategor][lossCategor]->freq(let);
	139	}
	140	tmp += veryTmp(distGain->ratesProb(gainCategor)distLoss->ratesProb(lossCategor));
	141	}
	142	if(unObservableData_p)
	143	tmp = tmp/(1- exp(unObservableData_p->getlogLforMissingData()));
	144	if(posLike)
	145	posLike->push_back(log(tmp));
	146	like += log(tmp) * (weights?(*weights)[pos]:1);
	147
	148	}
	149	return like;
	150	}
	151
	152	/********************************************************************************************
	153	*********************************************************************************************/
	154	MDOUBLE likelihoodComputationGL::getTreeLikelihoodFromUp2(const tree& tr,
	155	const sequenceContainer& sc,
	156	const vector<vector<stochasticProcess*> >& spVVec,// only needed for sp.freq(let)
	157	const vector<suffStatGlobalGam>& cup_vec, //computing the likelihood from up:
	158	const distribution * distGain, const distribution * distLoss,
	159	const Vdouble * weights,
	160	unObservableData *unObservableData_p,
	161	Vdouble* posLike )
	162	{
	163	if(posLike)
	164	posLike->resize(sc.seqLen());
	165	MDOUBLE like = 0;
	166	int numOfRateCategories = spVVec[0][0]->categories();
	167	for(int rateIndex=0 ; rateIndex<numOfRateCategories; ++rateIndex){
	168	Vdouble posLikePerCat;
	169	like += likelihoodComputationGL::getTreeLikelihoodFromUp2(tr,sc,spVVec,cup_vec[rateIndex], distGain,distLoss,weights,unObservableData_p,&posLikePerCat)
	170	* spVVec[0][0]->ratesProb(rateIndex);
	171	if(posLike){
	172	for (int k=0; k < sc.seqLen(); ++k) {
	173	(posLike)[k]+= (posLikePerCat[k] spVVec[0][0]->ratesProb(rateIndex));
	174	}
	175	}
	176	}
	177	return like;
	178	}
	179
	180	/********************************************************************************************
	181	*********************************************************************************************/
	182	//MDOUBLE likelihoodComputationGL::getTreeLikelihoodAllPosAlphTheSameNoComputeUp(const tree& tr,
	183	// const sequenceContainer& sc,
	184	// const vector<vector<stochasticProcess*> >& spVVec,
	185	// const distribution * distGain, const distribution * distLoss,
	186	// unObservableData *unObservableData_p)
	187	//{
	188	// MDOUBLE res = 0.0;
	189	// int numOfSPs = distGain->categories()*distLoss->categories();
	190	// for (int i=0; i < numOfSPs; ++i) {
	191	// int gainIndex =fromIndex2gainIndex(i,distGain->categories(),distLoss->categories());
	192	// int lossIndex =fromIndex2lossIndex(i,distGain->categories(),distLoss->categories());
	193	// res += likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(tr,sc,spVVec[gainIndex][lossIndex]) distGain->ratesProb(gainIndex)*distLoss->ratesProb(lossIndex);
	194	// }
	195	// if(unObservableData_p){
	196	// res = res - sc.seqLen()*log(1- exp(unObservableData_p->getlogLforMissingData()));
	197	// }
	198	// return res;
	199	//}
	200
	201
	202
	203
	204	/********************************************************************************************
	205	un-observable data
	206	*********************************************************************************************/
	207
	208	/********************************************************************************************
	209	used to fill the likelihood for the unobservable for each category
	210	*********************************************************************************************/
	211	//doubleRep likelihoodComputationGL::getLofPos(const int pos,
	212	// const tree& tr,
	213	// const sequenceContainer& sc,
	214	// const computePijGam& pi,
	215	// const stochasticProcess& sp,
	216	// Vdouble& likePerCat) // all the likdelhoodsPerCat and rateProb are filled
	217	//{
	218	// // with the pi already computed.
	219	// int numOfCat = sp.categories();
	220	// doubleRep tmp=0;
	221	// for (int i=0; i < numOfCat;++i) {
	222	// likePerCat[i] = getLofPos(pos,tr,sc,pi[i],sp)*sp.ratesProb(i);
	223	// likePerCat[i+numOfCat] = sp.ratesProb(i);
	224	// tmp += likePerCat[i];
	225	// }
	226	// return tmp;
	227	//}
	228	///********************************************************************************************
	229	//likelihood computation - full data (1)
	230	//*********************************************************************************************/
	231	//MDOUBLE likelihoodComputationGL::getTreeLikelihoodAllPosAlphTheSame(const tree& tr,
	232	// const sequenceContainer& sc,
	233	// const stochasticProcess& sp,
	234	// const Vdouble * const weights,
	235	// Vdouble *pLforMissingDataPerCat)
	236	//{
	237	// computePijGam pi;
	238	// pi.fillPij(tr,sp);
	239	// MDOUBLE res =0;
	240	// doubleRep LofPos;
	241	// int k;
	242	// for (k=0; k < sc.seqLen(); ++k) {
	243	// LofPos = likelihoodComputationGL::getLofPos(k,//pos,
	244	// tr,//const tree&
	245	// sc,// sequenceContainer& sc,
	246	// pi,//const computePijGam& ,
	247	// sp,
	248	// pLforMissingDataPerCat);
	249	// res += log(LofPos) * (weights?(*weights)[k]:1);//const stochasticProcess& );
	250	// }
	251	// return res;
	252	//}
	253	//
	254	///********************************************************************************************
	255	//likelihood computation - per pos (1.1)
	256	//*********************************************************************************************/
	257	//doubleRep likelihoodComputationGL::getLofPos(const int pos,
	258	// const tree& tr,
	259	// const sequenceContainer& sc,
	260	// const computePijGam& pi,
	261	// const stochasticProcess& sp,
	262	// Vdouble *pLforMissingDataPerCat)
	263	//{
	264	//// with the pi already computed.
	265	// doubleRep tmp=0;
	266	// int numOfCat = sp.categories();
	267	// Vdouble tmpPerCat;
	268	// tmpPerCat.resize(numOfCat);
	269	//
	270	// for (int i=0; i < sp.categories();++i) {
	271	// tmpPerCat[i] = getLofPos(pos,tr,sc,pi[i],sp);
	272	// if(pLforMissingDataPerCat){
	273	// LOG(11,<<"res before MissingData correction= "<<tmpPerCat[i]);
	274	// tmpPerCat[i] = tmpPerCat[i]/(1- (*pLforMissingDataPerCat)[i]);
	275	// LOG(11,<<" after= "<<tmpPerCat[i]<<endl);
	276	// }
	277	// tmp += tmpPerCat[i]*sp.ratesProb(i);
	278	// }
	279	// return tmp;
	280	//}
	281	//
	282	///********************************************************************************************
	283	//likelihood computation - per pos, per cat (1.1.1)
	284	//*********************************************************************************************/
	285	//doubleRep likelihoodComputationGL::getLofPos(const int pos,
	286	// const tree& tr,
	287	// const sequenceContainer& sc,
	288	// const computePijHom& pi,
	289	// const stochasticProcess& sp)
	290	//{
	291	// computeUpAlg cup;
	292	// suffStatGlobalHomPos ssc;
	293	// cup.fillComputeUp(tr,sc,pos,pi,ssc);
	294	//
	295	// doubleRep tmp = 0.0;
	296	// for (int let = 0; let < sp.alphabetSize(); ++let) {
	297	// doubleRep tmpLcat=
	298	// ssc.get(tr.getRoot()->id(),let)*
	299	// sp.freq(let);
	300	// if (!DBIG_EQUAL(convert(tmpLcat), 0.0))
	301	// {
	302	// cerr<<"tmpLcat = "<<tmpLcat<<endl;
	303	// errorMsg::reportError("error in likelihoodComputation::getLofPos. likelihood is smaller than zero");
	304	// }
	305	//
	306	// //assert(tmpLcat>=0.0);
	307	// tmp+=tmpLcat;
	308	// }
	309	//// cout<<"likelihoodComputation::getLofPos: tmp = "; tmp.outputn(cout); // DEBUG EP
	310	// if (!(tmp>0.0)){
	311	// LOG(5,<<"likelihoodComputation::getLofPos: "<< tmp<<endl;);
	312	// LOG(5,<<"pos = "<< pos <<endl;);
	313	// tmp = EPSILON;
	314	// //errorMsg::reportError("likelihoodComputation::getLofPos: likelihood of pos was zero!",1);
	315	//
	316	// }
	317	// return tmp;
	318	//}
	319	//
	320	//Vdouble likelihoodComputationGL::getLofPosPerCat(const int pos,
	321	// const tree& tr,
	322	// const sequenceContainer& sc,
	323	// const computePijGam& pi,
	324	// const stochasticProcess& sp)
	325	//{
	326	//// with the pi already computed.
	327	// int numOfCat = sp.categories();
	328	// Vdouble tmp;
	329	// tmp.resize(numOfCat*2);
	330	// for (int i=0; i < numOfCat;++i) {
	331	// tmp[i] = getLofPos(pos,tr,sc,pi[i],sp)*sp.ratesProb(i);
	332	// tmp[i+numOfCat] = sp.ratesProb(i);
	333	// }
	334	// return tmp;
	335	//}
	336

+100

-0

libs/phylogeny/likelihoodComputationGL.h less more

	0	#ifndef ___LIKELIHOOD_COMPUTATION_GL
	1	#define ___LIKELIHOOD_COMPUTATION_GL
	2
	3	#include "definitions.h"
	4	#include "computePijComponent.h"
	5	#include "sequenceContainer.h"
	6	#include "suffStatComponent.h"
	7	#include "unObservableData.h"
	8	#include "computeUpAlg.h"
	9
	10
	11	namespace likelihoodComputationGL {
	12
	13
	14	MDOUBLE getTreeLikelihoodAllPosAlphTheSame(const tree& tr,
	15	const sequenceContainer& sc,
	16	const vector<vector<stochasticProcess*> >& spVVec,
	17	const distribution * distGain, const distribution * distLoss,
	18	const Vdouble * const weights,
	19	unObservableData *unObservableData_p =NULL);
	20	void fillPijAndUp(const tree& tr,
	21	const sequenceContainer& sc,
	22	const vector<vector<stochasticProcess*> >& spVVec,
	23	const distribution * distGain, const distribution * distLoss,
	24	vector<computePijGam>& pi_vec,
	25	vector<suffStatGlobalGam>& ssc_vec,
	26	vector<computeUpAlg>& cup_vec);
	27	MDOUBLE getProbOfPosUpIsFilledSelectionGam(const int pos,const tree& tr,
	28	const sequenceContainer& sc,
	29	const vector<vector<stochasticProcess*> >& spVVec, // only needed for sp.freq(let)
	30	const suffStatGlobalGamPos& cup,
	31	const distribution * distGain, const distribution * distLoss);
	32
	33
	34	MDOUBLE getTreeLikelihoodFromUp2(const tree& tr,
	35	const sequenceContainer& sc,
	36	const vector<vector<stochasticProcess*> >& spVVec,// only needed for sp.freq(let)
	37	const suffStatGlobalGam& cup,
	38	const distribution * distGain, const distribution * distLoss,
	39	const Vdouble * weights,
	40	unObservableData *unObservableData_p,
	41	Vdouble* posLike =NULL);
	42	MDOUBLE getTreeLikelihoodFromUp2(const tree& tr,
	43	const sequenceContainer& sc,
	44	const vector<vector<stochasticProcess*> >& spVVec,// only needed for sp.freq(let)
	45	const vector<suffStatGlobalGam>& cup_vec,
	46	const distribution * distGain, const distribution * distLoss,
	47	const Vdouble * weights,
	48	unObservableData *unObservableData_p,
	49	Vdouble* posLike =NULL);
	50
	51	// Error
	52	//MDOUBLE getTreeLikelihoodAllPosAlphTheSameNoComputeUp(const tree& tr,
	53	// const sequenceContainer& sc,
	54	// const vector<vector<stochasticProcess*> >& spVVec,
	55	// const distribution * distGain, const distribution * distLoss,
	56	// unObservableData *unObservableData_p);
	57
	58
	59	///********************************************************************************************
	60	//un-obervable data
	61	//*********************************************************************************************/
	62	//// used to fill the likelihood for the unobservable for each category
	63	// doubleRep getLofPos(const int pos,
	64	// const tree& tr,
	65	// const sequenceContainer& sc,
	66	// const computePijGam& pi,
	67	// const stochasticProcess& sp,
	68	// Vdouble& likePerCat); // all the likdelhoodsPerCat and rateProb are filled
	69	//// likelihood computation - full data (1)
	70	// MDOUBLE getTreeLikelihoodAllPosAlphTheSame(const tree& tr,
	71	// const sequenceContainer& sc,
	72	// const stochasticProcess& sp,
	73	// const Vdouble * const weights,
	74	// Vdouble *pLforMissingDataPerCat=NULL);
	75	//// likelihood computation - per pos (1.1)
	76	// doubleRep getLofPos(const int pos, // this function is used
	77	// const tree& tr, // when gamma, and the br-len
	78	// const sequenceContainer& sc, // are the same for all pos.
	79	// const computePijGam& pi,
	80	// const stochasticProcess& sp,
	81	// Vdouble *pLforMissingDataPerCat=NULL);
	82	//// likelihood computation - per pos, per cat (1.1.1)
	83	// doubleRep getLofPos(const int pos, // this function is used
	84	// const tree& tr, // when the br-len
	85	// const sequenceContainer& sc, // are the same for all
	86	// const computePijHom& pi, // positions.
	87	// const stochasticProcess& sp);
	88	//
	89	// Vdouble getLofPosPerCat(const int pos, // used when the likelihood given each category is needed, not only the sum
	90	// const tree& tr,
	91	// const sequenceContainer& sc,
	92	// const computePijGam& pi,
	93	// const stochasticProcess& sp);
	94
	95
	96
	97	};
	98
	99	#endif

+48

-0

libs/phylogeny/logFile.cpp less more

	0	// $Id: logFile.cpp 962 2006-11-07 15:13:34Z privmane $
	1
	2	#include "logFile.h"
	3	#include "errorMsg.h"
	4
	5	int myLog::_loglvl = 3;
	6	ostream *myLog::_out= NULL;
	7	bool myLog::_firstTime = true;
	8
	9	void myLog::setLog(const string logfilename, const int loglvl) {
	10	if (_out != NULL) myLog::endLog();
	11	if ((logfilename == "-")\|\| (logfilename == "")) {
	12	myLog::setLogOstream(&cout);
	13	} else {
	14	ofstream* outLF = new ofstream;
	15	if (_firstTime) {
	16	outLF->open(logfilename.c_str());
	17	_firstTime = false;
	18	}
	19	else
	20	outLF->open(logfilename.c_str(), ofstream::out \| ofstream::app); // append
	21	if (!outLF->is_open()) {
	22	errorMsg::reportError(string("Can't open for writing the log file ")+logfilename);
	23	}
	24	myLog::setLogOstream(outLF);
	25	}
	26	myLog::setLogLvl(loglvl);
	27	LOG(3,<<"START OF LOG FILE"<<endl);
	28	}
	29
	30	void myLog::endLog(void){
	31	LOG(3,<<"END OF LOG FILE"<<endl);
	32	if (_out!=&cout && _out != NULL) {
	33	((ofstream*)_out)->close();
	34	delete _out;
	35	_out = NULL;
	36	_firstTime=false;
	37	}
	38	}
	39
	40	void myLog::printArgv(int loglvl, int argc, char *argv[]) {
	41	LOG(loglvl,<<"argv =");
	42
	43	for (int i=0;i<argc;++i)
	44	LOG(loglvl,<<" \""<<argv[i]<<"\"");
	45	LOG(loglvl,<<endl);
	46
	47	}

+50

-0

libs/phylogeny/logFile.h less more

	0	// $Id: logFile.h 6067 2009-04-14 19:12:28Z itaymay $
	1
	2	#ifndef ___LOG
	3	#define ___LOG
	4
	5
	6	#include <string>
	7	#include <iostream>
	8	#include <fstream>
	9
	10	using namespace std;
	11
	12	class myLog {
	13	public:
	14	static int LogLevel() { return _loglvl;}
	15	static ostream& LogFile(void) {
	16	if (_out == NULL) return cerr;
	17	return *_out;
	18	}
	19
	20	static void setLogLvl(const int newLogLvl) {_loglvl = newLogLvl;}
	21	static void setLogOstream(ostream* out) {_out = out;}
	22
	23	// this function is problematic, because it issue a call to NEW
	24	// which because the function is static - cannot be deleted.
	25	// but, this will not effect the program, because there is only
	26	// 1 instance of _out and it will be released anyway in the end of the program.
	27	static void setLog(const string logfilename, const int loglvl);
	28	static void endLog(void);
	29	static void printArgv(int loglvl, int argc, char *argv[]) ;
	30	private:
	31	static ostream* _out;
	32	static int _loglvl;
	33	static bool _firstTime;
	34	};
	35
	#ifdef LOG
	#undef LOG
	#endif


	// Logging macros. `ex` is a stream tail such as  <<"text"<<endl  and is only
	// evaluated when Lev <= myLog::LogLevel(), so its side effects are skipped
	// for filtered messages.
	// Lev is parenthesized so that an expression argument (e.g. `a ? 1 : 2`)
	// is compared as a whole instead of binding to `<=` by operator precedence.
	//   LOG     - write to the log stream.
	//   LOGnOUT - write to the log stream and to cerr (ex is expanded twice).
	//   LOGDO   - execute an arbitrary statement `ex` when the level passes.
	#define LOG(Lev, ex) { if( (Lev) <= myLog::LogLevel() ) myLog::LogFile() ex; }
	#define LOGnOUT(Lev, ex) { if( (Lev) <= myLog::LogLevel() ) {myLog::LogFile() ex; cerr ex; }}
	#define LOGDO(Lev, ex) { if( (Lev) <= myLog::LogLevel() ) ex; }
	44
	45
	46	#endif
	47
	48
	49

+30

-0

libs/phylogeny/logRep.cpp less more

	0	#ifdef LOGREP
	1	#include "logRep.h"
	2	#include <cmath>
	3
	4	//logRep::logRep()
	5	//{
	6	// _log = VERYSMALL2;
	7	//}
	8
	9	//logRep::logRep(MDOUBLE a){
	10	// _log = ((a==0.0) ? VERYSMALL2 : log(a));
	11	//}
	12
	13
	14	//logRep::logRep(const logRep& other): _log(other._log) {}
	15
	16
	17
	18	MDOUBLE convert(const logRep& a){
	19	return exp(a.getLog());
	20	}
	21
	22
	23
	24
	25	ostream& operator<<(ostream &out, const logRep& a){
	26	a.output(out);
	27	return out;
	28	}
	29	#endif

+173

-0

libs/phylogeny/logRep.h less more

	0	#ifndef __LOG_REP_H
	1	#define __LOG_REP_H
	2
	3	#ifdef LOGREP
	4
	5	#include "definitions.h"
	6	#include "AddLog.h"
	7
	8
	9
	10	#include <iostream>
	11	#include <cmath>
	12	using namespace std;
	13
	/* logRep: enables working with much larger or smaller numbers than is normally
	possible with the regular double representation.
	* A number x is represented by the log of x.
	Note: the original remark here says "Base is 2!!", but the constructor and
	output() of this class call log() and exp(), i.e. the natural logarithm -
	TODO confirm which base is intended.
	WARNING: Note that logRep can only be used for positive values
	(such as probabilities) - you can't take the log of a negative number!
	For a general real number use class doubleRep.
	*/
	22
	23	class logRep{
	24	public:
	25
	26	logRep() : _log(VERYSMALL){}
	27	logRep(MDOUBLE a) {_log = ((a==0.0) ? VERYSMALL : log(a));}
	28	logRep(const logRep& other) : _log(other._log) {}
	29	logRep* clone() {return new logRep(*this);}
	30
	31	void output(ostream &out) const{ out<<exp(_log);}
	32
	33	friend MDOUBLE convert(const logRep& a);
	34	//inline MDOUBLE convert();
	35	inline logRep& operator=(const logRep& a);
	36	inline logRep& operator+=(logRep a);
	37	friend inline logRep operator+(const logRep& a, const logRep& b);
	38	inline logRep& operator-=(const logRep& a);
	39	friend inline logRep operator-(const logRep& a, const logRep& b);
	40	inline logRep& operator*=(const logRep& a);
	41	friend inline logRep operator*(const logRep& a, const logRep& b);
	42	inline logRep& operator/=(const logRep& a);
	43	friend inline logRep operator/(const logRep& a, const logRep& b);
	44
	45	friend inline bool operator==(const logRep& a, const logRep& b);
	46	friend inline bool operator!=(const logRep& a, const logRep& b);
	47	friend inline bool operator<(const logRep& a, const logRep& b);
	48	friend inline bool operator<=(const logRep& a, const logRep& b);
	49	friend inline bool operator>(const logRep& a, const logRep& b);
	50	friend inline bool operator>=(const logRep& a, const logRep& b);
	51	friend inline MDOUBLE log(const logRep& d);
	52	friend inline logRep exp(const logRep& d);
	53
	54	private:
	55	const MDOUBLE getLog() const {return _log;}
	56
	57	private:
	58	MDOUBLE _log;
	59	//static tAddLog_Precompute _add;
	60
	61	};
	62
	63	MDOUBLE convert(const logRep& a); //declaration of this function to be implemented cpp
	64
	65	inline logRep& logRep::operator=(const logRep& a){
	66	_log=a.getLog();
	67	return *this;
	68	}
	69
	70	//inline MDOUBLE convert(){
	71	// return exp(_log);
	72	//}
	73
	74	// Original version by Adi Stern
	75	inline logRep& logRep::operator+=(logRep a){
	76	if (_log == VERYSMALL)
	77	_log = a._log;
	78	else if (a._log == VERYSMALL ) return *this;
	79	else _log = AddLog(_log, a._log);
	80	return *this;
	81	}
	82
	83	inline logRep operator+(const logRep& a, const logRep& b){
	84	logRep temp(a);
	85	temp+=b;
	86	return temp;
	87	}
	88
	89	inline logRep& logRep::operator*=(const logRep& a){
	90	if ((_log == VERYSMALL) \|\| (a._log== VERYSMALL )){
	91	_log = VERYSMALL;
	92	return *this;
	93	}
	94	_log+=a._log;
	95	return *this;
	96	}
	97
	98	inline logRep operator*(const logRep& a, const logRep& b){
	99	logRep temp(a);
	100	temp*=b;
	101	return temp;
	102	}
	103
	104	inline logRep& logRep::operator/=(const logRep& a){
	105	_log-=a._log;
	106	return *this;
	107	}
	108
	109	inline logRep operator/(const logRep& a, const logRep& b){
	110	logRep temp(a);
	111	temp/=b;
	112	return temp;
	113	}
	114
	115	/************************
	116	* Comparison operators *
	117	************************/
	118	inline bool operator==(const logRep& a, const logRep& b){
	119	return (a.getLog()==b.getLog());
	120	}
	121	inline bool operator!=(const logRep& a, const logRep& b){
	122	return !(a==b);
	123	}
	124
	125	inline bool operator<(const logRep& a, const logRep& b){
	126	if (a.getLog()<b.getLog()) {return true;}
	127	else {return false;}
	128
	129	}
	130
	131	inline bool operator>(const logRep& a, const logRep& b){
	132
	133	if (a.getLog()>b.getLog()) {return true;}
	134	else {return false;}
	135
	136	}
	137
	138	inline bool operator<=(const logRep& a, const logRep& b){
	139	return !(a>b);
	140	}
	141
	142	inline bool operator>=(const logRep& a, const logRep& b){
	143	return !(a<b);
	144	}
	145
	146	ostream& operator<<(ostream &out, const logRep& a);
	147
	148	inline MDOUBLE log(const logRep& d) {return d.getLog();}
	149
	150	inline ostream &operator<<(ostream &out, const VlogRep &v){
	151	for (int j=0;j<v.size();++j)
	152	out<< v[j]<<" ";
	153	out <<endl;
	154	return(out);
	155	}
	156
	157	inline ostream &operator<<(ostream &out, const VVlogRep &m){
	158	for (int i=0;i<m.size();++i)
	159	out<<m[i];
	160	out <<endl;
	161	return(out);
	162	}
	163
	164
	165	inline logRep exp(const logRep& d) {
	166	logRep res;
	167	res._log = d.getLog();
	168	return res;
	169	}
	170
	171	#endif
	172	#endif

+86

-0

libs/phylogeny/maseFormat.cpp less more

	0	// $Id: maseFormat.cpp 962 2006-11-07 15:13:34Z privmane $
	1
	2	#include "maseFormat.h"
	3	#include "someUtil.h"
	4	#include "errorMsg.h"
	5
	6	sequenceContainer maseFormat::read(istream &infile, const alphabet* alph) {
	7	sequenceContainer mySeqData = readUnAligned(infile, alph);
	8	mySeqData.makeSureAllSeqAreSameLengthAndGetLen();
	9	return mySeqData;
	10	}
	11
	12	sequenceContainer maseFormat::readUnAligned(istream &infile, const alphabet* alph) {
	13	if (!infile) {
	14	errorMsg::reportError("unable to read mase format, could not open file");
	15	}
	16	sequenceContainer mySeqData;;
	17
	18	vector<string> seqFileData;
	19	putFileIntoVectorStringArray(infile,seqFileData);
	20
	21	vector<string>::const_iterator it1;
	22	for (it1 = seqFileData.begin(); it1!= seqFileData.end(); ++it1) {
	23	if (it1->empty()) continue; // empty line continue
	24	if (it1->size()>1) {
	25	if ( ((it1)[0] == ';') && ((it1)[1] == ';')) {// general file remarks
	26	mySeqData.addGeneralRemark(*it1);
	27	}
	28	}
	29	}
	30	int localid=0;
	31	for (it1 = seqFileData.begin(); it1!= seqFileData.end(); ) {
	32	if (it1->empty()) {++it1;continue; }// empty line continue
	33	if (it1->size()>1) {
	34	if ( ((it1)[0] == ';') && ((it1)[1] == ';')) {// general file remarks
	35	++it1;continue;
	36	}
	37	}
	38
	39	string remark;
	40	string name;
	41	string seqStr;
	42	if ((*it1)[0] != ';') {
	43	LOG(5,<<"problem in line: "<<*it1<<endl);
	44	errorMsg::reportError("Error reading mase file, error finding sequence remark",1);
	45	}
	46	if ((it1)[0] == ';') {remark += it1;++it1;}
	47	while ((*it1)[0] == ';') {
	48	remark += "\n";
	49	remark += *it1;
	50	++it1;
	51	}
	52	while (it1->empty()) it1++; // empty line continue
	53	name = *it1;
	54	++it1;
	55
	56	while (it1!= seqFileData.end()) {
	57	if ((*it1)[0] == ';') break;
	58	// the following lines are taking care of a format which is like "10 aact"
	59	// in mase format
	60	string withoutNumberAndSpaces =
	61	takeCharOutOfString("0123456789 ",*it1);
	62	seqStr+=withoutNumberAndSpaces;
	63	++it1;
	64	}
	65	mySeqData.add(sequence(seqStr,name,remark,localid,alph));
	66	localid++;
	67	}
	68
	69	return mySeqData;
	70	}
	71
	72	void maseFormat::write(ostream &out, const sequenceContainer& sd) {
	73	vector<string> gfr = sd.getGeneralRemarks();
	74
	75	if (gfr.empty()) out<<";;\n;;\n";
	76	for (vector<string>::const_iterator k=gfr.begin() ; k != gfr.end() ; ++k )
	77	out<<(*k)<<endl;
	78	for (sequenceContainer::constTaxaIterator it5=sd.constTaxaBegin();it5!=sd.constTaxaEnd();++it5) {
	79	if ((it5).remark().size() > 0) out<<";"<<(it5).remark()<<endl;
	80	else out<<";\n";
	81	out<<it5->name()<<endl;
	82	out<<it5->toString()<<endl;
	83	}
	84	}
	85

+42

-0

libs/phylogeny/maseFormat.h less more

	0	// $Id: maseFormat.h 962 2006-11-07 15:13:34Z privmane $
	1
	2	#ifndef ___MASE_FORMAT
	3	#define ___MASE_FORMAT
	4
	5	#include "sequenceContainer.h"
	6
	7	class maseFormat{
	8	public:
	9	static sequenceContainer read(istream &infile, const alphabet* alph);
	10	static void write(ostream &out, const sequenceContainer& sd);
	11	//readUnAligned: the input sequences do not need to be aligned (not all sequences are the same length).
	12	static sequenceContainer readUnAligned(istream &infile, const alphabet* alph);
	13	};
	14
	15	#endif
	16
	17	/* EXAMPLE OF THE FORMAT:
	18
	19	;;this is the place for general remarks.
	20	;here we put sequence specific remark.
	21	Langur
	22	KIFERCELARTLKKLGLDGYKGVSLANWVCLAKWESGYNTEATNYNPGDESTDYGIFQINSRYWCNNGKPGAVDACHISCSALLQNNIADAVACAKRVVSDQGIRAWVAWRNHCQNKDVSQYVKGCGV
	23	;
	24	Baboon
	25	KIFERCELARTLKRLGLDGYRGISLANWVCLAKWESDYNTQATNYNPGDQSTDYGIFQINSHYWCNDGKPGAVNACHISCNALLQDNITDAVACAKRVVSDQGIRAWVAWRNHCQNRDVSQYVQGCGV
	26	;
	27	Human
	28	KVFERCELARTLKRLGMDGYRGISLANWMCLAKWESGYNTRATNYNAGDRSTDYGIFQINSRYWCNDGKPGAVNACHLSCSALLQDNIADAVACAKRVVRDQGIRAWVAWRNRCQNRDVRQYVQGCGV
	29	;
	30	Rat
	31	KTYERCEFARTLKRNGMSGYYGVSLADWVCLAQHESNYNTQARNYDPGDQSTDYGIFQINSRYWCNDGKPRAKNACGIPCSALLQDDITQAIQCAKRVVRDQGIRAWVAWQRHCKNRDLSGYIRNCGV
	32	;
	33	Cow
	34	KVFERCELARTLKKLGLDGYKGVSLANWLCLTKWESSYNTKATNYNPSSESTDYGIFQINSKWWCNDGKPNAVDGCHVSCSELMENDIAKAVACAKKIVSEQGITAWVAWKSHCRDHDVSSYVEGCTL
	35	;
	36	Horse
	37	KVFSKCELAHKLKAQEMDGFGGYSLANWVCMAEYESNFNTRAFNGKNANGSSDYGLFQLNNKWWCKDNKRSSSNACNIMCSKLLDENIDDDISCAKRVVRDKGMSAWKAWVKHCKDKDLSEYLASCNL
	38
	39	*/
	40
	41

+374

-0

libs/phylogeny/matrixUtils.cpp less more

	0	#include "matrixUtils.h"
	1	#include "errorMsg.h"
	2	#include <cmath>
	3	#include <string>
	4	#include <ctype.h>
	5	#include <cctype>
	6	#include <cstdlib>
	7
	8
	9	Vdouble getDiagonalFromMatrix(VVdouble &mat){
	10	Vdouble diagonal;
	11	for (int i=0; i<mat.size(); i++)
	12	diagonal.push_back(mat[i][i]);
	13	return diagonal;
	14	}
	15
	16	Vdouble getSubDiagonalFromMatrix(VVdouble &mat){
	17	Vdouble diagonal;
	18	for (int i=0; i<mat.size()-1; i++)
	19	diagonal.push_back(mat[i+1][i]);
	20	return diagonal;
	21	}
	22
	23
	24
	25
	26
	27
	28	void readMatrixFromFile(VVdouble &mat,string fileName){
	29	ifstream in(fileName.c_str());
	30	if (!in){
	31	string err="in function readMatrixFromFile, empty file or non-existant:";
	32	err+=fileName;
	33	errorMsg::reportError(err);
	34	}
	35	int i=0;
	36	mat.resize(1);
	37	while (!in.eof()) {
	38	string row;
	39	int k=0;
	40	getline(in,row,'\n');
	41	while (k<row.size()){
	42	string value;
	43	while (row[k]!=' ' && k<row.size()){
	44	value+=row[k];
	45	k++;
	46	}
	47	k++;
	48	mat[i].push_back(atof(value.c_str()));
	49	//j++;
	50	//mat.resize(j);
	51	}
	52	if (!in.eof())
	53	mat.resize(++i+1);
	54	}
	55	in.close();
	56	}
	57
	58
	59	void printMatrix(const VVdouble &mat, ostream &out) {
	60	int num=mat.size();
	61	for (int row=0; row<num; row++) {
	62	for (int position=0; position<mat[row].size(); position++) {
	63	out << mat[row][position] << '\t';
	64	}
	65	out << endl ;
	66	}
	67	out << endl ;
	68
	69	}
	70
	71	void printMatrix(const VVint &mat, ostream &out) {
	72	int num=mat.size();
	73	for (int row=0; row<num; row++) {
	74	for (int position=0; position<mat[row].size(); position++) {
	75	out << mat[row][position] << '\t';
	76	}
	77	out << endl ;
	78	}
	79	out << endl ;
	80	out<<"---------------------------------------------"<<endl;
	81
	82	}
	83
	84
	85
	86	VVdouble transpose(const VVdouble &mat){
	87	VVdouble matT;
	88	int n=mat.size();
	89	resizeMatrix(matT,n,n);
	90	for (int i=0; i<n;i++){
	91	for (int j=0; j<n;j++) {
	92	matT[i][j]=mat[j][i];
	93	}
	94	}
	95	return matT;
	96	}
	97
	98
	99
	100
	101	VVdouble subtract(const VVdouble &mat1,const VVdouble &mat2){
	102	VVdouble newMat=add(mat1,reverseSign(mat2));
	103	return newMat;
	104	}
	105
	106	VVdouble reverseSign(const VVdouble &mat1){
	107	VVdouble newMat(mat1.size());
	108	for (int i=0;i<mat1.size();i++){
	109	newMat[i].resize(mat1[i].size());
	110	for (int j=0;j<mat1.size();j++){
	111	newMat[i][j]=-mat1[i][j];
	112	}
	113	}
	114	return newMat;
	115
	116	}
	117
	118
	119	void findMaxInVector(const Vdouble &vec, MDOUBLE &maxValue, int &argmax){
	120	MDOUBLE tempMax=VERYSMALL;
	121	int tempArgMax=0;
	122	for (int i=0; i<vec.size(); i++){
	123	if (vec[i]>tempMax){ // TEST DEBUG!!! if '>' is used, the first Max is chosen, if '>' the last max
	124	tempMax=vec[i];
	125	tempArgMax=i;
	126	}
	127	}
	128	maxValue=tempMax;
	129	argmax=tempArgMax;
	130	}
	131
	132	void findMinInVector(const Vdouble &vec, MDOUBLE &minValue, int &argmin) {
	133	Vdouble minusCopy(vec.size());
	134	for (int i=0; i<vec.size(); i++){
	135	minusCopy[i] = -vec[i];
	136	}
	137	findMaxInVector(minusCopy, minValue, argmin);
	138	minValue = -minValue;
	139	}
	140
	141	bool isMinEQMaxInVector(const Vdouble &vec){
	142	bool isMinEQMaxInVector = false;
	143	MDOUBLE maxValue, minValue;
	144	int argmax, argmin;
	145	findMaxInVector(vec,maxValue,argmax);
	146	findMinInVector(vec,minValue,argmin);
	147	if(maxValue == minValue)
	148	isMinEQMaxInVector = true;
	149	return isMinEQMaxInVector;
	150	}
	151
	152	MDOUBLE averageElementInVector(const Vdouble &vec) {
	153	MDOUBLE sum=0.0;
	154	for (int i=0; i<vec.size(); i++){
	155	sum+=vec[i];
	156	}
	157	return sum/vec.size();
	158	}
	159
	160	void appendBinaryVectors(Vint &vec1, const Vint &vec2){
	161	for (int i=0; i < vec2.size(); i++)
	162	if (vec2[i]==1)
	163	vec1[i]=1;
	164	}
	165
	166	void appendVectors(Vint &vec1, const Vint &vec2) {
	167	for (int i=0; i<vec2.size();i++)
	168	vec1.push_back(vec2[i]);
	169	}
	170
	171	void appendVectors(VVdouble &vec1, const VVdouble &vec2) {
	172	for (int i=0; i<vec2.size();i++)
	173	for (int j=0; j<vec2[i].size();j++)
	174	vec1[i].push_back(vec2[i][j]);
	175	}
	176
	177	Vint complementBinaryVec(Vint&bufferVec) {
	178	for (int i=0; i<bufferVec.size(); i++)
	179	bufferVec[i]=abs(bufferVec[i]-1);
	180	return bufferVec;
	181	}
	182
	183
	184	//reads a vertical vector of float numbers(separated by \n)
	185	void readDoubleVecFromFile(Vdouble &vec,string fileName){
	186	ifstream in(fileName.c_str());
	187	if (!in){
	188	string err="in function readDoubleVecFromFile, empty file or non-existant:";
	189	err+=fileName;
	190	errorMsg::reportError(err);
	191	}
	192	string row;
	193	while (!in.eof()){
	194	getline(in,row,'\n');
	195	//if (isalnum(*(row.c_str())) \|\| (row[0]=="."))
	196	if (isspace(*(row.c_str())) \|\| row=="") continue;
	197	vec.push_back(atof(row.c_str()));
	198	}
	199
	200	in.close();
	201	}
	202
	203	void normalize(Vdouble &vec){
	204	MDOUBLE sum=0.0;
	205	MDOUBLE squareSum=0.0;
	206	int N=vec.size();
	207	int i=0;
	208	for (i=0;i<N;i++) sum+=vec[i];
	209	for (i=0;i<N;i++) squareSum+=(vec[i]*vec[i]);
	210	MDOUBLE avg=sum/N;
	211	MDOUBLE sqrAvg=squareSum/N;
	212	MDOUBLE stdDev=sqrt(sqrAvg-avg*avg);
	213	for (i=0;i<N;i++) vec[i]=(vec[i]-avg)/stdDev;
	214
	215	}
	216
	217	void scaleByAverage(Vdouble &vec){
	218	MDOUBLE sum=0.0;
	219	MDOUBLE squareSum=0.0;
	220	int N=vec.size();
	221	int i=0;
	222	for (i=0;i<N;i++) sum+=vec[i];
	223	for (i=0;i<N;i++) squareSum+=(vec[i]*vec[i]);
	224	MDOUBLE avg=sum/N;
	225	for (i=0;i<N;i++) vec[i]=(vec[i])/avg;
	226	}
	227
	228	Vdouble solveLinearEquations(VVdouble A,Vdouble b){
	229	// VVdouble Acopy=A; //creating a copy, since ludcmp&lubksb destroy the input
	230	// Vdouble bcopy=b;
	231	MDOUBLE d; //required for ludcmp; irrelevant for us.
	232	Vdouble indx; //required for ludcmp; irrelevant for us.
	233	ludcmp(A,indx,d); //decomposes A into product of diagonal matrices
	234	lubksb(A,indx,b); //solves
	235	return b;
	236	}
	237
	238
	239	void ludcmp(VVdouble &a, Vdouble &indx, MDOUBLE &d)
	240	{
	241	const MDOUBLE TINY=1.0e-20;
	242	int i,imax=0,j,k;
	243	MDOUBLE big,dum,sum,temp;
	244
	245	int n=a.size();
	246	Vdouble vv(n);
	247	indx.resize(n);//my addition
	248	d=1.0;
	249	for (i=0;i<n;i++) {
	250	big=0.0;
	251	for (j=0;j<n;j++)
	252	if ((temp=fabs(a[i][j])) > big) big=temp;
	253	if (big == 0.0) errorMsg::reportError("Singular matrix in routine ludcmp");
	254	vv[i]=1.0/big;
	255	}
	256	for (j=0;j<n;j++) {
	257	for (i=0;i<j;i++) {
	258	sum=a[i][j];
	259	for (k=0;k<i;k++) sum -= a[i][k]*a[k][j];
	260	a[i][j]=sum;
	261	}
	262	big=0.0;
	263	for (i=j;i<n;i++) {
	264	sum=a[i][j];
	265	for (k=0;k<j;k++) sum -= a[i][k]*a[k][j];
	266	a[i][j]=sum;
	267	if ((dum=vv[i]*fabs(sum)) >= big) {
	268	big=dum;
	269	imax=i;
	270	}
	271	}
	272	if (j != imax) {
	273	for (k=0;k<n;k++) {
	274	dum=a[imax][k];
	275	a[imax][k]=a[j][k];
	276	a[j][k]=dum;
	277	}
	278	d = -d;
	279	vv[imax]=vv[j];
	280	}
	281	indx[j]=imax;
	282	if (a[j][j] == 0.0) a[j][j]=TINY;
	283	if (j != n-1) {
	284	dum=1.0/(a[j][j]);
	285	for (i=j+1;i<n;i++) a[i][j] *= dum;
	286	}
	287	}
	288	}
	289
	290
	291
	292	void lubksb(VVdouble &a, Vdouble &indx, Vdouble &b)
	293	{
	294	int i,ii=0,ip,j;
	295	MDOUBLE sum;
	296
	297	int n=a.size();
	298	for (i=0;i<n;i++) {
	299	ip=(int)(indx[i]);
	300	sum=b[ip];
	301	b[ip]=b[i];
	302	if (ii != 0)
	303	for (j=ii-1;j<i;j++) sum -= a[i][j]*b[j];
	304	else if (sum != 0.0)
	305	ii=i+1;
	306	b[i]=sum;
	307	}
	308	for (i=n-1;i>=0;i--) {
	309	sum=b[i];
	310	for (j=i+1;j<n;j++) sum -= a[i][j]*b[j];
	311	b[i]=sum/a[i][i];
	312	}
	313	}
	314
	315	//get the first norm sum{abs(Mij)}
	316	MDOUBLE getMatrixNorm(const VVdouble &mat) {
	317	MDOUBLE res(0.0);
	318	for (int i=0; i<mat.size(); i++){
	319	for (int j=0; j<mat[i].size();j++){
	320	res += fabs(mat[i][j]);
	321	}
	322	}
	323	return res;
	324	}
	325
	326	//get the first norm sum{abs(Mij)} from vector of Matrices
	327	MDOUBLE getVMatrixNorm(const VVVdouble &mat) {
	328	MDOUBLE res(0.0);
	329	for (int i=0; i<mat.size(); i++){
	330	for (int j=0; j<mat[i].size();j++){
	331	for (int k=0; k<mat[i][j].size();k++){
	332	res += fabs(mat[i][j][k]);
	333	}
	334	}
	335	}
	336	return res;
	337	}
	338
	339	//get the specific coordinates sum from vector of Matrices
	340	MDOUBLE getVMatrixJK(const VVVdouble &mat, const int j, const int k) {
	341	MDOUBLE res(0.0);
	342	for (int i=0; i<mat.size(); i++){
	343	res += fabs(mat[i][j][k]);
	344	}
	345	return res;
	346	}
	347
	348
	349
	350
	351	/********************************************************************************************
	352	*********************************************************************************************/
	353	void resize_VVVV(int dim1, int dim2, int dim3, int dim4, VVVVdouble& vetor){
	354
	355	vetor.resize(dim1);
	356	for (int posNum=0;posNum<vetor.size();++posNum){
	357	vetor[posNum].resize(dim2);
	358	for (int n=0;n<vetor[posNum].size();++n){
	359	resizeMatrix(vetor[posNum][n],dim3,dim4);
	360	}
	361	}
	362	}
	363	/********************************************************************************************
	364	*********************************************************************************************/
	365	void resize_VVV(int dim1, int dim2, int dim3, VVVdouble& vetor){
	366	vetor.resize(dim1);
	367	for (int n=0;n<vetor.size();++n){
	368	resizeMatrix(vetor[n],dim2,dim3);
	369	}
	370	}
	371
	372
	373

+157

-0

libs/phylogeny/matrixUtils.h less more

	0	#ifndef ___MATRIX_UTIL_H
	1	#define ___MATRIX_UTIL_H
	2
	3	#include "definitions.h"
	4	#include "logFile.h"
	5	#include "errorMsg.h"
	6	#include <string>
	7	#include <vector>
	8	#include <fstream>
	9	#include <iostream>
	10
	11	class sequenceContainer;
	12	using namespace std;
	13
	14
	15
	16	void printMatrix(const VVdouble &mat, ostream &out);
	17	void printMatrix(const VVint &mat, ostream &out) ;
	18
	19	void readMatrixFromFile(VVdouble &mat,string fileName);
	20
	21	Vdouble getDiagonalFromMatrix(VVdouble &mat);
	22	Vdouble getSubDiagonalFromMatrix(VVdouble &mat);
	23
	24	//get the first norm sum{abs(Mij)}
	25	MDOUBLE getMatrixNorm(const VVdouble &mat);
	26	// Same for vector of Matrices
	27	MDOUBLE getVMatrixNorm(const VVVdouble &mat);
	28	//get the specific coordinates sum from vector of Matrices
	29	MDOUBLE getVMatrixJK(const VVVdouble &mat, const int j, const int k);
	30
	31
	32
	// Resizes mat to rows x columns and sets every entry to zero, including
	// entries that existed before the call.
	template<typename _T>
	void resizeMatrix(vector<vector< _T> > &mat, int rows, int columns){
		mat.resize(rows);
		for (int r = 0; r < rows; ++r) {
			vector< _T>& currentRow = mat[r];
			currentRow.resize(columns);
			for (int c = 0; c < columns; ++c) {
				currentRow[c] = 0; // initializing all values as zero
			}
		}
	}
	43
	// Makes m the n x n identity matrix.
	template<typename _T>
	void unitMatrix(vector<vector< _T> > &m, int n){
		resizeMatrix(m,n,n);
		for (int row = 0; row < n; ++row) {
			for (int col = 0; col < n; ++col) {
				m[row][col] = (row == col) ? 1 : 0;
			}
		}
	}
	54
	// Sets every existing entry of m to 0; the shape of m is unchanged.
	template<typename _T>
	void zeroMatrix(vector<vector< _T> > &m){
		typedef typename vector<vector< _T> >::iterator RowIter;
		typedef typename vector< _T>::iterator CellIter;
		for (RowIter row = m.begin(); row != m.end(); ++row) {
			for (CellIter cell = row->begin(); cell != row->end(); ++cell) {
				*cell = 0;
			}
		}
	}
	61
	// Sets every existing entry of m to 1; the shape of m is unchanged.
	template<typename _T>
	void oneMatrix(vector<vector< _T> > &m){
		typedef typename vector<vector< _T> >::iterator RowIter;
		typedef typename vector< _T>::iterator CellIter;
		for (RowIter row = m.begin(); row != m.end(); ++row) {
			for (CellIter cell = row->begin(); cell != row->end(); ++cell) {
				*cell = 1;
			}
		}
	}
	68
	69
	70	//assumes that #columns in mat1=#rows in mat2
	71	template<typename _T>
	72	vector<vector< _T> > multiplyMatrixes(vector<vector< _T> > &mat1, vector<vector< _T> > &mat2){
	73	vector<vector< _T> > mat;
	74	if ((mat1.size()==0) \|\| (mat2.size() ==0))
	75	errorMsg::reportError("Error in multiplyMatrixes, one of the matrices inputted is of size 0");;
	76	int numColumns=mat1[0].size();
	77	int numRows = mat2.size();
	78	resizeMatrix(mat,numColumns,numRows);
	79	for (int i=0; i<numColumns; i++){
	80	for (int j=0; j<numRows;j++){
	81	for (int k=0;k<numColumns;k++){
	82	mat[i][j]+=mat1[i][k]*mat2[k][j];
	83	}
	84	}
	85	}
	86	return mat;
	87	}
	88
	89	template<typename _T>
	90	vector<vector< _T> > multiplyMatrixByScalar(const vector<vector< _T> > &mat, MDOUBLE scalar) {
	91	vector<vector< _T> > mat_copy = mat;
	92	for (int i=0; i<mat.size(); i++){
	93	for (int j=0; j<mat[i].size();j++){
	94	mat_copy[i][j]*=scalar;
	95	}
	96	}
	97	return mat_copy;
	98	}
	99
	100	template<typename _T>
	101	vector<vector< _T> > add(const vector<vector< _T> > &mat1,const vector<vector< _T> > &mat2){
	102	if (mat1.size()!=mat2.size()) errorMsg::reportError("different sized matrices in matrixUtils::add");
	103	vector<vector< _T> > newMat(mat1.size());
	104	for (int i=0;i<mat1.size();i++){
	105	if (mat1[i].size()!=mat2[i].size()) errorMsg::reportError("different sized matrices in matrixUtils::add");
	106	newMat[i].resize(mat1[i].size());
	107	for (int j=0;j<mat1.size();j++){
	108	newMat[i][j]=mat1[i][j]+mat2[i][j];
	109	}
	110	}
	111	return newMat;
	112	}
	113
	// Prints the elements of vec to out, each followed by a newline
	// (printVertical, the default) or by a space, then ends the line.
	template<typename _T>
	void printVec(vector< _T> &vec,ostream &out=cout,bool printVertical=true) {
		const char* separator = printVertical ? "\n" : " ";
		const int count = vec.size();
		for (int idx = 0; idx < count; ++idx) {
			out << vec[idx];
			out << separator;
		}
		out << endl;
	}
	122
	123
	124
	125	VVdouble transpose(const VVdouble &mat);
	126	VVdouble subtract(const VVdouble &mat1,const VVdouble &mat2);
	127	VVdouble reverseSign(const VVdouble &mat1);
	128
	129	void findMaxInVector(const Vdouble &vec, MDOUBLE &maxValue, int &argmax) ;
	130	void findMinInVector(const Vdouble &vec, MDOUBLE &minValue, int &argmin) ;
	131	bool isMinEQMaxInVector(const Vdouble &vec);
	132
	133	MDOUBLE averageElementInVector(const Vdouble &vec) ;
	134	void appendBinaryVectors(vector <int> &vec1, const vector <int> &vec2);
	135	void appendVectors(Vint &vec1, const Vint &vec2);
	136	void appendVectors(VVdouble &vec1, const VVdouble &vec2);
	137	Vint complementBinaryVec(vector <int>&bufferVec) ; // returns complementary binary vector
	138	void readDoubleVecFromFile(Vdouble &vec,string fileName); //reads a vertical vector (separated by \n)
	139
	140	void normalize(Vdouble &vec);
	141	void scaleByAverage(Vdouble &vec);
	142
	143
	144	//solve nxn linear equations of the form Ax=b; return x;
	145	Vdouble solveLinearEquations(VVdouble A,Vdouble b);
	146	// functions from numerical recipes that solve nxn linear equations
	147	void lubksb(VVdouble &a, Vdouble &indx, Vdouble &b);
	148	void ludcmp(VVdouble &a, Vdouble &indx, MDOUBLE &d);
	149
	150	void resize_VVVV(int dim1, int dim2, int dim3, int dim4, VVVVdouble& vetor);
	151	void resize_VVV(int dim1, int dim2, int dim3, VVVdouble& vetor);
	152
	153
	154
	155
	156	#endif

+311

-0

libs/phylogeny/mixtureDistribution.cpp less more

	0	#include "mixtureDistribution.h"
	1	#include "generalGammaDistributionLaguerre.h"
	2	#include "talRandom.h"
	3	#include "someUtil.h"
	4	#include "errorMsg.h"
	5
	6	#include <cmath>
	7
	8
	9	mixtureDistribution::mixtureDistribution(const vector<generalGammaDistribution*>& components, const Vdouble& componentsProb, quadratureType gammaType)
	10	{
	11	if (components.size() < 1)
	12	errorMsg::reportError("the number of Gamma components must be positive");
	13
	14	_components.clear();
	15	for (int i = 0; i < components.size(); ++i)
	16	{
	17	generalGammaDistribution* comp = static_cast<generalGammaDistribution*>(components[i]->clone());
	18	_components.push_back(comp);
	19	}
	20
	21	_globalRate = 1.0;
	22	setComponentsProb(componentsProb);
	23	}
	24
	25
	26	//init the mixture with componentsNum components - the alpha, beta, and probability for each component is assigned "randomly"
	27	mixtureDistribution::mixtureDistribution(int componentsNum, int categoriesNumInComponent, quadratureType gammaType/=LAGUERRE/, MDOUBLE maxAlpha/=5.0/, MDOUBLE maxBeta/=5.0/)
	28	{
	29	if (componentsNum < 1)
	30	errorMsg::reportError("the number of Gamma components must be positive");
	31
	32	_components.clear();
	33	Vdouble componentsProb(componentsNum, 0);
	34	for (int i = 0; i < componentsNum; ++i)
	35	{
	36	MDOUBLE alpha = talRandom::giveRandomNumberBetweenZeroAndEntry(maxAlpha);
	37	MDOUBLE beta = talRandom::giveRandomNumberBetweenZeroAndEntry(maxBeta);
	38	componentsProb[i] = talRandom::giveRandomNumberBetweenZeroAndEntry(1.0);
	39	generalGammaDistribution* pComp;
	40	switch (gammaType)
	41	{
	42	case LAGUERRE:
	43	pComp = new generalGammaDistributionLaguerre(alpha, beta, categoriesNumInComponent);
	44	break;
	45	case QUANTILE:
	46	pComp = new generalGammaDistribution(alpha, beta, categoriesNumInComponent);
	47	break;
	48	default:
	49	errorMsg::reportError("unknown quadrature type in mixtureDistribution");
	50	}
	51	_components.push_back(pComp);
	52	}
	53
	54	scaleVec(componentsProb, 1.0/componentsNum);
	55	setComponentsProb(componentsProb);
	56	_globalRate = 1.0;
	57	}
	58	//init the mixture with componentsNum components - the alpha, beta, and probability for each component is assigned with given values
	59	mixtureDistribution::mixtureDistribution(int componentsNum, int categoriesNumInComponent,Vdouble AlphaInit ,Vdouble BetaInit, Vdouble componentProbInit ,quadratureType gammaType/=LAGUERRE/, MDOUBLE maxAlpha/=5.0/, MDOUBLE maxBeta/=5.0/)
	60	{
	61	if (componentsNum < 1)
	62	errorMsg::reportError("the number of Gamma components must be positive");
	63
	64	_components.clear();
	65	Vdouble componentsProb(componentsNum, 0);
	66	for (int i = 0; i < componentsNum; ++i)
	67	{
	68	MDOUBLE alpha = AlphaInit[i];
	69	MDOUBLE beta = BetaInit[i];
	70	componentsProb[i] = componentProbInit[i];
	71	generalGammaDistribution* pComp;
	72	switch (gammaType)
	73	{
	74	case LAGUERRE:
	75	pComp = new generalGammaDistributionLaguerre(alpha, beta, categoriesNumInComponent);
	76	break;
	77	case QUANTILE:
	78	pComp = new generalGammaDistribution(alpha, beta, categoriesNumInComponent);
	79	break;
	80	default:
	81	errorMsg::reportError("unknown quadrature type in mixtureDistribution");
	82	}
	83	_components.push_back(pComp);
	84	}
	85
	86	scaleVec(componentsProb, 1.0/componentsNum);
	87	setComponentsProb(componentsProb);
	88	_globalRate = 1.0;
	89	}
	90
	91	mixtureDistribution::mixtureDistribution(const mixtureDistribution& other)
	92	: _componentsWeight(other._componentsWeight),
	93	_globalRate(other._globalRate),
	94	_totalWeight(other._totalWeight)
	95	{
	96	_components.clear();
	97	for (int i = 0; i < other.getComponentsNum(); ++i)
	98	{
	99	generalGammaDistribution* comp = static_cast<generalGammaDistribution*>(other._components[i]->clone());
	100	_components.push_back(comp);
	101	}
	102	}
	103
	104
	105	mixtureDistribution& mixtureDistribution::operator=(const mixtureDistribution &otherDist)
	106	{
	107	_globalRate = otherDist._globalRate;
	108	_componentsWeight = otherDist._componentsWeight;
	109	_totalWeight = otherDist._totalWeight;
	110	if (this != &otherDist) // Check for self-assignment
	111	{
	112	for (int i = 0; i < getComponentsNum(); ++i)
	113	{
	114	if (_components[i] != NULL)
	115	{
	116	generalGammaDistribution* pComp = static_cast<generalGammaDistribution*>(otherDist.getComponent(i)->clone());
	117	delete _components[i];
	118	_components[i] = pComp;;
	119	}
	120	}
	121	}
	122	return *this;
	123	}
	124
	125
	126	void mixtureDistribution::clear()
	127	{
	128	for (int i = 0; i < getComponentsNum(); ++i)
	129	{
	130	if (_components[i] != NULL)
	131	{
	132	delete _components[i];
	133	_components[i] = NULL;
	134	}
	135	}
	136	_components.clear();
	137	}
	138
	139
	140	mixtureDistribution::~mixtureDistribution()
	141	{
	142	clear();
	143	}
	144
	145	const int mixtureDistribution::categories() const
	146	{
	147	int res = 0;
	148	for (int i = 0; i < getComponentsNum(); ++i)
	149	{
	150	res += _components[i]->categories();
	151	}
	152	return res;
	153	}
	154
	155	void mixtureDistribution::setComponentsProb(const Vdouble& componentsProb)
	156	{
	157	if (getComponentsNum() != componentsProb.size())
	158	errorMsg::reportError("the number of Gamma components is not the same as the number of probabilities");
	159	_totalWeight = 0.0;
	160	for (int i = 0; i < componentsProb.size(); ++i)
	161	_totalWeight += componentsProb[i];
	162	if (!DEQUAL(_totalWeight, 1.0))
	163	errorMsg::reportError("the sum of components probabilities must sum to 1.0");
	164	_componentsWeight = componentsProb;
	165	}
	166
	167
	168	void mixtureDistribution::change_number_of_categoriesPerComp(int in_number_of_categories)
	169	{
	170	for (int i = 0; i <getComponentsNum(); ++i)
	171	_components[i]->change_number_of_categories(in_number_of_categories);
	172	}
	173
	174	//change_number_of_components: if the newCompNum is getComponentsNum()-1
	175	//then duplicate one of the components and adjust the probabilities
	176	void mixtureDistribution::change_number_of_components(const int in_number_of_components)
	177	{
	178	if (getComponentsNum() == in_number_of_components)
	179	return;
	180	else if (getComponentsNum() == in_number_of_components - 1)
	181	{
	182	//duplicate the first component
	183	normalizeProbabilities();
	184	generalGammaDistribution* comp = static_cast<generalGammaDistribution*>(_components[0]->clone());
	185	_components.push_back(comp);
	186	//adjust the components probabilities so that the probs of the
	187	//two identical components (i.e., 0 and the new Comp) are equal
	188	_componentsWeight[0] /= 2;
	189	_componentsWeight.push_back(_componentsWeight[0]);
	190	normalizeProbabilities();
	191	}
	192	else
	193	errorMsg::reportError("cannot change the number of components in mixtureDistribution::change_number_of_components()");
	194	}
	195
	196
	197	const MDOUBLE mixtureDistribution::getCumulativeProb(const MDOUBLE x) const
	198	{
	199	MDOUBLE res = 0.0;
	200	for (int i = 0; i < getComponentsNum(); ++i)
	201	res += _components[i]->getCumulativeProb(x) * getComponentProb(i);
	202	return res;
	203	}
	204
	205	const MDOUBLE mixtureDistribution::rates(const int category) const
	206	{
	207	if (category > categories() - 1)
	208	errorMsg::reportError("the required category does not exist!");
	209	int componentNum, categoryInComponent, totalCat = 0;
	210	for (int i = 0; i < getComponentsNum(); ++i)
	211	{
	212	if (category < _components[i]->categories() + totalCat)
	213	{
	214	componentNum = i;
	215	categoryInComponent = category - totalCat;
	216	break;
	217	}
	218	totalCat += _components[i]->categories();
	219	}
	220	return _components[componentNum]->rates(categoryInComponent) * _globalRate;
	221	}
	222
	223	const MDOUBLE mixtureDistribution::ratesProb(const int category) const
	224	{
	225	if (category > categories() - 1)
	226	errorMsg::reportError("there required category does not exist!");
	227	int componentNum, categoryInComponent, totalCat = 0;
	228	for (int i = 0; i < getComponentsNum(); ++i)
	229	{
	230	if (category < _components[i]->categories() + totalCat)
	231	{
	232	componentNum = i;
	233	categoryInComponent = category - totalCat;
	234	break;
	235	}
	236	totalCat += _components[i]->categories();
	237	}
	238	return getComponentProb(componentNum) * _components[componentNum]->ratesProb(categoryInComponent);
	239	}
	240
	241
	242	void mixtureDistribution::setMixtureParameters(const Vdouble& alphaVec, const Vdouble& betaVec, const Vdouble& componentsProb)
	243	{
	244	if (alphaVec.size() != getComponentsNum())
	245	errorMsg::reportError("the size of the alphas vector is not identical to the number of components");
	246	if (betaVec.size() != getComponentsNum())
	247	errorMsg::reportError("the size of the batas vector is not identical to the number of components");
	248	if (componentsProb.size() != getComponentsNum())
	249	errorMsg::reportError("the size of the components probabilities vector is not identical to the number of components");
	250
	251	setComponentsProb(componentsProb);
	252	int categoriesInComponent = _components[0]->categories();
	253	for (int i = 0; i < getComponentsNum(); ++i)
	254	_components[i]->setGammaParameters(categoriesInComponent, alphaVec[i], betaVec[i]);
	255	}
	256
	257	//the following functions set the components probabilities.
	258	//Note, that the new prob is not inWeight, but is scaled so that the total probabilities are 1.0
	259	void mixtureDistribution::setComponentWeight(MDOUBLE inWeight, const int componentNum, const MDOUBLE minWeight/=0.01/)
	260	{
	261	if((inWeight<0.0) \|\| (inWeight>1.0)){
	262	errorMsg::reportError("the probability assignment is not [0,1]");
	263	}
	264	if (inWeight < minWeight)
	265	inWeight = minWeight;
	266	MDOUBLE otherProbs = 1-inWeight;
	267	Vdouble probs(getComponentsNum(), 0.0);
	268	MDOUBLE sumOther = 0.0;
	269	int i;
	270	for (i = 0; i < getComponentsNum(); ++i)
	271	{
	272	if (i != componentNum)
	273	sumOther += _componentsWeight[i];
	274	}
	275	MDOUBLE factor = otherProbs / sumOther;
	276	for (i = 0; i < getComponentsNum(); ++i)
	277	{
	278	probs[i] = _componentsWeight[i] * factor ;
	279	}
	280	probs[componentNum] = inWeight;
	281	setComponentsProb(probs);
	282
	283	//_totalWeight -= _componentsWeight[componentNum];
	284	// _componentsWeight[componentNum] = inWeight;
	285	//_totalWeight += _componentsWeight[componentNum];
	286	}
	287
	288	//scale the components weights so that they sum to 1.0.
	289	void mixtureDistribution::normalizeProbabilities()
	290	{
	291	if (_componentsWeight.size() != getComponentsNum())
	292	errorMsg::reportError("problem in mixtureDistribution::normalizeProbabilities()");
	293	int i;
	294	for(i = 0; i < getComponentsNum(); ++i)
	295	{
	296	_componentsWeight[i] /= _totalWeight;
	297	}
	298	_totalWeight = 1.0;
	299	}
	300
	301	void mixtureDistribution::printParams(ostream& outF)
	302	{
	303	MDOUBLE avgRate = 0.0;
	304	for (int k = 0; k < getComponentsNum(); ++k)
	305	{
	306	outF << "comp="<<k<<" Alp/Beta= "<<getAlpha(k)/getBeta(k)<<" alpha= "<<getAlpha(k) << " beta= " <<getBeta(k)<<" Prob= "<<getComponentProb(k)<<endl;
	307	avgRate += (getAlpha(k) / getBeta(k)) * getComponentProb(k);
	308	}
	309	outF<<"# The prior average rate is: " <<avgRate<<endl;
	310	}⏎

+67

-0

libs/phylogeny/mixtureDistribution.h less more

	0	#ifndef ___MIXTURE_DIST
	1	#define ___MIXTURE_DIST
	2	/************************************************************
	3	The mixture distribution is a combination of several gamma distributions (components).
	4	Each gamma component has its own probability of occurrence = Hi,
	5	such that the sum of Hi equals 1.0.
	6	The category probabilities are the probability of each component multiplied by the category probability within the component.
	7	In case the Laguerre option is on:
	8	the actual number of categories (per component) can be lower than the requested number of categories.
	9	************************************************************/
	10	#include "definitions.h"
	11	#include "generalGammaDistribution.h"
	12
	13	class mixtureDistribution : public distribution {
	14	public:
	15	explicit mixtureDistribution(const vector<generalGammaDistribution*>& components, const Vdouble& componentsProb, quadratureType gammaType);
	16	explicit mixtureDistribution(int componentsNum, int categoriesNumInComponent, quadratureType gammaType = LAGUERRE, MDOUBLE maxAlpha = 15.0, MDOUBLE maxBeta = 15.0);
	17	explicit mixtureDistribution(int componentsNum, int categoriesNumInComponent,Vdouble AlphaInit ,Vdouble BetaInit, Vdouble componentProbInit ,quadratureType gammaType = QUANTILE, MDOUBLE maxAlpha = 15.0, MDOUBLE maxBeta = 15.0);
	18
	19	mixtureDistribution(const mixtureDistribution& other);
	20
	21	mixtureDistribution& operator=(const mixtureDistribution &otherDist);
	22	virtual distribution* clone() const { return new mixtureDistribution(*this); }
	23	virtual ~mixtureDistribution();
	24
	25	//get+set the parameters of the mixture
	26	void setMixtureParameters(const Vdouble& alphaVec, const Vdouble& betaVec, const Vdouble& componentsProb);
	27	const generalGammaDistribution* getComponent(int componentNum) const {return _components[componentNum];}
	28	const int getComponentsNum() const {return _components.size();}
	29	const int categories() const;
	30	//change_number_of_categoriesPerComp: change the number of categorites for each component. The total number of categories will be (in_number_of_categories*componentNum)
	31	void change_number_of_categoriesPerComp(int in_number_of_categories);
	32	void change_number_of_components(const int in_number_of_components);
	33	const int categoriesForOneComponent() const {return _components[0]->categories();}
	34	MDOUBLE getAlpha(int componentNum) const {return _components[componentNum]->getAlpha();}
	35	void setAlpha(MDOUBLE newAlpha, int componentNum) {_components[componentNum]->setAlpha(newAlpha);}
	36	MDOUBLE getBeta(int componentNum) const {return _components[componentNum]->getBeta();}
	37	void setBeta(MDOUBLE newBeta, int componentNum) {_components[componentNum]->setBeta(newBeta);}
	38	void setGammaParameters(int numOfCategories ,MDOUBLE alpha, MDOUBLE beta, int componentNum) {_components[componentNum]->setGammaParameters(numOfCategories ,alpha, beta);}
	39	const MDOUBLE getComponentProb(int componentNum) const {return _componentsWeight[componentNum] / _totalWeight;}
	40	void setComponentsProb(const Vdouble& componentsProb);
	41	void setGlobalRate(const MDOUBLE r) {_globalRate = r;}
	42	MDOUBLE getGlobalRate() const {return _globalRate;}
	43
	44	//the following function set the components weights.
	45	//Note that the new component prob is not inWeight, but is scaled so that the total probabilities are 1.0
	46	void setComponentWeight(MDOUBLE inWeight, const int componentNum, const MDOUBLE minWeight =0.01);
	47	const MDOUBLE getComponentWeight(int componentNum) const {return _componentsWeight[componentNum];}
	48	//scale the components weights so that they sum to 1.0.
	49	void normalizeProbabilities();
	50
	51	//get distribution statistics
	52	virtual const MDOUBLE getCumulativeProb(const MDOUBLE x) const;
	53	virtual const MDOUBLE rates(const int category) const;
	54	virtual const MDOUBLE ratesProb(const int i) const;
	55
	56	void printParams(ostream& outF );
	57
	58	private:
	59	void clear();
	60	private:
	61	vector<generalGammaDistribution*> _components;
	62	Vdouble _componentsWeight;
	63	MDOUBLE _globalRate;
	64	MDOUBLE _totalWeight; //holds the sum of the components probabilities. This is saved so that we don't need to sum all weight each time getProb() is called
	65	};
	66	#endif

+85

-0

libs/phylogeny/molphyFormat.cpp less more

	0	// $Id: molphyFormat.cpp 962 2006-11-07 15:13:34Z privmane $
	1	#include "molphyFormat.h"
	2	#include "someUtil.h"
	3	#include "errorMsg.h"
	4
	5	sequenceContainer molphyFormat::read(istream &infile, const alphabet* alph) {
	6	sequenceContainer mySeqData = readUnAligned(infile, alph);
	7	mySeqData.makeSureAllSeqAreSameLengthAndGetLen();
	8	return mySeqData;
	9	}
	10	sequenceContainer molphyFormat::readUnAligned(istream &infile, const alphabet* alph) {
	11
	12	vector<string> seqFileData;
	13	putFileIntoVectorStringArray(infile,seqFileData);
	14	if (seqFileData.empty()){
	15	errorMsg::reportError("unable to open file, or file is empty in molphy format");
	16	}
	17	vector<string>::iterator currentLinePosition = seqFileData.begin();
	18
	19	string::const_iterator itStr = seqFileData.begin()->begin();
	20	string::const_iterator itStrEnd = seqFileData.begin()->end();
	21
	22	int f_numSeq;
	23	bool readSeqNum= fromStringIterToInt(itStr,itStrEnd,f_numSeq);
	24	if (readSeqNum == false) errorMsg::reportError("Error reading number of sequences while reading MOLPHY sequence format");
	25	int f_seqLength;
	26	bool readSeqLen= fromStringIterToInt(itStr,itStrEnd,f_seqLength);
	27	if (readSeqLen == false) errorMsg::reportError("Error reading the sequences length while reading MOLPHY sequence format");
	28	currentLinePosition++; // we read the first line.
	29
	30	//---------------------------------------------------------------------
	31	sequenceContainer mySeqData;
	32
	33	//---------------------------------------------------------------------
	34	// vector<sequenceContainer::sequenceDatum*> vec;
	35	// seqDataPtr->getSequenceDatumPtrVectorNonConst(vec);
	36
	37	int localID=-1;
	38
	39	vector<string>::const_iterator it1 = seqFileData.begin();
	40	++it1; //skipping the first line that was read already.
	41	while (it1!= seqFileData.end()) {
	42	localID++;
	43	if (it1->empty()) {
	44	it1++;
	45	continue; // empty line continue
	46	}
	47	// read the name.
	48	string name(*it1);
	49	it1++;
	50
	51	string tmpString;
	52	while (it1 != seqFileData.end()) {
	53	if (tmpString.size() < f_seqLength) {
	54	tmpString+=*it1;
	55	++it1;
	56	}
	57	else break;
	58	}
	59
	60	mySeqData.add(sequence(tmpString,name,"",localID,alph));
	61
	62	}
	63	return mySeqData;
	64	}
	65
	66
	67
	68
	69	void molphyFormat::write(ostream &out, const sequenceContainer& sd) {
	70	out<<sd.numberOfSeqs()<<" "<<sd.seqLen()<<endl;
	71	for (sequenceContainer::constTaxaIterator it5=sd.constTaxaBegin();it5!=sd.constTaxaEnd();++it5) {
	72	out<<it5->name()<<endl;
	73	string seqString = it5->toString();
	74	int k=0;
	75	for (string::const_iterator cPos=seqString.begin() ; cPos != seqString.end() ; cPos ++,k++ ) {
	76	if (k>0 && ((k%60)==0)) out<<endl;
	77	out<<*cPos;
	78	}
	79	out<<endl;
	80	}
	81	}
	82
	83
	84

+47

-0

libs/phylogeny/molphyFormat.h less more

	0	// $Id: molphyFormat.h 962 2006-11-07 15:13:34Z privmane $
	1
	2	#ifndef ___MOLPHY_FORMAT
	3	#define ___MOLPHY_FORMAT
	4
	5	#include "sequenceContainer.h"
	6
	7	class molphyFormat{
	8	public:
	9	static sequenceContainer read(istream &infile, const alphabet* alph);
	10	static void write(ostream &out, const sequenceContainer& sd);
	11	//readUnAligned: the input sequences do not need to be aligned (not all sequences are the same length).
	12	static sequenceContainer readUnAligned(istream &infile, const alphabet* alph);
	13	};
	14
	15	#endif
	16
	17	/* EXAMPLE OF MOLPHY FORMAT:
	18
	19	6 128
	20	Langur
	21	KIFERCELARTLKKLGLDGYKGVSLANWVCLAKWESGYNTEATNYNPGDESTDYGIFQIN
	22	SRYWCNNGKPGAVDACHISCSALLQNNIADAVACAKRVVSDQGIRAWVAWRNHCQNKDVS
	23	QYVKGCGV
	24	Baboon
	25	KIFERCELARTLKRLGLDGYRGISLANWVCLAKWESDYNTQATNYNPGDQSTDYGIFQIN
	26	SHYWCNDGKPGAVNACHISCNALLQDNITDAVACAKRVVSDQGIRAWVAWRNHCQNRDVS
	27	QYVQGCGV
	28	Human
	29	KVFERCELARTLKRLGMDGYRGISLANWMCLAKWESGYNTRATNYNAGDRSTDYGIFQIN
	30	SRYWCNDGKPGAVNACHLSCSALLQDNIADAVACAKRVVRDQGIRAWVAWRNRCQNRDVR
	31	QYVQGCGV
	32	Rat
	33	KTYERCEFARTLKRNGMSGYYGVSLADWVCLAQHESNYNTQARNYDPGDQSTDYGIFQIN
	34	SRYWCNDGKPRAKNACGIPCSALLQDDITQAIQCAKRVVRDQGIRAWVAWQRHCKNRDLS
	35	GYIRNCGV
	36	Cow
	37	KVFERCELARTLKKLGLDGYKGVSLANWLCLTKWESSYNTKATNYNPSSESTDYGIFQIN
	38	SKWWCNDGKPNAVDGCHVSCSELMENDIAKAVACAKKIVSEQGITAWVAWKSHCRDHDVS
	39	SYVEGCTL
	40	Horse
	41	KVFSKCELAHKLKAQEMDGFGGYSLANWVCMAEYESNFNTRAFNGKNANGSSDYGLFQLN
	42	NKWWCKDNKRSSSNACNIMCSKLLDENIDDDISCAKRVVRDKGMSAWKAWVKHCKDKDLS
	43	EYLASCNL
	44
	45	*/
	46

+35

-0

libs/phylogeny/mtREV24.dat.q less more

	0	" "
	1	" 23.18 "
	2	" 26.95 13.24 "
	3	" 17.67 1.90 794.38 "
	4	" 59.93 103.33 58.94 1.90 "
	5	" 1.90 220.99 173.56 55.28 75.24 "
	6	" 9.77 1.90 63.05 583.55 1.90 313.56 "
	7	" 120.71 23.03 53.30 56.77 30.71 6.75 28.28 "
	8	" 13.90 165.23 496.13 113.99 141.49 582.40 49.12 1.90 "
	9	" 96.49 1.90 27.10 4.34 62.73 8.34 3.31 5.98 12.26 "
	10	" 25.46 15.58 15.16 1.90 25.65 39.70 1.90 2.41 11.49 329.09 "
	11	" 8.36 141.40 608.70 2.31 1.90 465.58 313.86 22.73 127.67 19.57 14.88 "
	12	" 141.88 1.90 65.41 1.90 6.18 47.37 1.90 1.90 11.97 517.98 537.53 91.37 "
	13	" 6.37 4.69 15.20 4.98 70.80 19.11 2.67 1.90 48.16 84.67 216.06 6.44 90.82 "
	14	" 54.31 23.64 73.31 13.43 31.26 137.29 12.83 1.90 60.97 20.63 40.10 50.10 18.84 17.31 "
	15	" 387.86 6.04 494.39 69.02 277.05 54.11 54.71 125.93 77.46 47.70 73.61 105.79 111.16 64.29 169.90 "
	16	" 480.72 2.08 238.46 28.01 179.97 94.93 14.82 11.17 44.78 368.43 126.40 136.33 528.17 33.85 128.22 597.21 "
	17	" 1.90 21.95 10.68 19.86 33.60 1.90 1.90 10.92 7.08 1.90 32.44 24.00 21.71 7.84 4.21 38.58 9.99 "
	18	" 6.48 1.90 191.36 21.21 254.77 38.82 13.12 3.21 670.14 25.01 44.15 51.17 39.96 465.58 16.21 64.92 38.73 26.25 "
	19	" 195.06 7.64 1.90 1.90 1.90 19.00 21.14 2.53 1.90 1222.94 91.67 1.90 387.54 6.35 8.23 1.90 204.54 5.37 1.90 "
	20	" 0.072 0.019 0.039 0.019 0.006 0.025 0.024 0.056 0.028 0.088 0.169 "
	21	" 0.023 0.054 0.061 0.054 0.072 0.086 0.029 0.033 0.043 "
	22	" Ala Arg Asn Asp Cys Gln Glu Gly His Ile Leu Lys Met Phe Pro Ser Thr Trp Tyr Val "
	23	" S_ij = S_ji and PI_i for the mtREV24 model (Adachi and Hasegawa 1996). "
	24	" The PI's used to sum to 0.999 and I changed one of the freq from 0.168 "
	25	" into 0.169 so that the sum is 1. Prepared by Z. Yang according to "
	26	" data sent by Dr M. Hasegawa. This matrix was obtained from the 12 "
	27	" mitochondrial proteins encoded by the same strand of the DNA from a "
	28	" diverse range of species including bird, fish, frog, lamprey, as well "
	29	" as mammals (see Adachi and Hasegawa 1996 for details). The other "
	30	" matrix (mtmam.dat) included in the package is based on the same "
	31	" proteins from mammals only. "
	32	" Adachi, J. and Hasegawa, M. (1996) MOLPHY version 2.3: programs for "
	33	" molecular phylogenetics based on maximum likelihood. Computer Science "
	34	" Monographs of Institute of Statistical Mathematics 28:1-150. "

+198

-0

libs/phylogeny/mulAlphabet.cpp less more

	0	// $Id: mulAlphabet.cpp 6420 2009-06-25 11:17:08Z adist $
	1
	2	#include "mulAlphabet.h"
	3	#include "distribution.h"
	4	#include "errorMsg.h"
	5	#include <iostream>
	6	#include "logFile.h"
	7
	8
	9	mulAlphabet::mulAlphabet(const alphabet* baseAlphabet, int mulFactor) :
	10	_baseAlphabet(baseAlphabet->clone()),
	11	_mulFactor(mulFactor),
	12	_size(baseAlphabet->size() * mulFactor)
	13	{}
	14
	15	mulAlphabet::mulAlphabet(const mulAlphabet& other) :
	16	_baseAlphabet(other._baseAlphabet->clone()),
	17	_mulFactor(other._mulFactor),
	18	_size(other._size)
	19	{}
	20
	21	mulAlphabet::~mulAlphabet()
	22	{
	23	if (_baseAlphabet) delete (_baseAlphabet);
	24	}
	25
	26	mulAlphabet& mulAlphabet::operator=(const mulAlphabet &other)
	27	{
	28	if (_baseAlphabet) delete (_baseAlphabet);
	29	_baseAlphabet = other._baseAlphabet->clone();
	30	_mulFactor = other._mulFactor;
	31	_size = other._size;
	32	return (*this);
	33	}
	34
	35	int mulAlphabet::unknown() const
	36	{
	37	return (convertFromBasedAlphaInt(_baseAlphabet->unknown()));
	38	}
	39
	40	int mulAlphabet::gap() const
	41	{
	42	return (convertFromBasedAlphaInt(_baseAlphabet->gap()));
	43	}
	44
	45	int mulAlphabet::stringSize() const
	46	{
	47	return _baseAlphabet->stringSize();
	48	}
	49
	50	bool mulAlphabet::isSpecific(const int id) const
	51	{
	52	if (id >= _size)
	53	return false;
	54	else
	55	return _baseAlphabet->isSpecific(convertToBasedAlphaInt(id));
	56	}
	57
	58	/* The _size multiplied (category-specific) characters come first. The remaining characters of the base alphabet are not multiplied.
	59	For example, when using nucleotides as the base alphabet and _mulFactor = 2 :
	60	0 A0
	61	1 C0
	62	2 G0
	63	3 T0
	64	4 A1
	65	5 C1
	66	6 G1
	67	7 T1
	68	8 A
	69	9 C
	70	10 G
	71	11 T
	72	12 U
	73	13 R
	74	14 Y
	75	15 K
	76	16 M
	77	17 S
	78	18 W
	79	19 B
	80	20 D
	81	21 H
	82	22 V
	83	23 N
	84	-1 -
	85	*/
	86
	87	string mulAlphabet::fromInt(const int id) const
	88	{
	89	// category and categoryName are for debug purpose
	90	int category(_mulFactor);
	91	if (id>=0)
	92	category = min(id / _baseAlphabet->size() , _mulFactor) ;
	93	string categoryName("");
	94	categoryName = int2string(category);
	95	int inCategoryId = convertToBasedAlphaInt(id);
	96	return (_baseAlphabet->fromInt(inCategoryId) + categoryName);
	97	}
	98
	99	int mulAlphabet::convertFromBasedAlphaInt(int id) const
	100	{
	101	if (id < 0)
	102	return (id);
	103
	104	return (id + _size);
	105	}
	106
	107	int mulAlphabet::fromChar(const string& str, const int pos) const
	108	{
	109	int id = _baseAlphabet->fromChar(str,pos);
	110	return (convertFromBasedAlphaInt(id));
	111	}
	112
	113
	114	vector<int> mulAlphabet::fromString(const string &str) const
	115	{
	116	vector<int> result = _baseAlphabet->fromString(str);
	117	vector<int>::iterator itr = result.begin();
	118	for (; itr != result.end(); ++itr)
	119	itr = convertFromBasedAlphaInt(itr);
	120
	121	return (result);
	122	}
	123
	124
	125	int mulAlphabet::convertToBasedAlphaInt(int id) const
	126	{
	127	if (id<0)
	128	return (id);
	129	if (id >= _size)
	130	return (id - _size);
	131
	132	return (id % _baseAlphabet->size());
	133	}
	134
	135
	136
	137	int mulAlphabet::relations(const int charInSeq, const int charToCheck) const
	138	{
	139	int baseAlphabetSize = _baseAlphabet->size();
	140	int categoryInSeq(_mulFactor);
	141	if (charInSeq>=0)
	142	categoryInSeq = min(charInSeq/baseAlphabetSize , _mulFactor);
	143
	144	int categoryToCheck(_mulFactor);
	145	if (charToCheck>=0)
	146	categoryToCheck = min(charToCheck/baseAlphabetSize , _mulFactor);
	147
	148	if (categoryToCheck == _mulFactor)
	149	LOG(4,<<"mulAlphabet::relations charToCheck should belong to category < _mulFactor = " << _mulFactor << endl);
	150
	151	if ((categoryInSeq == categoryToCheck) \|\| (categoryInSeq == _mulFactor))
	152	return _baseAlphabet->relations(convertToBasedAlphaInt(charInSeq),convertToBasedAlphaInt(charToCheck));
	153
	154	return 0;
	155	}
	156
	157
	158	int mulAlphabet::compareCategories(int charA, int charB) const
	159	{
	160	// TO DO should combine code by calling mulAlphabet::rateShiftType mulAlphabet::compareCategories
	161
	162	int baseAlphabetSize = _baseAlphabet->size();
	163	int categoryA(_mulFactor);
	164	if (categoryA>=0)
	165	categoryA = min(charA/baseAlphabetSize,_mulFactor);
	166
	167	int categoryB(_mulFactor);
	168	if (categoryB>=0)
	169	categoryB = min(charB/baseAlphabetSize,_mulFactor);
	170
	171	if (categoryA<categoryB)
	172	return 1;
	173	else if (categoryB<categoryA)
	174	return -1;
	175	return (0);
	176	}
	177
	178
	179	mulAlphabet::rateShiftType mulAlphabet::compareCategories(int charA, int charB, int baseAlphabetSize, int multiplicationFactor)
	180	{
	181	int categoryA(multiplicationFactor);
	182	if (categoryA>=0)
	183	categoryA = min(charA/baseAlphabetSize,multiplicationFactor);
	184
	185	int categoryB(multiplicationFactor);
	186	if (categoryB>=0)
	187	categoryB = min(charB/baseAlphabetSize,multiplicationFactor);
	188
	189	if (categoryA<categoryB)
	190	return acceleration;
	191	else if (categoryB<categoryA)
	192	return deceleration;
	193	return noRateShift;
	194
	195
	196
	197	}⏎

+53

-0

libs/phylogeny/mulAlphabet.h less more

	0	// $Id: mulAlphabet.h 6420 2009-06-25 11:17:08Z adist $
	1
	2	// version 1.01
	3	// last modified 1 Jan 2004
	4
	5	#ifndef ___MUL_ALPHABET_H
	6	#define ___MUL_ALPHABET_H
	7
	8	#include "definitions.h"
	9	#include "alphabet.h"
	10	#include "someUtil.h"
	11
	12	class mulAlphabet : public alphabet {
	13
	14	public:
	15	mulAlphabet(const alphabet* baseAlphabet, int mulFactor);
	16	mulAlphabet(const mulAlphabet& other);
	17	virtual ~mulAlphabet();
	18	virtual alphabet* clone() const { return new mulAlphabet(*this); }
	19	mulAlphabet& operator=(const mulAlphabet &other);
	20
	21	int unknown() const ;
	22	int gap() const;
	23
	24	int size() const {return _size;}
	25	int stringSize() const ;
	26	bool isSpecific(const int id) const ;
	27
	28	int fromChar(const string& str, const int pos) const;
	29	vector<int> fromString(const string& str) const;
	30
	31	string fromInt(const int id) const;
	32
	33	int relations(const int charInSeq, const int charToCheck) const;
	34	int compareCategories(int charA, int charB) const;
	35	enum rateShiftType {noRateShift=0, acceleration, deceleration};
	36	static rateShiftType compareCategories(int charA, int charB, int baseAlphabetSize, int multiplicationFactor) ;
	37	const alphabet* getBaseAlphabet() const {return _baseAlphabet;}
	38
	39	public:
	40	int convertFromBasedAlphaInt(int id) const;
	41	int convertToBasedAlphaInt(int id) const;
	42
	43	private:
	44	alphabet* _baseAlphabet; // This alphabet must use single characters, i.e. - not codon. (or we will have to add to every alphabet a member which holds its character's size)
	45	int _mulFactor ; // number of times that the alphabet is multiplied by = Number of categories (g in Galtier paper)
	46	int _size ; // this is simply the _baseAlphabet->size() * _mulFactor
	47
	48
	49	};
	50
	51	#endif
	52

+38

-0

libs/phylogeny/multipleStochasticProcess.cpp less more

	0	#include "multipleStochasticProcess.h"
	1	#include "errorMsg.h"
	2
	3	multipleStochasticProcess::multipleStochasticProcess()
	4	{
	5	}
	6
	7
	8	multipleStochasticProcess::~multipleStochasticProcess()
	9	{
	10	}
	11
	12
	13	void multipleStochasticProcess::copy(const multipleStochasticProcess *pOther)
	14	{
	15	_spVec = pOther->_spVec;
	16	_spProb = pOther->_spProb;
	17	}
	18
	19
	20	MDOUBLE multipleStochasticProcess::getProb(int spPlace) const {
	21	if (spPlace >= _spProb.size())
	22	errorMsg::reportError("error in multipleStochasticProcess::getProb");
	23	return _spProb[spPlace];
	24	}
	25
	26	stochasticProcess* multipleStochasticProcess::getSp(int spPlace) {
	27	if (spPlace >= _spVec.size())
	28	errorMsg::reportError("error in multipleStochasticProcess::getSp");
	29	return &_spVec[spPlace];
	30	}
	31
	32	void multipleStochasticProcess::setSpVec(vector<stochasticProcess>& spVec)
	33	{
	34	_spVec.clear();
	35	_spVec = spVec;
	36	}
	37

+23

-0

libs/phylogeny/multipleStochasticProcess.h less more

	0	#ifndef _MULTIPLE_STOCHASTIC_PROCESS
	1	#define _MULTIPLE_STOCHASTIC_PROCESS
	2
	3	#include "stochasticProcess.h"
	4
	5
	6	class multipleStochasticProcess {
	7	public:
	8	multipleStochasticProcess();
	9	virtual ~multipleStochasticProcess();
	10	virtual MDOUBLE getProb(int spPlace) const;
	11	virtual stochasticProcess* getSp(int spPlace);
	12	virtual int getSPVecSize() const {return _spVec.size();}
	13	virtual void setSpVec(vector<stochasticProcess>& spVec);
	14
	15
	16	protected:
	17	virtual void copy(const multipleStochasticProcess * pOther);
	18	protected:
	19	vector<stochasticProcess> _spVec;
	20	Vdouble _spProb;
	21	};
	22	#endif

+152

-0

libs/phylogeny/nexusFormat.cpp less more

	0	// $Id: nexusFormat.cpp 5987 2009-03-18 18:13:53Z itaymay $
	1
	2	#include "nexusFormat.h"
	3	#include "someUtil.h"
	4	#include "errorMsg.h"
	5	#include <map>
	6
	7	sequenceContainer nexusFormat::read(istream &infile, const alphabet* pAlph) {
	8	sequenceContainer mySeqData = readUnAligned(infile, pAlph);
	9	mySeqData.makeSureAllSeqAreSameLengthAndGetLen();
	10	return mySeqData;
	11	}
	12
	13	sequenceContainer nexusFormat::readUnAligned(istream &infile, const alphabet* pAlph) {
	14	if (!infile) {
	15	errorMsg::reportError("unable to read mase format, could not open file");
	16	}
	17	sequenceContainer mySeqData;;
	18
	19	vector<string> seqFileData;
	20	putFileIntoVectorStringArray(infile,seqFileData);
	21
	22	vector<string>::const_iterator it1 = seqFileData.begin();
	23	// make sure that the first 6 chars in the first line is #NEXUS
	24	if (it1->size()<6) errorMsg::reportError("first word in a nexus sequence file format must be #NEXUS",1);
	25	if ( ((*it1)[0] != '#')
	26	\|\| (((it1)[1] != 'N') && ((it1)[1] != 'n'))
	27	\|\| (((it1)[2] != 'E') && ((it1)[2] != 'e'))
	28	\|\| (((it1)[3] != 'X') && ((it1)[3] != 'x'))
	29	\|\| (((it1)[4] != 'U') && ((it1)[4] != 'u'))
	30	\|\| (((it1)[5] != 'S') && ((it1)[5] != 's')) ) {
	31	errorMsg::reportError("first word in a nexus sequence file format must be #NEXUS",1);
	32	}
	33	it1++;
	34
	35	while ( ( (it1).find("matrix") == -1) && ( (it1).find("MATRIX") == -1) && (it1!= seqFileData.end()))
	36	{ //check for the word matrix
	37	++it1;
	38	}
	39
	40	int localid=0;
	41	//int x1 = ((*it1).find("matrix") != -1);
	42	//int x2 = ((*it1).find("MATRIX") != -1);
	43	if (((it1).find("matrix") != -1) \|\| ((it1).find("MATRIX") != -1))
	44	{
	45	//taken from clustalFormat:
	46	//In case of codon alpahabet we cannot add a seqeunce that is not dividable by 3.
	47	//In this case the last nucleotides in each line (zero, one or two)
	48	//should be saved. The next time the same sequence name appears -
	49	//these saveed nucleotidea and are added to the begining of the line.
	50	map<string ,string> stringsToAdd;
	51
	52
	53	for (++it1; it1 != seqFileData.end() ; ++it1)
	54	{
	55	if (((it1).find("end;") != -1) \|\| ((it1).find("END;") != -1))
	56	break;
	57	if (it1->empty() \|\| ((*it1).find(';') != -1))
	58	{ // empty line constinue
	59	continue;
	60	}
	61	sequence seq(pAlph);
	62
	63	string taxonName;
	64	string remark;
	65	string stringSeq;
	66	bool beforeName = true;
	67	string::const_iterator stringIt = (it1)->begin();
	68	for (; stringIt != (it1)->end(); ++stringIt)
	69	{ //first loop finds the taxon name
	70	if ( ((stringIt) == ' ') \|\| ((stringIt) == '\t'))
	71	if (beforeName == true)
	72	continue; //spaces before taxon name are legal
	73	else
	74	break; //A space marks the end of the taxon name
	75	else
	76	{
	77	taxonName += (*stringIt);
	78	beforeName = false;
	79	}
	80	}
	81
	82	//check if a new sequence.
	83	//if the name already exists then init stringSeq with the nucleotide from the previous line of the same sequence
	84	if (stringsToAdd.find(taxonName)!=stringsToAdd.end())
	85	stringSeq = stringsToAdd[taxonName];
	86
	87	for (; stringIt != (it1)->end(); ++stringIt)
	88	{//second loop finds the sequecne
	89	if ( ((stringIt)==' ') \|\| ((stringIt) == '\t'))
	90	continue;
	91	else stringSeq += (*stringIt);
	92	}
	93
	94	//when alphabet is codon stringSeq must be dividable by 3.
	95	// 1. save the reminder (0,1 or 2 last nucleotides) in stringToAdd
	96	// 2. substr the reminder from the sequence line.
	97	// 3. keep stringToAdd in map (according the name) to be added later.
	98	string stringToAdd="";
	99	if (pAlph->size()>=60){ // codon?
	100	if ((stringSeq.size()%3)==1){ //add the last nucleotide to the next line
	101	stringToAdd += stringSeq[stringSeq.size()-1];
	102	stringSeq = stringSeq.substr(0,stringSeq.size()-1);
	103	}
	104	if ((stringSeq.size() % 3) == 2){ //add the 2 last nucleotide to the next line
	105	stringToAdd+=stringSeq[stringSeq.size()-2];
	106	stringToAdd+=stringSeq[stringSeq.size()-1];
	107	stringSeq = stringSeq.substr(0, stringSeq.size() - 2);
	108	}
	109	}
	110	stringsToAdd[taxonName] = stringToAdd; //update the map with the new stringToAdd
	111	//add sequence to container
	112	int id = mySeqData.getId(taxonName, false);
	113	if (id==-1) { // new sequence.
	114	mySeqData.add(sequence(stringSeq, taxonName,remark,localid, pAlph));
	115	localid++;
	116	}
	117	else {// the sequence is already there...
	118	sequence tmp(stringSeq,taxonName, remark, id, pAlph);
	119	mySeqData[id].operator += (tmp);
	120	}
	121	}
	122	}
	123	else
	124	{
	125	errorMsg::reportError("no sequence data in nexus file - no matrix keyword found");
	126	}
	127
	128	return mySeqData;
	129	}
	130
	131	void nexusFormat::write(ostream &out, const sequenceContainer& sc) {
	132	//vector<string> gfr = sd.getGeneralFileRemarks();
	133	//if (gfr.empty()) out<<";;\n;;\n";
	134	//for (vector<string>::const_iterator k=gfr.begin() ; k != gfr.end() ; ++k )
	135	// out<<(*k)<<endl;
	136	out<<"#NEXUS"<<endl;
	137	out<<"begin data;"<<endl;
	138	out<<"dimensions ntax="<<sc.numberOfSeqs()<<" nchar="<<sc.seqLen() <<";"<<endl;
	139	if (sc.alphabetSize() == 4)
	140	out<<"format datatype=dna gap=-;"<<endl;
	141	else
	142	out<<"format datatype=protein gap=-;"<<endl;
	143	out<<"matrix"<<endl;
	144
	145	for (sequenceContainer::constTaxaIterator itSeq=sc.constTaxaBegin();itSeq!=sc.constTaxaEnd();++itSeq) {
	146	out<<"\t"<<itSeq->name()<<"\t"<<itSeq->toString()<<endl;
	147	}
	148	out<<";"<<endl;
	149	out<<"end;"<<endl;
	150	}
	151

+43

-0

libs/phylogeny/nexusFormat.h less more

	0	// $Id: nexusFormat.h 5158 2008-11-06 17:44:08Z itaymay $
	1
	2	#ifndef ___NEXUS_FORMAT
	3	#define ___NEXUS_FORMAT
	4
	5	#include "sequenceContainer.h"
	6
	7	class nexusFormat{
	8	public:
	9	static sequenceContainer read(istream &infile, const alphabet* alph);
	10	static void write(ostream &out, const sequenceContainer& sd);
	11	//readUnAligned: the input sequences do not need to be aligned (not all sequences are the same length).
	12	static sequenceContainer readUnAligned(istream &infile, const alphabet* alph);
	13	};
	14
	15	#endif
	16
	17	/* EXAMPLE OF THE FORMAT:
	18	#NEXUS
	19
	20	begin data;
	21	dimensions ntax=6 nchar=128;
	22	format datatype=Protein gap=-;
	23	matrix
	24	Horse KVFSKCELAHKLKAQEMDGFGGYSLANWVCMAEYESNFNTRAFNGKNANGSSDYGLFQLNNKWWCKDNKRSSSNACNIMCSKLLDENIDDDISCAKRVVRDKGMSAWKAWVKHCKDKDLSEYLASCNL
	25	Langur KIFERCELARTLKKLGLDGYKGVSLANWVCLAKWESGYNTEATNYNPGDESTDYGIFQINSRYWCNNGKPGAVDACHISCSALLQNNIADAVACAKRVVSDQGIRAWVAWRNHCQNKDVSQYVKGCGV
	26	Human KVFERCELARTLKRLGMDGYRGISLANWMCLAKWESGYNTRATNYNAGDRSTDYGIFQINSRYWCNDGKPGAVNACHLSCSALLQDNIADAVACAKRVVRDQGIRAWVAWRNRCQNRDVRQYVQGCGV
	27	Rat KTYERCEFARTLKRNGMSGYYGVSLADWVCLAQHESNYNTQARNYDPGDQSTDYGIFQINSRYWCNDGKPRAKNACGIPCSALLQDDITQAIQCAKRVVRDQGIRAWVAWQRHCKNRDLSGYIRNCGV
	28	Cow KVFERCELARTLKKLGLDGYKGVSLANWLCLTKWESSYNTKATNYNPSSESTDYGIFQINSKWWCNDGKPNAVDGCHVSCSELMENDIAKAVACAKKIVSEQGITAWVAWKSHCRDHDVSSYVEGCTL
	29	Baboon KIFERCELARTLKRLGLDGYRGISLANWVCLAKWESDYNTQATNYNPGDQSTDYGIFQINSHYWCNDGKPGAVNACHISCNALLQDNITDAVACAKRVVSDQGIRAWVAWRNHCQNRDVSQYVQGCGV
	30	;
	31	end;
	32
	33	NOTE!!!!
	34	The sequences can also be ordered in an "interleaved" way:
	35	Horse KVFSKCELAHKLKAQEMDGFGGYSLANWVCMAEYESNFNTRAFNGKNANGS
	36	Langur KIFERCELARTLKKLGLDGYKGVSLANWVCLAKWESGYNTEATNYNPGDES
	37
	38	Horse SDYGLFQLNNKWWCKDNKRSSSNACNIMCSKLLDENIDDDISCAKRVVRDKGMSAWKAWVKHCKDKDLSEYLASCNL
	39	Langur TDYGIFQINSRYWCNNGKPGAVDACHISCSALLQNNIADAVACAKRVVSDQGIRAWVAWRNHCQNKDVSQYVKGCGV
	40	*/
	41
	42

+411

-0

libs/phylogeny/nj.cpp less more

	0	// $Id: nj.cpp 9948 2011-10-23 15:53:03Z cohenofi $
	1
	2	// version 1.00
	3	// last modified 3 Nov 2002
	4
	5	#include "nj.h"
	6	#include "errorMsg.h"
	7	#include "logFile.h"
	8	#include "treeUtil.h"
	9	#include <cassert>
	10	#include <algorithm>
	11	#include <map>
	12	using namespace std;
	13
	14
	15	//------------------------------------------
	16	// general outline:
	17	// we follow Swofford's book, "Molecular Systematics" pg489.
	18	// currentNodes is the vector of the nodes that are "in process".
	19	// in the beginning, these are all the leaves. Once 2 leaves are separated,
	20	// they are excluded from currentNodes, and their father is added to currentNodes.
	21	// we (almost) finish the algorithm when currentNodes's size is 3. (i.e., we know the topology).
	22	// thus when we start from an evolutionary tree, all we do, is to construct a star (start) tree
	23	//------------------------------------------
	24
	25
	26
	27
	28	//------------------------------------------
	29	// constructor and start
	30	//------------------------------------------
	31	tree NJalg::computeTree(VVdouble distances,const vector<string>& names, const tree * const constriantTree /= NULL/){
	32	assert(distances.size() == names.size());
	33	tree resTree = startingTree(names);
	34	if (distances.size()<3) return resTree;
	35	vector<tree::nodeP> currentNodes;
	36	resTree.getAllLeaves(currentNodes,resTree.getRoot());
	37	if (constriantTree) {
	38	njConstraint njc(resTree, *constriantTree);
	39	while (currentNodes.size() >= 3) NJiterate(resTree,currentNodes,distances, njc);
	40	} else {
	41	while (currentNodes.size() >= 3) NJiterate(resTree,currentNodes,distances);
	42	}
	43	resTree.create_names_to_internal_nodes();
	44	resTree.makeSureAllBranchesArePositive();
	45	LOGDO(5,resTree.output(myLog::LogFile()));
	46	return resTree;
	47	}
	48
	49	tree NJalg::startingTree(const vector<string>& names) {
	50	return starTree(names);
	51	}
	52
	53	tree NJalg::startingTree(const tree& inTree) {
	54	tree et;
	55	et.createRootNode();
	56	vector<tree::nodeP> allLeaves;
	57	inTree.getAllLeaves(allLeaves,inTree.getRoot());
	58
	59	vector<string> names(allLeaves.size());
	60	for (int k = 0 ; k < allLeaves.size(); ++k)
	61	names[k]=allLeaves[k]->name();
	62
	63	return startingTree(names);
	64	}
	65
	66	void NJalg::updateBranchDistance(const VVdouble& distanceTable,
	67	const Vdouble& rValues,
	68	tree::nodeP nodeNew,
	69	tree::nodeP nodeI,
	70	tree::nodeP nodeJ,
	71	int Iplace,
	72	int Jplace) {
	73	MDOUBLE dis= (Iplace<Jplace) ? distanceTable[Iplace][Jplace] : distanceTable[Jplace][Iplace];
	74	MDOUBLE DisI_new = dis/2.0;
	75	MDOUBLE tmp = rValues[Iplace] - rValues[Jplace];
	76	tmp/= ( 2.0*(distanceTable.size()-2) );
	77	DisI_new = DisI_new+ tmp;
	78	MDOUBLE DisJ_new = dis - DisI_new;
	79	if (DisI_new<tree::SHORT_LENGTH_VALUE) DisI_new=tree::SHORT_LENGTH_VALUE; // no negative..
	80	if (DisJ_new<tree::SHORT_LENGTH_VALUE) DisJ_new=tree::SHORT_LENGTH_VALUE; // no negative..
	81	nodeI->setDisToFather(DisI_new);
	82	nodeJ->setDisToFather(DisJ_new);
	83	}
	84
	85	void NJalg::NJiterate(tree& et,
	86	vector<tree::nodeP>& currentNodes,
	87	VVdouble& distanceTable) {
	88	Vdouble rVector = calc_r_values(currentNodes,distanceTable);//CHECK2
	89
	90	if (currentNodes.size() == 3) {
	91	update3taxaLevel(distanceTable,rVector,currentNodes);
	92	currentNodes.clear();
	93	return;
	94	}
	95
	96	int minRaw,minCol;
	97	calc_M_matrix(currentNodes,distanceTable,rVector,minRaw,minCol);//CHECK3
	98	tree::nodeP nodeI = currentNodes[minRaw];
	99	tree::nodeP nodeJ = currentNodes[minCol];
	100	tree::nodeP theNewNode;
	101	theNewNode= SeparateNodes(et,nodeI,nodeJ);
	102	//CHECK4
	103	updateBranchDistance(distanceTable,rVector,theNewNode,nodeI,nodeJ,minRaw,minCol);
	104	//CHECK6
	105	et.create_names_to_internal_nodes();
	106	UpdateDistanceTableAndCurrentNodes(currentNodes,distanceTable,nodeI,nodeJ,theNewNode,minRaw,minCol);
	107	}
	108
	109	void NJalg::NJiterate(tree& et,
	110	vector<tree::nodeP>& currentNodes,
	111	VVdouble& distanceTable,
	112	njConstraint& njc) {
	113	Vdouble rMatrix = calc_r_values(currentNodes,distanceTable);//CHECK2
	114
	115	if (currentNodes.size() == 3) {
	116	update3taxaLevel(distanceTable,rMatrix,currentNodes);
	117	currentNodes.clear();
	118	return;
	119	}
	120
	121	int minRaw,minCol;
	122	calc_M_matrix(currentNodes,distanceTable,rMatrix,minRaw,minCol, njc);//CHECK3
	123	tree::nodeP nodeI = currentNodes[minRaw];
	124	tree::nodeP nodeJ = currentNodes[minCol];
	125	tree::nodeP theNewNode;
	126	theNewNode= SeparateNodes(et,nodeI,nodeJ);
	127	njc.join(nodeI, nodeJ, theNewNode);
	128	//CHECK4
	129	updateBranchDistance(distanceTable,rMatrix,theNewNode,nodeI,nodeJ,minRaw,minCol);
	130	//CHECK6
	131	et.create_names_to_internal_nodes();
	132	UpdateDistanceTableAndCurrentNodes(currentNodes,distanceTable,nodeI,nodeJ,theNewNode,minRaw,minCol);
	133	LOGDO(15,et.output(myLog::LogFile(),tree::ANCESTORID));
	134
	135	}
	136
	137
	138
	139	Vdouble NJalg::calc_r_values(vector<tree::nodeP>& currentNodes,
	140	const VVdouble& distanceTable) {
	141	Vdouble r_values(currentNodes.size(),0.0);
	142	for (int i=0; i <r_values.size();++i) {
	143	for (int j =0; j < r_values.size();++j) {
	144	MDOUBLE dis= (i<j) ? distanceTable[i][j] : distanceTable[j][i];
	145	r_values[i] += dis;
	146	}
	147	}
	148	return r_values;
	149	}
	150
	151	void NJalg::calc_M_matrix(vector<tree::nodeP>& currentNodes,
	152	const VVdouble& distanceTable,
	153	const Vdouble & r_values,
	154	int& minRaw,int& minCol){
	155	MDOUBLE min = VERYBIG;
	156	for (int i=0; i < currentNodes.size();++i){
	157	for (int j =i+1; j < currentNodes.size();++j) {
	158	MDOUBLE dis= (i<j) ? distanceTable[i][j] : distanceTable[j][i];
	159	MDOUBLE tmp = dis-(r_values[i]+r_values[j])/(currentNodes.size()-2);
	160	if (tmp<min) {minRaw = i;minCol=j;min=tmp;}
	161
	162	}
	163	}
	164	}
	165
	166	void NJalg::calc_M_matrix(vector<tree::nodeP>& currentNodes,
	167	const VVdouble& distanceTable,
	168	const Vdouble & r_values,
	169	int& minRaw,int& minCol,
	170	const njConstraint& njc){
	171	MDOUBLE min = VERYBIG;
	172	MDOUBLE min_noc = VERYBIG;
	173	int minRaw_noc=-1,minCol_noc=-1;
	174	for (int i=0; i < currentNodes.size();++i){
	175	for (int j =i+1; j < currentNodes.size();++j) {
	176	if (njc.isCompatible(currentNodes[i],currentNodes[j])) {
	177	MDOUBLE dis= (i<j) ? distanceTable[i][j] : distanceTable[j][i];
	178	MDOUBLE tmp = dis-(r_values[i]+r_values[j])/(currentNodes.size()-2);
	179	if (tmp<min) {minRaw = i;minCol=j;min=tmp;}
	180	}
	181	LOGDO(10,{
	182	MDOUBLE dis= (i<j) ? distanceTable[i][j] : distanceTable[j][i];
	183	MDOUBLE tmp = dis-(r_values[i]+r_values[j])/(currentNodes.size()-2);
	184	if (tmp<min_noc) {minRaw_noc = i;minCol_noc=j;min_noc=tmp;}
	185	});
	186
	187	}
	188	}
	189	LOGDO(10, {if (min_noc != min) {myLog::LogFile()
	190	<< "NJ-constratin changes outcome " <<
	191	currentNodes[minRaw_noc]->name()<<","<<currentNodes[minCol_noc]->name() <<"-> " <<
	192	currentNodes[minRaw] ->name()<<","<<currentNodes[minCol] ->name()<<
	193	" ("<<min-min_noc<<")"<<endl;
	194	njc.isCompatible(currentNodes[minRaw_noc], currentNodes[minCol_noc], true);
	195	myLog::LogFile() << njc <<endl;
	196	}
	197	});
	198	}
	199
	200	tree::nodeP NJalg::SeparateNodes(tree& et, tree::nodeP node1,
	201	tree::nodeP node2) {
	202	if (node1->father() != node2->father())
	203	errorMsg::reportError(" error in function NJalg::SeparateNodes - nodes don't have the same father");
	204
	205	tree::nodeP fatherNode = node1->father();
	206
	207	tree::nodeP theNewNode = et.createNode(fatherNode,et.getNodesNum());
	208	node1->setFather(theNewNode);
	209	theNewNode->setSon(node1);
	210	node2->setFather(theNewNode);
	211	theNewNode->setSon(node2);
	212
	213	// remove from son list of father node.
	214	fatherNode->removeSon(node1);
	215
	216	fatherNode->removeSon(node2);
	217	return theNewNode;
	218	}
	219
	220	void NJalg::update3taxaLevel(VVdouble& distanceTable,Vdouble & r_values,
	221	vector<tree::nodeP>& currentNodes) {
	222	// update the distance of the 3 taxa that are left in the end, to the root.
	223
	224	MDOUBLE dis0root = distanceTable[0][1]/2+0.5*(r_values[0]-r_values[1]);
	225	MDOUBLE dis1root = distanceTable[0][1]/2+0.5*(r_values[1]-r_values[0]);
	226	MDOUBLE dis2root = distanceTable[0][2]/2+0.5*(r_values[2]-r_values[0]);
	227	if (dis0root<tree::SHORT_LENGTH_VALUE) dis0root=tree::SHORT_LENGTH_VALUE; // no negative..
	228	if (dis1root<tree::SHORT_LENGTH_VALUE) dis1root=tree::SHORT_LENGTH_VALUE; // no negative..
	229	if (dis2root<tree::SHORT_LENGTH_VALUE) dis2root=tree::SHORT_LENGTH_VALUE; // no negative..
	230	currentNodes[0]->setDisToFather(dis0root);
	231	currentNodes[1]->setDisToFather(dis1root);
	232	currentNodes[2]->setDisToFather(dis2root);
	233	}
	234
	235	void NJalg::UpdateDistanceTableAndCurrentNodes(vector<tree::nodeP>& currentNodes,
	236	VVdouble& distanceTable,
	237	tree::nodeP nodeI,
	238	tree::nodeP nodeJ,
	239	tree::nodeP theNewNode,
	240	int Iplace,
	241	int Jplace) {
	242	// Iplace is the place of i in the "old" currentNodes vector
	243	int i,j;
	244	// updating currentNodes
	245	vector<tree::nodeP> newCurrentNode= currentNodes;
	246
	247	vector<tree::nodeP>::iterator vec_iter1=remove(
	248	newCurrentNode.begin(),newCurrentNode.end(),nodeI );
	249	newCurrentNode.erase(vec_iter1,newCurrentNode.end());
	250
	251	vector<tree::nodeP>::iterator vec_iter2=remove(
	252	newCurrentNode.begin(),newCurrentNode.end(),nodeJ );
	253	newCurrentNode.erase(vec_iter2,newCurrentNode.end());
	254
	255	newCurrentNode.push_back(theNewNode);
	256
	257	map<tree::nodeP,int> nodeIntMap1;
	258	for (int z=0; z<currentNodes.size();++z) {
	259	nodeIntMap1.insert(map<tree::nodeP,int>::value_type(currentNodes[z],z));
	260	}
	261
	262	VVdouble newDisTable;
	263	newDisTable.resize(newCurrentNode.size());
	264	for (int z1=0;z1<newDisTable.size();++z1) newDisTable[z1].resize(newCurrentNode.size(),0.0);
	265
	266	// updatine the table
	267	for (i=0; i < newCurrentNode.size(); i++) {
	268	for (j=i+1; j < newCurrentNode.size() ; j++) {
	269	if ((i!=newCurrentNode.size()-1) && (j!=newCurrentNode.size()-1)) {// both old nodes
	270	int oldI = nodeIntMap1[newCurrentNode[i]];
	271	int oldJ = nodeIntMap1[newCurrentNode[j]];
	272	MDOUBLE dis= (oldI<oldJ) ? distanceTable[oldI][oldJ] : distanceTable[oldJ][oldI];
	273	newDisTable[i][j] = dis;
	274	} //else if (i==newCurrentNode.size()-1) { // i is new
	275	// newDisTable[i][j] = (dis(Iplace,NewOldPlaces[j])+dis(Jplace,NewOldPlaces[j])-dis(Iplace,Jplace))/2.0;
	276	//}
	277	else if (j==newCurrentNode.size()-1) { // j is new
	278	int oldI = Iplace;
	279	int oldJ = Jplace;
	280	int oldK = nodeIntMap1[newCurrentNode[i]];
	281	MDOUBLE disIK= (oldI<oldK) ? distanceTable[oldI][oldK] : distanceTable[oldK][oldI];
	282	MDOUBLE disIJ= (oldI<oldJ) ? distanceTable[oldI][oldJ] : distanceTable[oldJ][oldI];
	283	MDOUBLE disJK= (oldJ<oldK) ? distanceTable[oldJ][oldK] : distanceTable[oldK][oldJ];
	284	newDisTable[i][j] = 0.5*(disIK+disJK-disIJ); //EQ. 43 SWOFFORD PAGE 489.
	285	}
	286	}
	287	}
	288
	289	currentNodes=newCurrentNode;
	290	distanceTable=newDisTable;
	291	}
	292
	293	/*
	294	NJalg::NJalg(){
	295	_myET = NULL;
	296	}
	297
	298
	299
	300	//-----------------------------
	301	// The algorithm
	302	//-----------------------------
	303
	304	void NJalg::GetDisTable(const sequenceContainer& sd,const vector<MDOUBLE> * weights) {
	305
	306	VVresize(_startingDistanceTable,distanceTable.size(),distanceTable.size());// for printing stuff later.
	307	VVresize(LTable,distanceTable.size(),distanceTable.size());// for printing stuff later.
	308
	309	int i,j;
	310	_nodeNames.resize(currentNodes.size());
	311	for ( i=0; i < currentNodes.size(); i++) {
	312	_nodeNames[i] =(currentNodes[i]->name());
	313	for ( j=i+1; j < currentNodes.size(); j++) {
	314	MDOUBLE tempDis = -2000.0;
	315	MDOUBLE resLikelihood;
	316	int seqnodeI_ID = sd.getId(currentNodes[i]->name());
	317	int seqnodeJ_ID = sd.getId(currentNodes[j]->name());
	318	const sequence& snodeI = *sd.getSeqPtr(seqnodeI_ID,true);
	319	const sequence& snodeJ = *sd.getSeqPtr(seqnodeJ_ID,true);
	320	tempDis = _cd->giveDistance(snodeI,snodeJ,weights,&resLikelihood);
	321	distanceTable[i][j] = tempDis;
	322	LTable[i][j] = resLikelihood;
	323	}
	324	}
	325	if (myLog::LogLevel()>4) {
	326	for (i=0; i < currentNodes.size(); i++) {
	327	for (j=i+1; j < currentNodes.size(); j++) {
	328	LOG(100,<<"nj distance ["<<i<<"]["<<j<<"] ="<<distanceTable[i][j]<<endl);
	329	}
	330	}
	331	}
	332	//if (myLog::LogLevel()>4) {
	333	// for (i=0; i < currentNodes.size(); i++) {
	334	// for (j=i+1; j < currentNodes.size(); j++) {
	335	// LOG(4,<<"nj likelihood for distance["<<i<<"]["<<j<<"] ="<<LTable[i][j]<<endl);
	336	// }
	337	// }
	338	//}
	339	// for printing stuff later.
	340	for (int tmp1=0; tmp1<distanceTable.size();++tmp1)
	341	for (int tmp2=0; tmp2<distanceTable.size();++tmp2)
	342	_startingDistanceTable[tmp1][tmp2] = distanceTable[tmp1][tmp2];
	343	}
	344
	345
	346
	347
	348
	349
	350	void NJalg::NJiterate() {
	351	getMmatrixFromDistanceTable();
	352	int minRaw,minCol;
	353	findMinM(minRaw,minCol);
	354
	355	tree::nodeP nodeI = currentNodes[minRaw];
	356	tree::nodeP nodeJ = currentNodes[minCol];
	357	tree::nodeP theNewNode;
	358	theNewNode= SeparateNodes(nodeI,nodeJ);
	359
	360	//CHECK4
	361
	362	updateBranchDistance(theNewNode,nodeI,nodeJ,minRaw,minCol);
	363	//CHECK6
	364
	365	UpdateDistanceTableAndCurrentNodes(nodeI,nodeJ,theNewNode,minRaw,minCol);
	366	}
	367
	368
	369
	370
	371
	372
	373
	374
	375
	376
	377
	378
	379
	380
	381
	382	//CHECK1
	383	//cout<<"\n-----------------------------------------------"<<endl;
	384	//for (int h=0; h < currentNodes.size(); h++) cout<<currentNodes[h]->name()<<" = "<<h<<endl;
	385
	386	//CHECK2
	387	// for (int i =0; i < r_values.size();++i) cout<<"r["<<i<<"] = "<<r_values[i]<<endl;
	388
	389	//CHECK3
	390	// for (i =0; i < currentNodes.size();++i)
	391	// for (int j =i+1; j <currentNodes.size();++j)
	392	// cout<<"M["<<i<<"]["<<j<<"] = "<<Mmatrix[i][j]<<endl;
	393
	394	//CHECK4
	395	// string htuname = "HTU";
	396	// char k = 'a'+currentNodes.size();
	397	// htuname+=k;
	398	// theNewNode->SetName(htuname);
	399
	400	//CHECK5
	401	//_myET->getRoot()->SetName("RootOfStar");
	402
	403	//CHECK6
	404	// et.output(cout,et.getRoot(),tree::ANCESTOR);
	405
	406
	407
	408
	409
	410	*/

+90

-0

libs/phylogeny/nj.h less more

	0	// $Id: nj.h 962 2006-11-07 15:13:34Z privmane $
	1
	2	// version 1.00
	3	// last modified 3 Nov 2002
	4
	5	#ifndef ___NJ
	6	#define ___NJ
	7	#include "definitions.h"
	8	#include "tree.h"
	9	#include "sequenceContainer.h"
	10	#include "njConstrain.h"
	11	#include "distances2Tree.h"
	12	using namespace std;
	13
	14	class NJalg : public distances2Tree {
	15	public:
	16	virtual NJalg* clone() const {return new NJalg(*this);}
	17	// changed from computeNJtree to computeTree for competability to "distances2Tree"
	18	virtual tree computeTree(VVdouble distances, const vector<string>& names, const tree * const constriantTree = NULL);
	19	tree startingTree(const vector<string>& names);
	20	tree startingTree(const tree& inTree);
	21	void NJiterate(tree& et,vector<tree::nodeP>& currentNodes,
	22	VVdouble& distanceTable);
	23	void NJiterate(tree& et,vector<tree::nodeP>& currentNodes,
	24	VVdouble& distanceTable, njConstraint& njc);
	25	void calc_M_matrix(vector<tree::nodeP>& currentNodes,
	26	const VVdouble& distanceTable,
	27	const Vdouble & r_values,
	28	int& minRaw,int& minCol);
	29	void calc_M_matrix(vector<tree::nodeP>& currentNodes,
	30	const VVdouble& distanceTable,
	31	const Vdouble & r_values,
	32	int& minRaw,int& minCol, const njConstraint& njc);
	33	Vdouble calc_r_values(vector<tree::nodeP>& currentNodes,const VVdouble& distanceTable);
	34	tree::nodeP SeparateNodes(tree& et,tree::nodeP node1,tree::nodeP node2);
	35	void update3taxaLevel(VVdouble& distanceTable,Vdouble & r_values,vector<tree::nodeP>& currentNodes);
	36	void updateBranchDistance(const VVdouble& disT,
	37	const Vdouble& rValues,
	38	tree::nodeP nodeNew,
	39	tree::nodeP nodeI,
	40	tree::nodeP nodeJ,
	41	int Iplace, int Jplace);
	42
	43	void UpdateDistanceTableAndCurrentNodes(vector<tree::nodeP>& currentNodes,
	44	VVdouble& distanceTable,
	45	tree::nodeP nodeI,
	46	tree::nodeP nodeJ,
	47	tree::nodeP theNewNode,
	48	int Iplace, int Jplace);
	49
	50	};
	51
	52	/*
	53	//explicit NJalg(const tree& inTree, const computeDistance* cd);
	54	explicit NJalg();
	55	tree getNJtree() const {return *_myET;}// return a copy...
	56	void computeTree(const sequenceContainer& sd,const computeDistance* cd,const vector<MDOUBLE> * weights = NULL);
	57	VVdouble getDistanceTable(vector<string>& names) {
	58	names.erase(names.begin(),names.end());
	59	names = _nodeNames;
	60	return _startingDistanceTable;}
	61	VVdouble getLTable(vector<string>& names) {
	62	names.erase(names.begin(),names.end());
	63	names = _nodeNames;
	64	return LTable;}
	65	private:
	66	//void starTreeFromInputTree(const tree& inTree);
	67	void starTreeFromInputsequenceContainer(const sequenceContainer& sd);
	68	void GetDisTable(const sequenceContainer& sd,const vector<MDOUBLE> * weights);
	69	MDOUBLE dis(const int i, const int j) const{
	70	return (i<j) ? distanceTable[i][j] : distanceTable[j][i];
	71	}
	72	void findMinM(int& minRaw,int& minCol);
	73
	74
	75	tree* _myET;
	76	VVdouble distanceTable;
	77	VVdouble Mmatrix;
	78	Vdouble r_values;
	79	vector<tree::nodeP> currentNodes;
	80	const computeDistance* _cd;
	81
	82	VVdouble _startingDistanceTable; // for printing etc... not used by the algorithm.
	83	vector<string> _nodeNames; // for printing etc... not used by the algorithm.
	84	VVdouble LTable;// for printing etc... not used by the algorithm.
	85
	86	*/
	87	#endif
	88
	89

+130

-0

libs/phylogeny/njConstrain.cpp less more

	0	// $Id: njConstrain.cpp 962 2006-11-07 15:13:34Z privmane $
	1
	2	#include "definitions.h"
	3	#include <cassert>
	4	#include "njConstrain.h"
	5	#include "logFile.h"
	6
	7
	8
	9	njConstraint::njConstraint(const tree& starttree, const tree& constraintTree):_cTree(constraintTree), _interTreeMap(){
	10	vector<tree::nodeP> currentNodes;
	11	starttree.getAllLeaves(currentNodes,starttree.getRoot());
	12	vector<tree::nodeP> constraintNodes;
	13	_cTree.getAllLeaves(constraintNodes,_cTree.getRoot());
	14	assert(currentNodes.size()==constraintNodes.size());
	15
	16	map<string,tree::nodeP> name2Node;
	17	for (vector<tree::nodeP>::iterator vec_iter=constraintNodes.begin();vec_iter!=constraintNodes.end();++vec_iter){
	18	// name2Node[test];//=*vec_iter;
	19	name2Node[(vec_iter)->name()]=vec_iter;
	20	}
	21
	22	for (vector<tree::nodeP>::iterator vec_iter2=currentNodes.begin();vec_iter2!=currentNodes.end();++vec_iter2){
	23	assert(name2Node.find((*vec_iter2)->name()) != name2Node.end()); // cant find the taxa in the constratin tree!
	24	_interTreeMap[vec_iter2]=name2Node[(vec_iter2)->name()];
	25	}
	26	}
	27
	28
	29	bool njConstraint::isCompatible(const tree::nodeP& n1, const tree::nodeP& n2, const bool verbose) const
	30	{
	31	bool compatible;
	32	assert( _interTreeMap.find(n1) != _interTreeMap.end()); // cant find the taxa in the map!
	33	assert( _interTreeMap.find(n2) != _interTreeMap.end()); // cant find the taxa in the map!
	34
	35	tree::nodeP s1=_interTreeMap.find(n1)->second;
	36	tree::nodeP s2=_interTreeMap.find(n2)->second;
	37
	38	if (s1==_cTree.getRoot()) { // we are asking undirected questions from a directed tree
	39	compatible = (s2 != _cTree.getRoot()) && (s2->father() != _cTree.getRoot()) && (s2->father()->father() == _cTree.getRoot());
	40	if (verbose) LOG(11,<<"isCompatible - s1 is root"<<endl);
	41	} else if (s2==_cTree.getRoot()) { // we are asking undirected questions from a directed tree
	42	compatible = (s1 != _cTree.getRoot()) && (s1->father() != _cTree.getRoot()) && (s1->father()->father() == _cTree.getRoot());
	43	if (verbose) LOG(11,<<"isCompatible - s2 is root"<<endl);
	44	} else {
	45	compatible = (s1->father()==s2->father());
	46	}
	47
	48	if (verbose) LOG(11,<<"isCompatible:" <<s1->name()<<" + "<<s2->name()<<"-->" <<compatible<< endl);
	49	return (compatible);
	50	}
	51
	52	tree::nodeP joinNodesToSubtree(tree& t,tree::nodeP& s1, tree::nodeP& s2)
	53	{
	54	assert (s1->father()==s2->father()); // we can only do this if both nodes have same father
	55
	56	LOG(10,<<endl<<s1->name()<<" and "<<s2->name()<<endl);
	57
	58	tree::nodeP fatherNode=s1->father();
	59
	60	if (fatherNode->getNumberOfSons()==2) {
	61	// fatherNode->sons.clear();
	62	return (fatherNode); // no splitting needed
	63	}
	64
	65	if (s1->father()==t.getRoot() && t.getRoot()->getNumberOfSons()==3) { // no split needed, but the root needs to change
	66
	67	LOG(10,<<"************************* spacial case of constratin join"<<endl);
	68	LOGDO(10,t.output(myLog::LogFile(),tree::ANCESTORID));
	69	LOG(10,<<endl<<s1->name()<<" and "<<s2->name()<<endl);
	70	LOG(10,<<endl<<s1->father()->name()<<" and father "<<s2->father()->name()<<endl);
	71
	72	tree::nodeP newFatherNode = s1->father();
	73	for (int i=0; i<3; ++i)
	74	if (t.getRoot()->getSon(i)!= s1 && t.getRoot()->getSon(i)!= s2){
	75	t.rootAt(t.getRoot()->getSon(i));
	76	LOGDO(10,t.output(myLog::LogFile(),tree::ANCESTORID));
	77	LOG(10,<<endl<<endl);
	78	return (newFatherNode); // this is the new root;
	79	}
	80	}
	81
	82	tree::nodeP newNode = t.createNode(fatherNode, t.getNodesNum());
	83	newNode->setSon(s1);
	84	newNode->setSon(s2);
	85	newNode->claimSons();
	86
	87
	88	int k = fatherNode->getNumberOfSons();
	89	fatherNode->removeSon(s1);
	90	fatherNode->removeSon(s2);
	91	assert (k=fatherNode->getNumberOfSons()+2); // both s1 and s2 should have been skiped
	92	// fatherNode->sons.resize(k);
	93
	94	t.updateNumberofNodesANDleaves();
	95	t.create_names_to_internal_nodes();
	96	return(newNode);
	97	}
	98
	99	void njConstraint::join(const tree::nodeP& n1, const tree::nodeP& n2, const tree::nodeP& newFather)
	100	{
	101	assert(_interTreeMap.find(n1) != _interTreeMap.end()); // cant find the taxa in the map!
	102	assert(_interTreeMap.find(n2) != _interTreeMap.end()); // cant find the taxa in the map!
	103	assert(_interTreeMap.find(newFather) == _interTreeMap.end()); // should not find the new father in the map!
	104	assert(isCompatible(n1,n2));
	105
	106	// tree::nodeP origFather=_interTreeMap.find(n1)->father();
	107
	108	// do tree things
	109	LOG(10,<<endl<<n1->name()<<" AND "<<n2->name()<<endl);
	110	tree::nodeP newNode=joinNodesToSubtree(_cTree, _interTreeMap[n1], _interTreeMap[n2]);
	111
	112
	113	_interTreeMap.erase(n1);
	114	_interTreeMap.erase(n2);
	115	_interTreeMap[newFather]=newNode;
	116
	117
	118	LOGDO(17,_cTree.output(myLog::LogFile()));
	119
	120	}
	121	void njConstraint::output(ostream &out) const{
	122	_cTree.output(out,tree::ANCESTORID);
	123	out <<endl;
	124	}
	125
	126	ostream &operator<<(ostream &out, const njConstraint &c){
	127	c.output(out);
	128	return(out);
	129	}

+29

-0

libs/phylogeny/njConstrain.h less more

	0	// $Id: njConstrain.h 962 2006-11-07 15:13:34Z privmane $
	1
	2	#ifndef ___NJ_CONSTRAINT
	3	#define ___NJ_CONSTRAINT
	4
	5	#include <map>
	6
	7
	8	#include "sequenceContainer.h"
	9	#include "tree.h"
	10	using namespace std;
	11
	12	class njConstraint {
	13	public:
	14	njConstraint(const tree& starttree, const tree& constraintTree);
	15	bool isCompatible(const tree::nodeP& n1, const tree::nodeP& n2, const bool verbose=false) const;
	16	void join(const tree::nodeP& n1, const tree::nodeP& n2, const tree::nodeP& newFather);
	17	void output(ostream &out) const;
	18
	19	private:
	20	tree _cTree; // constriant tree
	21	map<tree::nodeP,tree::nodeP> _interTreeMap;
	22
	23
	24	};
	25
	26	ostream &operator<<(ostream &out, const njConstraint &c);
	27
	28	#endif // ___NJ_CONSTRAINT

+67

-0

libs/phylogeny/normalDist.cpp less more

	0	// $Id: normalDist.cpp 962 2006-11-07 15:13:34Z privmane $
	1	#include "normalDist.h"
	2	#include <cmath>
	3
	4	/*
	5	This function evaluates the standard normal density function-N(0,1):
	6	integral from -infinity to x over exp(-.5t^2/sqrt(2pi)) (copied from the web) using
	7	Milton Abramowiz and Irene A Stegun.
	8	Handbook of Mathematical Functions.
	9	National Bureau of Standards, 1964.
	10	*/
	11	MDOUBLE Phi(MDOUBLE x)
	12	{
	13	if (x>6.0) return 1;
	14	if (x<-6.0) return 0;
	15	MDOUBLE b1=0.31938153;
	16	MDOUBLE b2=-0.356563782;
	17	MDOUBLE b3=1.781477937;
	18	MDOUBLE b4=-1.821255978;
	19	MDOUBLE b5=1.330274429;
	20	MDOUBLE p=0.2316419;
	21	MDOUBLE c2=0.3989423;
	22	MDOUBLE a=fabs(x);
	23	MDOUBLE t=1.0/(1.0+a*p);
	24	MDOUBLE b=c2exp((-x)(x/2.0));
	25	MDOUBLE n=((((b5t+b4)t+b3)t+b2)t+b1)*t;
	26	n=1.0-b*n;
	27	if (x<0.0) n=1.0-n;
	28	return n;
	29	}
	30
	31	/*
	32	Computes the inverse normal distribution function (downloaded from the web)
	33	i.e. computes x when c=Phi(x)
	34	*/
	35	MDOUBLE normsinv(MDOUBLE p)
	36	{
	37	if (p<EPSILON) return VERYSMALL;
	38	if ((1-p)<EPSILON)return VERYBIG;
	39	MDOUBLE x(0.0);
	40	MDOUBLE q, r;
	41	if ((0 < p ) && (p < P_LOW))
	42	{
	43	q = sqrt(-2*log(p));
	44	x = (((((C1q+C2)q+C3)q+C4)q+C5)q+C6) / ((((D1q+D2)q+D3)q+D4)*q+1);
	45	}
	46	else
	47	{
	48	if ((P_LOW <= p) && (p <= P_HIGH))
	49	{
	50	q = p - 0.5;
	51	r = q*q;
	52	x = (((((A1r+A2)r+A3)r+A4)r+A5)r+A6)q /(((((B1r+B2)r+B3)r+B4)r+B5)*r+1);
	53	}
	54	else
	55	{
	56	if ((P_HIGH < p)&&(p < 1))
	57	{
	58	q = sqrt(-2*log(1-p));
	59	x = -(((((C1q+C2)q+C3)q+C4)q+C5)q+C6) / ((((D1q+D2)q+D3)q+D4)*q+1);
	60	}
	61	}
	62	}
	63	return x;
	64	}
	65
	66

+35

-0

libs/phylogeny/normalDist.h less more

	0	// $Id: normalDist.h 962 2006-11-07 15:13:34Z privmane $
	1	#ifndef ___NORMAL_DIST
	2	#define ___NORMAL_DIST
	3
	4	#include "definitions.h"
	5
	6
	7	#define A1 (-3.969683028665376e+01)
	8	#define A2 2.209460984245205e+02
	9	#define A3 (-2.759285104469687e+02)
	10	#define A4 1.383577518672690e+02
	11	#define A5 (-3.066479806614716e+01)
	12	#define A6 2.506628277459239e+00
	13	#define B1 (-5.447609879822406e+01)
	14	#define B2 1.615858368580409e+02
	15	#define B3 (-1.556989798598866e+02)
	16	#define B4 6.680131188771972e+01
	17	#define B5 (-1.328068155288572e+01)
	18	#define C1 (-7.784894002430293e-03)
	19	#define C2 (-3.223964580411365e-01)
	20	#define C3 (-2.400758277161838e+00)
	21	#define C4 (-2.549732539343734e+00)
	22	#define C5 4.374664141464968e+00
	23	#define C6 2.938163982698783e+00
	24	#define D1 7.784695709041462e-03
	25	#define D2 3.224671290700398e-01
	26	#define D3 2.445134137142996e+00
	27	#define D4 3.754408661907416e+00
	28	#define P_LOW 0.02425
	29	/* P_high = 1 - p_low*/
	30	#define P_HIGH 0.97575
	31
	32	MDOUBLE Phi(MDOUBLE x);
	33	MDOUBLE normsinv(MDOUBLE p);
	34	#endif

+5

-0

libs/phylogeny/nucJC.cpp less more

	0	// $Id: nucJC.cpp 962 2006-11-07 15:13:34Z privmane $
	1
	2	#include "nucJC.h"
	3
	4

+53

-0

libs/phylogeny/nucJC.h less more

	0	// $Id: nucJC.h 962 2006-11-07 15:13:34Z privmane $
	1
	2	#ifndef ___NUC_JC
	3	#define ___NUC_JC
	4
	5	#include <cmath>
	6	#include "replacementModel.h"
	7
	8	namespace nucDef {
	9	const MDOUBLE Alp = 4.0;
	10	const MDOUBLE odAl = 1.0/Alp; // one divided by alphabet
	11	const MDOUBLE om_odAl = 1.0-odAl; // one minus odAl;
	12	const MDOUBLE alDiv_omalp = Alp/(Alp-1.0);
	13	const MDOUBLE m_alDiv_omalp = -alDiv_omalp;
	14	}
	15
	16	class nucJC : public replacementModel {
	17	public:
	18	const int alphabetSize() const {return 4;}
	19
	20	virtual replacementModel* clone() const { return new nucJC(*this); }
	21
	22	explicit nucJC(){};
	23	const MDOUBLE Pij_t(const int i,const int j, const MDOUBLE d) const {
	24	// return ((i==j) ? 0.25+0.75exp(-4.0/3.0d): 0.25-0.25exp(-4.0/3.0d));
	25	return ((i==j) ? nucDef::odAl+nucDef::om_odAlexp(nucDef::m_alDiv_omalpd): nucDef::odAl-nucDef::odAlexp(nucDef::m_alDiv_omalpd));
	26	}
	27
	28	const MDOUBLE dPij_dt(const int i,const int j, const MDOUBLE d) const{
	29	// return ((i==j) ? -exp(-4.0/3.0d): exp(-4.0/3.0d)/3.0);
	30	return ((i==j) ? -exp(nucDef::m_alDiv_omalpd): exp(nucDef::m_alDiv_omalpd)/(nucDef::Alp-1));
	31	}
	32	const MDOUBLE freq(const int i) const {return 0.25;};
	33
	34	const MDOUBLE d2Pij_dt2(const int i,const int j, const MDOUBLE d) const{
	35	// return ((i==j) ? 4.0/3.0exp(-4.0/3.0d): -4.0/3.0exp(-4.0/3.0d));
	36	return ((i==j) ? nucDef::alDiv_omalpexp(nucDef::m_alDiv_omalpd): nucDef::m_alDiv_omalpexp(nucDef::m_alDiv_omalpd));
	37	}
	38
	39	const MDOUBLE Q(const int i, const int j) const {
	40	return ((i == j) ? ( - 1.0) : (1.0 / 3.0));
	41	}
	42
	43
	44	};
	45
	46	#endif
	47
	48	// note: according to the new C++ rules, the clone function should be like this:
	49	// virtual nucJC* clone() const { return new nucJC(*this); }
	50	// however, not all compiler support it yet. look at More Effective C++ page 126.
	51
	52

+122

-0

libs/phylogeny/nucleotide.cpp less more

	0	// $Id: nucleotide.cpp 962 2006-11-07 15:13:34Z privmane $
	1
	2	#include "nucleotide.h"
	3	#include "errorMsg.h"
	4
	5
	6	nucleotide::nucleotide() {
	7	_relation.resize(4);
	8	for (int i=0; i < _relation.size(); ++i) {
	9	_relation[i].resize(16);
	10	}
	11	for (int s=0;s<4;++s) {
	12	for (int t=0;t<16;++t){
	13	_relation[s][t]=relationsInternal(s,t);
	14	}
	15	}
	16	}
	17
	18	int nucleotide::fromChar(const string& str, const int pos) const {
	19	return fromChar(str[pos]);
	20	}
	21
	22	vector<int> nucleotide::fromString(const string &str) const {
	23	vector<int> vec;
	24	for (int i=0;i<str.size();i++)
	25	vec.push_back(fromChar(str[i]));
	26	return vec;
	27	}
	28
	29	int nucleotide::fromChar(const char s) const {
	30	switch (s) {
	31	case 'A' : case'a' : return 0 ; break;// A = adenine
	32	case 'C' : case'c' : return 1 ; break;// C = cytosine
	33	case 'G' : case'g' : return 2 ; break;// G = guanine
	34	case 'T' : case't' : return 3 ; break;// T = thymine
	35	case 'U' : case'u' : return 4 ; break; // U = uracil
	36	case 'R' : case'r' : return 5 ; break;// R = purine (same as [GA])
	37	case 'Y' : case'y' : return 6 ; break;// Y = pyrimidine (same as [TC])
	38	case 'K' : case'k' : return 7 ; break;// K = keto (same as [GT])
	39	case 'M' : case'm' : return 8 ; break;// M = amino (same as [AC])
	40	case 'S' : case's' : return 9 ; break;// S = strong (same as [GC])
	41	case 'W' : case'w' : return 10; break;// W = weak (same as [AT])
	42	case 'B' : case'b' : return 11; break;// B = (same as [GTC])
	43	case 'D' : case'd' : return 12; break;// D = (same as [GAT])
	44	case 'H' : case'h' : return 13; break;// H = (same as [ACT])
	45	case 'V' : case'v' : return 14; break;// V = (same as [GCA])
	46	case 'N' : case'n' : return 15; break;// N = any (same as [ACGT])
	47	case '?' : case'*' : return 15; break;
	48	case '-' : case'_' : return -1; break;
	49	case 'x' : case'X' : return 15; break;
	50	case '.' : return -3; break; // . is used in some sequence files as the character just in the line above...
	51	default:
	52	vector<string> err;
	53	err.push_back(" The nucleotide sequences contained the character: ");
	54	err[0]+=s;
	55	err.push_back(" The nucleotide was not one of the following: ");
	56	err.push_back("A, C, G, T, X, -, ?");
	57	err.push_back("a, c, g, t, x, _, *");
	58	errorMsg::reportError(err);
	59	}
	60	return -99;
	61	}
	62
	63	string nucleotide::fromInt(const int id) const {
	64	char x= fromIntInternal(id);
	65	string res;
	66	res.append(1,x);
	67	return res;
	68	}
	69
	70	char nucleotide::fromIntInternal(const int in_id) const {
	71	switch (in_id) {
	72	case 0 : return 'A' ; break;
	73	case 1 : return 'C' ; break;
	74	case 2 : return 'G' ; break;
	75	case 3 : return 'T' ; break;
	76	case -1: return '-' ; break;
	77	case 4 : return 'U'; break;
	78	case 5 : return 'R'; break;
	79	case 6 : return 'Y'; break;
	80	case 7 : return 'K'; break;
	81	case 8 : return 'M'; break;
	82	case 9 : return 'S'; break;
	83	case 10 : return 'W'; break;
	84	case 11 : return 'B'; break;
	85	case 12 : return 'D'; break;
	86	case 13 : return 'H'; break;
	87	case 14 : return 'V'; break;
	88	case 15 : return 'N'; break;
	89	default:
	90	vector<string> err;
	91	err.push_back(" unable to print nucleotide. nucleotide was not one of the following: ");
	92	err.push_back("A, C, G, T, -, ?");
	93	err.push_back("a, c, g, t, _, *");
	94	errorMsg::reportError(err); // make the program quit
	95	}//end of switch
	96	return '!' ; // for the lousy compiler
	97	}
	98
	99	int nucleotide::relationsInternal(const int ctc,const int charInSeq
	100	) const{ //ctc=charToCheck
	101	switch (charInSeq){
	102	case 0 : if (ctc==0) return 1 ; break;// A = adenine
	103	case 1 : if (ctc==1) return 1 ; break;// C = cytosine
	104	case 2 : if (ctc==2) return 1 ; break;// G = guanine
	105	case 3 : if (ctc==3) return 1 ; break;// T = thymine
	106	case 4 : if (ctc==4) return 1 ; break; // U = uracil
	107	case 5 : if (ctc==2\|\|ctc==0) return 1 ; break;// R = purine (same as [GA])
	108	case 6 : if (ctc==3\|\|ctc==1) return 1 ; break;// Y = pyrimidine (same as [TC])
	109	case 7 : if (ctc==2\|\|ctc==3) return 1 ; break;// K = keto (same as [GT])
	110	case 8 : if (ctc==0\|\|ctc==1) return 1 ; break;// M = amino (same as [AC])
	111	case 9 : if (ctc==2\|\|ctc==1) return 1 ; break;// S = (same as [GC])
	112	case 10: if (ctc==0\|\|ctc==3) return 1 ; break;// W = (same as [AT])
	113	case 11: if (ctc==2\|\|ctc==3\|\|ctc==1) return 1 ; break;// B = (same as [GTC])
	114	case 12: if (ctc==2\|\|ctc==0\|\|ctc==3) return 1 ; break;// D = (same as [GAT])
	115	case 13: if (ctc==0\|\|ctc==1\|\|ctc==3) return 1 ; break;// H = (same as [ACT])
	116	case 14: if (ctc==2\|\|ctc==1\|\|ctc==0) return 1 ; break;// V = (same as [GCA])
	117	case 15: if (ctc==0\|\|ctc==1\|\|ctc==2\|\|ctc==3) return 1 ; break;// N = any (same as [ACGT])
	118	};
	119	return 0;
	120	};
	121

+110

-0

libs/phylogeny/nucleotide.h less more

	0	// $Id: nucleotide.h 1901 2007-03-15 13:21:06Z nimrodru $
	1
	2	#ifndef ___NUCLEOTIDE_H
	3	#define ___NUCLEOTIDE_H
	4
	5	#include <cassert>
	6	#include "definitions.h"
	7	#include "alphabet.h"
	8
	/* =======================================================================
	This is the nucleotide class. It is derived from the class alphabet.
	All alphabets are stored internally as integers, so what a derived class has
	to implement is a way to translate from strings to arrays (vectors) of
	integers and back.

	Starting with the easiest functions to explain:
	size() gives the size of the alphabet: 4 in this case.
	stringSize() says whether it is a one-letter code (unlike codon, which is a 3-letter code).

	clone() is a general mechanism in C++. The idea is that you have a derived class
	and a pointer to the base class, and you want to copy the derived object through it.
	In such a case you use the clone() mechanism. Ref: More Effective C++, page 126.

	int unknown(): sometimes one doesn't know if it is A, C, G, or T. In such a case we use
	the int that represents unknown. In this class it is set to 15. This is used, for example,
	when gap characters are converted to unknown characters.


	int fromChar(const string& str, const int pos) and int fromChar(const char s)
	give the same answer: there is a map from characters to integers.
	For example, A is zero, C is 1, etc. However, the function fromChar(const char s)
	is specific to nucleotide and to amino - because these are one-letter alphabets.
	For codon - this function won't work. This is why the general function is
	of the form int fromChar(const string& str, const int pos);
	In the case of codon - it will read 3 letters each time.
	=========================================================================*/
	35
	36
	37
	38	class nucleotide : public alphabet {
	39	public:
	40	explicit nucleotide();
	41	virtual ~nucleotide() {}
	42	virtual alphabet* clone() const { return new nucleotide(*this); }
	43	int unknown() const {return 15;}
	44	int gap() const {return -1;}
	45	int size() const {return 4;}
	46	int stringSize() const {return 1;} // one letter code.
	47
	48	int fromChar(const string& str, const int pos) const;
	49	int fromChar(const char s) const;
	50	vector<int> fromString(const string& str) const;
	51
	52	string fromInt(const int id) const;
	53	int relations(const int charInSeq, const int charToCheck) const{ // see explanation below
	54	assert (charInSeq != -1);//gaps in the sequences
	55	return _relation[charToCheck][charInSeq];
	56	}
	57
	58	// "specific" here is not unknown, nor ambiguity, nor gap (for example, for nucleotides it will true for A,C,G, or T).
	59	// in this speical case, in fact it will be true also for U which is coded by 4.
	60	// this is why it is <= size.
	61	bool isSpecific(const int id) const {return (id>=0 && id <= size());}
	62
	63	private:
	64	VVint _relation;
	65	char fromIntInternal(const int in_id) const;
	66	int relationsInternal(const int ctc,const int charInSeq) const;
	67
	68	};
	69
	70
	71
	72	#endif
	73
	74
	// Explanation about relations:
	// Sometimes the sequences contain letters like R, which means G or A.
	// When calculating the likelihood of such sequences we have to take this into account.
	// For example, the tree:
	/*
	            A
	           / \
	       t1 /   \ t2
	         /     \
	        R       A

	   L = P(A)*P(A->A)(t1)*P(A->A)(t2) + P(A)*P(A->G)(t1)*P(A->A)(t2)
	     = P(A)*P(A->A)(t2) * [ P(A->A)(t1) + P(A->G)(t1) ]

	   Note that we don't divide it by 2.

	   VVint _relation keeps this information:

	         A C G T
	     A   1 0 0 0
	     C   0 1 0 0
	     G   0 0 1 0
	     T   0 0 0 1
	     U   0 0 0 1
	     R   1 0 1 0
	     Y   0 1 0 1
	     K
	     .
	     .
	     .
	*/
	104
	105
	106
	107
	108
	109

+498

-0

libs/phylogeny/numRec.cpp less more

	0	// $Id: numRec.cpp 5990 2009-03-19 10:21:20Z privmane $
	1
	2	#include "numRec.h"
	3	#include "matrixUtils.h"
	4	#include <cassert>
	5	#include <algorithm>
	6
	7	#ifndef VERBOS
	8	#define VERBOS
	9	#endif
	10
	11	void validateSym(VVdouble & v) {
	12	const MDOUBLE epsilon = 0.00000001;
	13	for (int i=0; i < v.size(); ++i) {
	14	for (int j=i+1; j < v.size(); ++j) {
	15	if (fabs(v[i][j] - v[j][i])> epsilon) {
	16	LOG(5,<<"v["<<i<<"]["<<j<<"]="<<v[i][j]<<endl);
	17	LOG(5,<<"v["<<j<<"]["<<i<<"]="<<v[j][i]<<endl);
	18
	19	errorMsg::reportError("trying to find eigen values to non-sym matrix");
	20	}
	21	else v[i][j] = v[j][i];
	22	}
	23	}
	24	}
	25
	26	int MyJacobi(VVdouble &Insym, VVdouble &RightEigenV, Vdouble &EigenValues) {
	27	validateSym(Insym);
	28	const int MaxNumberOfSweeps = 100000;
	29	VVdouble& v = RightEigenV;
	30	VVdouble& a = Insym;
	31	Vdouble& d = EigenValues;
	32	//CheckSizeAndTypeAndResizeIfNessary();
	33	int i,j;
	34	const int size = v.size();
	35
	36	// preparing V to be the indentity matrix
	37	for (i=0; i<size; ++i) {
	38	for (int j=0; j<size ; ++j) v[i][j]=0.0;
	39	v[i][i] = 1.0;
	40	}
	41
	42
	43	for (i=0 ; i<size; ++i ) {
	44	d[i] = a[i][i];
	45	}
	46
	47	MDOUBLE sm = 0.0; // sm is the sum of the off-diagonal elements
	48	int ip, iq;
	49	for (i = 0; i< MaxNumberOfSweeps ; ++i) {
	50	sm = 0.0;
	51	for (ip = 0; ip<size ; ++ip) {
	52	for (iq = ip+1; iq <size; ++iq) sm +=fabs (a[ip][iq]);
	53	}
	54	//if(i%300==0)
	55	// LOG(5,<<"sm= "<<sm<<endl);
	56	if (sm == 0.0) return 0; // the program is suppose to return here, after some rounds of i.
	57	MDOUBLE tresh;
	58	if (i<3) tresh = 0.2 * sm / (size*size); else tresh = 0.0;
	59
	60	MDOUBLE g;
	61	for (ip=0 ; ip<size; ++ip) {
	62	for (iq = ip+1 ; iq<size; ++iq) {
	63	g = 100.0*fabs(a[ip][iq]);
	64
	65	#ifdef VERBOS
	66	if (g<10e-50) {
	67	LOG(5,<<"small g!"<<endl);
	68	if ((i>3 && (fabs(d[ip]+g) == fabs(d[ip])) && (fabs(d[iq]+g)==fabs(d[iq])))==false) {
	69	LOG(5,<<"g is small: "<<g<< "yes, it is not zeroed"<<endl);
	70	LOG(5,<<"because d[ip] is: "<<d[ip]<<" and d[iq] is: "<<d[iq]<<endl);
	71	LOG(5,<<"ip is: "<<ip<<" iq is: "<<iq<<endl);
	72	}
	73	}
	74	#endif //VERBOS
	75	if (i>3 && (fabs(d[ip]+g) == fabs(d[ip])) && (fabs(d[iq]+g)==fabs(d[iq])) ) {
	76	a[ip][iq] = 0.0;
	77	}
	78	else if (fabs(a[ip][iq]) > tresh) {
	79	MDOUBLE h;
	80	MDOUBLE t;
	81	MDOUBLE theta;
	82	h = d[iq]-d[ip];
	83	// assert(h!=0);
	84	if (fabs(h) + g == fabs(h)) {
	85	assert(h!=0);
	86	t = a[ip][iq] / h;
	87	}
	88	else {
	89	theta = 0.5*h/(a[ip][iq]);
	90	t = 1.0 / (fabs(theta)+sqrt(1.0+theta*theta));
	91	if (theta<0.0) t = -t;
	92	}
	93	MDOUBLE c,s;
	94	c = 1.0 / sqrt(1.0+t*t);
	95	s = t*c;
	96	MDOUBLE tau;
	97	tau = s/ (1.0 + c);
	98	h = t * a[ip][iq];
	99
	100	d[ip] = d[ip] - t * a[ip][iq];
	101	d[iq] = d[iq] + t * a[ip][iq];
	102	a[ip][iq]=0.0;
	103	MDOUBLE tmp1, tmp2;
	104	for (j = 0; j < ip; ++j) {
	105	tmp1 = a[j][ip] - s(a[j][iq]+a[j][ip]tau); // updating the above element of a...
	106	tmp2 = a[j][iq] + s(a[j][ip]-a[j][iq]tau);
	107	a[j][ip] = tmp1;
	108	a[j][iq] = tmp2;
	109	}
	110
	111	for (j = ip+1;j<iq; ++j) {
	112	tmp1 = a[ip][j] - s(a[j][iq]+a[ip][j]tau); // updating the above element of a..
	113	tmp2 = a[j][iq] + s(a[ip][j]-a[j][iq]tau);
	114	a[ip][j] = tmp1;
	115	a[j][iq] = tmp2;
	116	}
	117
	118	for (j = iq+1; j< size ; ++j) {
	119	tmp1 = a[ip][j] - s(a[iq][j]+a[ip][j]tau); // updating the above element of a..
	120	tmp2 = a[iq][j] + s(a[ip][j]-a[iq][j]tau);
	121	a[ip][j] = tmp1;
	122	a[iq][j] = tmp2;
	123	}
	124
	125	for (j = 0; j< size ; ++j) {
	126	tmp1 = v[j][ip] - s(v[j][iq]+v[j][ip]tau); // updating v
	127	tmp2 = v[j][iq] + s(v[j][ip]-v[j][iq]tau);
	128	v[j][ip] = tmp1;
	129	v[j][iq] = tmp2;
	130	}
	131	} // end of "else if (fabs(a[ip][iq] > tresh)"
	132	} // end of for (iq = ...
	133	} // end of for (ip = ...
	134	} // end of for (i = 0; i< MaxNumberOfSweeps ; ++i) {
	135	vector<string> err;
	136	err.push_back("problems in function MyJacobi. more than MaxNumberOfSweeps were necesary.");
	137	errorMsg::reportError(err);
	138
	139	return -1;
	140	} //end of function
	141
	142
	143
	144
	145
	146	///////////////////////////////////////////
	147	//Adi cahnges //////////////////////////
	148	/////////////////////////////////////////
	149	MDOUBLE sign(MDOUBLE a,MDOUBLE b){
	150	return (b>0?fabs(a):-fabs(a));
	151	}
	152
	153	MDOUBLE pythag(const MDOUBLE a, const MDOUBLE b){
	154	return sqrt(pow(a,2)+pow(b,2));
	155	}
	156
	157
	158	void houseHolder(VVdouble &mat,VVdouble &Q){
	159	MDOUBLE sigma=0,H,sqrtSigma,K=0,tmp;
	160	int c,r,j,i,n = mat.size();
	161	Q.resize(n);
	162	for(i=0;i<n;i++){
	163	Q.resize(n);
	164	}
	165	for (i=0;i<n;i++)
	166	Q[i].resize(n,0.0);
	167	Vdouble p,q,u;
	168	p.resize(n,0.0);
	169	q.resize(n,0.0);
	170	u.resize(n,0.0);
	171	for (i=n-1;i>1;i--){
	172	sigma=0; //init sigma
	173	K=0; //init K
	174
	175	for(j=0;j<i;j++)
	176	sigma+= mat[i][j]*mat[i][j]; //compute sigma: O(n)
	177
	178	sqrtSigma = mat[i][i-1]>=0.0 ? sqrt(sigma) : -sqrt(sigma); //compute sqrt of sigma +/-
	179
	180	H=sigma+mat[i][i-1]sqrtSigma; //comute H = 0.5\|u\|^2. until here O(n)
	181
	182	/*createing U*****/
	183	for(r=0;r<i;r++) { //update vector u with row i the matrix until i; //takes O(n)
	184	Q[i][r]= u[r] = mat[i][r];
	185	Q[r][i] = u[r]/H;
	186	}
	187	u[i-1]+=sqrtSigma; //update element (i,i-1)
	188	Q[i][i-1]=u[i-1];
	189	Q[i-1][i]=u[i-1]/H;
	190	for(r=i;r<n;r++) //update elemnts (i,j) =0 for j>=i.
	191	u[r]=0.0;
	192	/***********************/
	193	for(r=0;r<n;r++){ //compute vector p O(n^2)
	194	p[r]=0.0;
	195	for (c=0;c<i;c++)
	196	p[r]+=mat[r][c]*u[c]; //compute AU
	197	p[r]/=H; // ->AU/H
	198	}
	199
	200	for(r=0;r<i;r++) // compure K O(n)
	201	K+=u[r]*p[r];
	202	K/=(2*H);
	203	// cout<<"K is: "<<K<<endl;
	204
	205	for(r=0;r<n;r++) //compute vector q O(n)
	206	q[r]=p[r]-K*u[r];
	207
	208	for(r=0;r<=i;r++) {//update matrix O(n^2) only part of the matrix
	209	for(c=0;c<=i;c++)
	210	mat[r][c]-=q[r]u[c]+u[r]q[c];
	211	}
	212
	213	}
	214	for (i=0;i<n;i++){
	215	for(j=0;j<i;j++){
	216	tmp=0;
	217	for(c=0;c<i;c++)
	218	tmp+=Q[i][c]*Q[c][j];
	219	for(c=0;c<i;c++)
	220	Q[c][j]-=tmp*Q[c][i];
	221	}
	222	Q[i][i]=1;
	223	for(j=0;j<i;j++)
	224	Q[j][i]=Q[i][j]=0.0;
	225	}
	226	}
	227
	228	void tred2(VVdouble &a, Vdouble &d, Vdouble &e) //a = symmetricMatrix,d = diagonal,e = offdiagonal
	229	{
	230	int l,k,j,i;
	231	MDOUBLE scale,hh,h,g,f;
	232
	233	int n=d.size();
	234	for (i=n-1;i>0;i--) {
	235	l=i-1;
	236	h=scale=0.0;
	237	if (l > 0) {
	238	for (k=0;k<l+1;k++)
	239	scale += fabs(a[i][k]);
	240	if (scale == 0.0)
	241	e[i]=a[i][l];
	242	else {
	243	for (k=0;k<l+1;k++) {
	244	a[i][k] /= scale;
	245	h += a[i][k]*a[i][k];
	246	}
	247	f=a[i][l];
	248	g=(f >= 0.0 ? -sqrt(h) : sqrt(h));
	249	e[i]=scale*g;
	250	h -= f*g;
	251	a[i][l]=f-g;
	252	f=0.0;
	253	for (j=0;j<l+1;j++) {
	254	// Next statement can be omitted if eigenvectors not wanted
	255	a[j][i]=a[i][j]/h;
	256	g=0.0;
	257	for (k=0;k<j+1;k++)
	258	g += a[j][k]*a[i][k];
	259	for (k=j+1;k<l+1;k++)
	260	g += a[k][j]*a[i][k];
	261	e[j]=g/h;
	262	f += e[j]*a[i][j];
	263	}
	264	hh=f/(h+h);
	265	for (j=0;j<l+1;j++) {
	266	f=a[i][j];
	267	e[j]=g=e[j]-hh*f;
	268	for (k=0;k<j+1;k++)
	269	a[j][k] -= (fe[k]+ga[i][k]);
	270	}
	271	}
	272	} else
	273	e[i]=a[i][l];
	274	d[i]=h;
	275	}
	276	// Next statement can be omitted if eigenvectors not wanted
	277	d[0]=0.0;
	278	e[0]=0.0;
	279	// Contents of this loop can be omitted if eigenvectors not
	280	// wanted except for statement d[i]=a[i][i];
	281	for (i=0;i<n;i++) {
	282	l=i;
	283	if (d[i] != 0.0) {
	284	for (j=0;j<l;j++) {
	285	g=0.0;
	286	for (k=0;k<l;k++)
	287	g += a[i][k]*a[k][j];
	288	for (k=0;k<l;k++)
	289	a[k][j] -= g*a[k][i];
	290	}
	291	}
	292	d[i]=a[i][i];
	293	a[i][i]=1.0;
	294	for (j=0;j<l;j++) a[j][i]=a[i][j]=0.0;
	295	}
	296	}
	297
	298	//called if houseHolder was used - the modified QL implementation corresponding to the modified implementation of householder
	299	/*
	300	void QL(Vdouble &d, Vdouble &e, VVdouble &z){
	301	int m,l,iter,i,k;
	302	MDOUBLE s,r,p,g,f,dd,c,b;
	303
	304	int n=d.size();
	305	//* for (i=1;i<n;i++) e[i-1]=e[i];
	306	//* e[n-1]=0.0;
	307	//* e.push_back(0);//since in my algorithm I return an n-1 sized e
	308	for (l=0;l<n;l++) {
	309	iter=0;
	310	do {
	311	for (m=l;m<n-1;m++) {
	312	dd=fabs(d[m])+fabs(d[m+1]);
	313	if (fabs(e[m])+dd == dd) break;
	314	}
	315	if (m != l) {
	316	if (iter++ == 30) errorMsg::reportError("Too many iterations in QL");
	317	g=(d[l+1]-d[l])/(2.0*e[l]);
	318	r=pythag(g,1.0);
	319	g=d[m]-d[l]+e[l]/(g+sign(r,g));
	320	s=c=1.0;
	321	p=0.0;
	322	for (i=m-1;i>=l;i--) {
	323	f=s*e[i];
	324	b=c*e[i];
	325	e[i+1]=(r=pythag(f,g));
	326	if (r == 0.0) {
	327	d[i+1] -= p;
	328	e[m]=0.0;
	329	break;
	330	}
	331	s=f/r;
	332	c=g/r;
	333	g=d[i+1]-p;
	334	r=(d[i]-g)s+2.0c*b;
	335	d[i+1]=g+(p=s*r);
	336	g=c*r-b;
	337	// Next loop can be omitted if eigenvectors not wanted
	338	for (k=0;k<n;k++) {
	339	f=z[k][i+1];
	340	z[k][i+1]=sz[k][i]+cf;
	341	z[k][i]=cz[k][i]-sf;
	342	}
	343	}
	344	if (r == 0.0 && i >= l) continue;
	345	d[l] -= p;
	346	e[l]=g;
	347	e[m]=0.0;
	348	}
	349	} while (m != l);
	350	}
	351	}
	352	*/
	353
	354
	355	//called if tred2 was used - the original QL implementation from numerical recepies
	356	void QL(Vdouble &d, Vdouble &e, VVdouble &z){
	357	int m,l,iter,i,k;
	358	MDOUBLE s,r,p,g,f,dd,c,b;
	359
	360	int n=d.size();
	361	for(i=1;i<n;i++){
	362	e[i-1]=e[i];
	363	}
	364	e[n-1]=0.0;
	365	for(l=0;l<n;l++){
	366	iter=0;
	367	do {
	368	for(m=l;m<n-1;m++){
	369	dd=fabs(d[m])+fabs(d[m+1]);
	370	if(fabs(e[m])+dd == dd) break;
	371	}
	372	if(m!=l){
	373	if(iter++==30){
	374	errorMsg::reportError("too many iteration in QL");
	375	}
	376	g=(d[l+1]-d[l])/(2.0*e[l]);
	377	r=pythag(g,1.0);
	378	g=d[m]-d[l]+e[l]/(g+sign(r,g));
	379	s=c=1.0;
	380	p=0.0;
	381	for(i=m-1;i>=l;i--){
	382	f=s*e[i];
	383	b=c*e[i];
	384	e[i+1]=(r=pythag(f,g));
	385	if(r==0.0){
	386	d[i+1]-=p;
	387	e[m]=0.0;
	388	break;
	389	}
	390	s=f/r;
	391	c=g/r;
	392	g=d[i+1]-p;
	393	r=(d[i]-g)s+2.0c*b;
	394	d[i+1]=g+(p=s*r);
	395	g=c*r-b;
	396	for(k=0;k<n;k++){
	397	f=z[k][i+1];
	398	z[k][i+1]=sz[k][i]+cf;
	399	z[k][i]=cz[k][i]-sf;
	400	}
	401	}
	402	if(r==0 && i>=l) continue;
	403	d[l]-=p;
	404	e[l]=g;
	405	e[m]=0.0;
	406	}
	407	}
	408	while(m!=l);
	409	}
	410	}
	411
	412
	413
	414	/************************************************************************/
	415	//diaganol will be eigen values and fill matrix of eigen vectors. */
	416	/************************************************************************/
	417
	418	//A modified implementation for eigen analysis, using the house holder function.
	419	/*
	420	void computeEigenSystem(VVdouble &symmetricMatrix,VVdouble &eigenVectros,Vdouble &diagonal){
	421
	422	houseHolder(symmetricMatrix,eigenVectros);
	423
	424	Vdouble offdiagonal;
	425	offdiagonal.resize(symmetricMatrix.size());
	426	for (int i=0; i<symmetricMatrix.size(); i++){
	427	diagonal[i]=symmetricMatrix[i][i];
	428	}
	429	for (int i2=0; i2<symmetricMatrix.size()-1; i2++){
	430	offdiagonal[i2]=symmetricMatrix[i2+1][i2];
	431	}
	432
	433	QL(diagonal,offdiagonal,eigenVectros);
	434	return;
	435	}
	436	*/
	437
	438	//Uses original implementation of tred2 function for eigen analysis, copied from numerical recepies p474.
	439	void computeEigenSystem(VVdouble &symmetricMatrix,VVdouble &eigenVectros,Vdouble &diagonal){
	440
	441	Vdouble offdiagonal;
	442	offdiagonal.resize(symmetricMatrix.size());
	443
	444	tred2(symmetricMatrix,diagonal,offdiagonal);
	445
	446	eigenVectros = symmetricMatrix;
	447
	448	QL(diagonal,offdiagonal,eigenVectros);
	449
	450	return;
	451	}
	452
	453
	454	// the following two functions used for Kolomogorov-Smirnoff test
	455	MDOUBLE performKSTest(const uniformDistribution& empiricalDist, Vdouble& observedDist)
	456	{
	457	MDOUBLE pVal = 0.0;
	458	MDOUBLE distance = 0.0;
	459
	460	int j;
	461	MDOUBLE dt,en,fn,fo = 0.0;
	462
	463	int n = observedDist.size();
	464	sort(observedDist.begin(),observedDist.end());
	465	en = n;
	466	MDOUBLE cdfObserved = 0.0;
	467	for(j = 0; j < n; ++j){
	468	cdfObserved+=observedDist[j];
	469	fn = (j+1)/en;
	470	dt = max(fabs(fo-cdfObserved),fabs(fn-cdfObserved));
	471	if(dt > distance)
	472	distance = dt;
	473	fo = fn;
	474	}
	475	en = sqrt(en);
	476	pVal = computeProbForKS((en+0.12+0.11/en)*distance);
	477	return pVal;
	478	}
	479
	480	// function called only by performKSTest
	481	MDOUBLE computeProbForKS (const MDOUBLE QsParam)
	482	{
	483	const MDOUBLE EPS1 = 1.0e-6,EPS2 = 1.0e-16;
	484	int j;
	485	MDOUBLE a2,fac = 2.0, sum = 0.0, term, termbf = 0.0;
	486
	487	a2 = -2.0QsParamQsParam;
	488	for(j = 1; j <= 100; ++j){
	489	term = facexp(a2j*j);
	490	sum += term;
	491	if(fabs(term) <= EPS1termbf \|\| fabs(term) <= EPS2sum)
	492	return sum;
	493	fac = -fac;
	494	termbf = fabs(term);
	495	}
	496	return 1.0; //get here only by failing to converge
	497	}

+459

-0

libs/phylogeny/numRec.h less more

	0	// $Id: numRec.h 9652 2011-07-12 13:59:26Z rubi $
	1
	2	// version 1.00
	3	// last modified 2 Nov 2002
	4
	5	#ifndef ___NUM_REC
	6	#define ___NUM_REC
	7
	8	#include <cmath>
	9	#include <cassert>
	10	#include <iostream>
	11	using namespace std;
	12	#include "definitions.h"
	13	#include "errorMsg.h"
	14	#include "uniformDistribution.h"
	15	#include "logFile.h"
	16
	17	//#define VERBOS
	18	#define SIGN(a,b) ((b) >= 0.0 ? fabs(a) : -fabs(a))
	19
	20	//========================== function brent =========================================
	21	template <typename regF>
	22	MDOUBLE brent(MDOUBLE ax, MDOUBLE bx, MDOUBLE cx, regF f, MDOUBLE tol,
	23	MDOUBLE *xmin) {
	24
	25	const int ITMAX = 100;
	26	const MDOUBLE CGOLD = 0.3819660f;
	27	const MDOUBLE ZEPS = 1.0e-10f;
	28
	29	int iter;
	30	MDOUBLE a,b,d=0.0,etemp,fu,fv,fw,fx,p,q,r,tol1,tol2,u,v,w,x,xm;
	31	MDOUBLE e=0.0;
	32
	33	a=(ax < cx ? ax : cx);
	34	b=(ax > cx ? ax : cx);
	35	x=w=v=bx;
	36	fw=fv=fx=f(x);
	37	LOG(10,<<"brent, f("<<x<<")="<<fx<<endl);
	38	for (iter=1;iter<=ITMAX;iter++) {
	39	xm=0.5*(a+b);
	40	tol2=2.0(tol1=tolfabs(x)+ZEPS);
	41	if (fabs(x-xm) <= (tol2-0.5*(b-a))) {
	42	*xmin=x;
	43	return fx;
	44	}
	45	if (fabs(e) > tol1) {
	46	r=(x-w)*(fx-fv);
	47	q=(x-v)*(fx-fw);
	48	p=(x-v)q-(x-w)r;
	49	q=2.0*(q-r);
	50	if (q > 0.0) p = -p;
	51	q=fabs(q);
	52	etemp=e;
	53	e=d;
	54	if (fabs(p) >= fabs(0.5qetemp) \|\| p <= q(a-x) \|\| p >= q(b-x))
	55	d=CGOLD*(e=(x >= xm ? a-x : b-x));
	56	else {
	57	d=p/q;
	58	u=x+d;
	59	if (u-a < tol2 \|\| b-u < tol2)
	60	d=SIGN(tol1,xm-x);
	61	}
	62	} else {
	63	d=CGOLD*(e=(x >= xm ? a-x : b-x));
	64	}
	65	u=(fabs(d) >= tol1 ? x+d : x+SIGN(tol1,d));
	66	fu=f(u);
	67	LOG(10,<<"brent, f("<<u<<")="<<fu<<endl);
	68	if (fu <= fx) {
	69	if (u >= x) a=x; else b=x;
	70	v=w;w=x;x=u;
	71	fv=fw;fw=fx; fx=fu;
	72	} else {
	73	if (u < x) a=u; else b=u;
	74	if (fu <= fw \|\| w == x) {
	75	v=w;
	76	w=u;
	77	fv=fw;
	78	fw=fu;
	79	} else if (fu <= fv \|\| v == x \|\| v == w) {
	80	v=u;
	81	fv=fu;
	82	}
	83	}
	84	}
	85	errorMsg::reportError(" too many iterations in function, brent. "); // also quit the program
	86	return -1;
	87	}
	88
	89	/*
	90	//A doubleRep implementation of brent cause return type function overloading is forbidden in c++
	91	template <typename regF>
	92	doubleRep brentDoubleRep(doubleRep ax, doubleRep bx, doubleRep cx, regF f, doubleRep tol,
	93	MDOUBLE *xmin) {
	94
	95	const int ITMAX = 100;
	96	const doubleRep CGOLD(0.3819660f);
	97	const doubleRep ZEPS(1.0e-10f);
	98	doubleRep minusOne(-1.0);
	99	int iter;
	100	doubleRep fu,fv,fw,fx,a,b,etemp,p,q,r,u,v,w,x;
	101	doubleRep d(0.0);
	102	doubleRep e(0.0);
	103	doubleRep half(0.5);
	104	doubleRep two(2.0);
	105	doubleRep zero(0.0);
	106
	107	a=(ax < cx ? ax : cx);
	108	b=(ax > cx ? ax : cx);
	109	x=w=v=bx;
	110	fw=fv=fx=f(convert(x));
	111	LOG(10,<<"brent, f("<<x<<")="<<fx<<endl);
	112	for (iter=1;iter<=ITMAX;iter++) {
	113	doubleRep xm(0.5*convert(a+b));
	114	doubleRep tol1(convert(tol)*fabs(convert(x)));
	115	doubleRep tol2(2.0*convert((tol1+ZEPS)));
	116	if (fabs(convert(x+minusOnexm)) <= convert(tol2+minusOnehalf(b+minusOnea))) {
	117	*xmin=convert(x);
	118	return fx;
	119	}
	120	if (fabs(convert(e)) > convert(tol1)) {
	121	r=(x+minusOnew)(fx+minusOne*fv);
	122	q=(x+minusOnev)(fx+minusOne*fw);
	123	p=(x+minusOnev)q+minusOne(x+minusOnew)*r;
	124	q=two(q+minusOner);
	125	if (q > zero) p = minusOne*p;
	126	doubleRep newQ(fabs(convert(q)));
	127	q=newQ;
	128	etemp=e;
	129	e=d;
	130	if (fabs(convert(p)) >= fabs(convert(halfqetemp)) \|\| p <= q(a+minusOnex) \|\| p >= q(b+minusOnex))
	131	d=CGOLD(e=(x >= xm ? a+minusOnex : b+minusOne*x));
	132	else {
	133	d=p/q;
	134	u=x+d;
	135	if (u+minusOnea < tol2 \|\| b+minusOneu < tol2){
	136	doubleRep newD(SIGN(convert(tol1),convert(xm+minusOne*x)));
	137	d=newD;
	138	}
	139	}
	140	} else {
	141	d=CGOLD(e=(x >= xm ? a+minusOnex : b+minusOne*x));
	142	}
	143	u=(fabs(convert(d)) >= convert(tol1) ? x+d : x+SIGN(convert(tol1),convert(d)));
	144	fu=f(convert(u));
	145	LOG(10,<<"brent, f("<<u<<")="<<fu<<endl);
	146	if (fu <= fx) {
	147	if (u >= x) a=x; else b=x;
	148	v=w;w=x;x=u;
	149	fv=fw;fw=fx; fx=fu;
	150	} else {
	151	if (u < x) a=u; else b=u;
	152	if (fu <= fw \|\| w == x) {
	153	v=w;
	154	w=u;
	155	fv=fw;
	156	fw=fu;
	157	} else if (fu <= fv \|\| v == x \|\| v == w) {
	158	v=u;
	159	fv=fu;
	160	}
	161	}
	162	}
	163	errorMsg::reportError(" too many iterations in function, brentDoubleRep. "); // also quit the program
	164	return minusOne;
	165	}
	166	*/
	167	// ===================================== function dbrent ========================================
	168	/* The efficiency of this function for likelihood computations can be improved by replacing
	169	functors regF f and dF df with one objects that preforms the likelihood computation once
	170	and produces both L(t) and dL(t)/dt. This object will provide methods:
	171	MDOUBLE f(MDOUBLE x)
	172	MDOUBLE df(MDOUBLE x)
	173	*/
	174
	175	#define ITMAX 100
	176	#define ZEPS 1.0e-10
	177	#define MOV3(a,b,c, d,e,f) (a)=(d);(b)=(e);(c)=(f);
	178
	179	template <typename regF, typename dF>
	180	MDOUBLE dbrent(MDOUBLE ax, MDOUBLE bx, MDOUBLE cx, regF f,
	181	dF df, MDOUBLE tol, MDOUBLE *xmin) {
	182
	183	int iter,ok1,ok2;
	184	MDOUBLE a,b,d=0.0,d1,d2,du,dv,dw,dx,e=0.0;
	185	MDOUBLE fu,fv,fw,fx,olde,tol1,tol2,u,u1,u2,v,w,x,xm;
	186
	187	a=(ax < cx ? ax : cx);
	188	b=(ax > cx ? ax : cx);
	189	//ensuring x is between a and b
	190	if (bx>b) { x=w=v=b;b=bx;}
	191	else if (bx<a) {x=w=v=a; a=bx;}
	192	else x=w=v=bx;
	193
	194	fw=fv=fx=f(x);
	195	assert(fv==fv);// throw an exception if answer is nan = not a number.
	196	dw=dv=dx=df(x);
	197
	198	for (iter=1;iter<=ITMAX;iter++) {
	199	xm=0.5*(a+b);
	200	#ifdef VERBOS
	201	//if (iter>10) cout<<"iteration: "<<iter<<" xm = "<<xm<<" x= "<<x<<" a= "<<a<<" b= "<<b<<" fx= "<<fx<<endl;
	202	#endif
	203	tol1=tol*fabs(x)+ZEPS;
	204	tol2=2.0*tol1;
	205
	206	if (fabs(x-xm) <= (tol2-0.5*(b-a))) {
	207	*xmin=x;
	208	return fx;
	209	}
	210	if (fabs(e) > tol1) {
	211	d1=2.0*(b-a);
	212	d2=d1;
	213	if (dw != dx) d1=(w-x)*dx/(dx-dw);
	214	if (dv != dx) d2=(v-x)*dx/(dx-dv);
	215	u1=x+d1;
	216	u2=x+d2;
	217	ok1 = (a-u1)(u1-b) > 0.0 && dxd1 <= 0.0;
	218	ok2 = (a-u2)(u2-b) > 0.0 && dxd2 <= 0.0;
	219	olde=e;
	220	e=d;
	221	if (ok1 \|\| ok2) {
	222	if (ok1 && ok2)
	223	d=(fabs(d1) < fabs(d2) ? d1 : d2);
	224	else if (ok1)
	225	d=d1;
	226	else
	227	d=d2;
	228	if (fabs(d) <= fabs(0.5*olde)) {
	229	u=x+d;
	230	if (u-a < tol2 \|\| b-u < tol2)
	231	d=SIGN(tol1,xm-x);
	232	} else {
	233	d=0.5*(e=(dx >= 0.0 ? a-x : b-x));
	234	}
	235	} else {
	236	d=0.5*(e=(dx >= 0.0 ? a-x : b-x));
	237	}
	238	} else {
	239	d=0.5*(e=(dx >= 0.0 ? a-x : b-x));
	240	}
	241	if (fabs(d) >= tol1) {
	242	u=x+d;
	243	fu=f(u);
	244	} else {
	245	u=x+SIGN(tol1,d);
	246	if (u<ax) u=x; // MY LATEST ADDITION!
	247	fu=f(u);
	248	if (fu > fx) {
	249	*xmin=x;
	250	return fx;
	251	}
	252	}
	253	du=df(u);
	254	if (fu <= fx) {
	255	if (u >= x) a=x; else b=x;
	256	MOV3(v,fv,dv, w,fw,dw)
	257	MOV3(w,fw,dw, x,fx,dx)
	258	MOV3(x,fx,dx, u,fu,du)
	259	} else {
	260	if (u < x) a=u; else b=u;
	261	if (fu <= fw \|\| w == x) {
	262	MOV3(v,fv,dv, w,fw,dw)
	263	MOV3(w,fw,dw, u,fu,du)
	264	} else if (fu < fv \|\| v == x \|\| v == w) {
	265	MOV3(v,fv,dv, u,fu,du)
	266	}
	267	}
	268
	269	}
	270	errorMsg::reportError("Too many iterations in routine dbrent"); // also quit the program
	271	return -1;
	272	}
	273
	274	/*
	275	//A doubleRep implementation of dbrent cause return type function overloading is forbidden in c++
	276	template <typename regF, typename dF>
	277	doubleRep dbrentDoubleRep(doubleRep ax, doubleRep bx, doubleRep cx, regF f,
	278	dF df, doubleRep tol, MDOUBLE *xmin) {
	279
	280	int iter,ok1,ok2;
	281	doubleRep a,b,d1,d2;
	282	doubleRep d(0.0);
	283	doubleRep e(0.0);
	284	doubleRep olde,u,u1,u2,v,w,x,xm;
	285	doubleRep fu,fv,fw,fx,du,dv,dw,dx;
	286	doubleRep minusOne(-1.0);
	287	doubleRep half(0.5);
	288	doubleRep two(2.0);
	289	doubleRep zero(0.0);
	290	a=(ax < cx ? ax : cx);
	291	b=(ax > cx ? ax : cx);
	292	//ensuring x is between a and b
	293	if (bx>b) { x=w=v=b;b=bx;}
	294	else if (bx<a) {x=w=v=a; a=bx;}
	295	else x=w=v=bx;
	296
	297	fw=fv=fx=f(convert(x));
	298	assert(fv==fv);// throw an exception if answer is nan = not a number.
	299	dw=dv=dx=df(convert(x));
	300
	301	for (iter=1;iter<=ITMAX;iter++) {
	302	xm=half*(a+b);
	303	#ifdef VERBOS
	304	//if (iter>10) cout<<"iteration: "<<iter<<" xm = "<<xm<<" x= "<<x<<" a= "<<a<<" b= "<<b<<" fx= "<<fx<<endl;
	305	#endif
	306	doubleRep tol1(convert(tol)*fabs(convert(x)));
	307	doubleRep tol2(2.0*(convert(tol1)+ZEPS));
	308
	309	if (fabs(convert(x+minusOnexm)) <= convert((tol2+minusOnehalf(b+minusOnea)))) {
	310	*xmin=convert(x);
	311	return fx;
	312	}
	313	if (fabs(convert(e)) > convert(tol1)) {
	314	d1=two(b+minusOnea);
	315	d2=d1;
	316	if (dw != dx) d1=(w+minusOnex)dx/(dx+minusOne*dw);
	317	if (dv != dx) d2=(v+minusOnex)dx/(dx+minusOne*dv);
	318	u1=x+d1;
	319	u2=x+d2;
	320	ok1 = (a+minusOneu1)(u1+minusOneb) > zero && dxd1 <= zero;
	321	ok2 = (a+minusOneu2)(u2+minusOneb) > zero && dxd2 <= zero;
	322	olde=e;
	323	e=d;
	324	if (ok1 \|\| ok2) {
	325	if (ok1 && ok2)
	326	d=(fabs(convert(d1)) < fabs(convert(d2)) ? d1 : d2);
	327	else if (ok1)
	328	d=d1;
	329	else
	330	d=d2;
	331	if (fabs(convert(d)) <= fabs(convert(half*olde))) {
	332	u=x+d;
	333	if (u+minusOnea < tol2 \|\| b+minusOneu < tol2){
	334	doubleRep sign(SIGN(convert(tol1),convert(xm+minusOne*x)));
	335	d=sign;
	336	}
	337	} else {
	338	d=half(e=(dx >= zero ? a+minusOnex : b+minusOne*x));
	339	}
	340	} else {
	341	d=half(e=(dx >= zero ? a+minusOnex : b+minusOne*x));
	342	}
	343	} else {
	344	d=half(e=(dx >= zero ? a+minusOnex : b+minusOne*x));
	345	}
	346	if (fabs(convert(d)) >= convert(tol1)) {
	347	u=x+d;
	348	fu=f(convert(u));
	349	} else {
	350	doubleRep sign(SIGN(convert(tol1),convert(d)));
	351	u=x+sign;
	352	if (u<ax) u=x; // MY LATEST ADDITION!
	353	fu=f(convert(u));
	354	if (fu > fx) {
	355	*xmin=convert(x);
	356	return fx;
	357	}
	358	}
	359	du=df(convert(u));
	360	if (fu <= fx) {
	361	if (u >= x) a=x; else b=x;
	362	MOV3(v,fv,dv, w,fw,dw)
	363	MOV3(w,fw,dw, x,fx,dx)
	364	MOV3(x,fx,dx, u,fu,du)
	365	} else {
	366	if (u < x) a=u; else b=u;
	367	if (fu <= fw \|\| w == x) {
	368	MOV3(v,fv,dv, w,fw,dw)
	369	MOV3(w,fw,dw, u,fu,du)
	370	} else if (fu < fv \|\| v == x \|\| v == w) {
	371	MOV3(v,fv,dv, u,fu,du)
	372	}
	373	}
	374
	375	}
	376	errorMsg::reportError("Too many iterations in routine dbrentDoubleRep"); // also quit the program
	377	return minusOne;
	378	}
	379	*/
	380	/*================================== function rtbis =========================================
	381	//Using bisection, find the root of the function func known to lie between
	382	x1 and x2. The return value is the root will be refined until its accuracy is +- xacc
	383	*/
	384	template <typename regF>
	385	MDOUBLE rtbis(regF func,MDOUBLE x1, MDOUBLE x2, MDOUBLE xacc) {
	386	const int max_number_of_iter = 100;
	387
	388	MDOUBLE f = func(x1);
	389	MDOUBLE fmid = func(x2);
	390	if (f*fmid >=0.0) {
	391	errorMsg::reportError(" error in function rtbis, root must be bracketed for bisection in rtbis ");
	392	// also quit the program
	393	}
	394
	395	MDOUBLE dx, rtb;
	396	if (f<0.0) {
	397	dx = x2-x1;
	398	rtb = x1;
	399	}
	400	else {
	401	dx = x1-x2;
	402	rtb = x2;
	403	}
	404
	405
	406	for (int j=1; j <= max_number_of_iter; ++j) {
	407	dx *= 0.5;
	408	MDOUBLE xmid = rtb+dx;
	409	MDOUBLE fmid = func(xmid);
	410	if (fmid <= 0.0) rtb = xmid;
	411	if ((fabs(dx) < xacc) \|\| (fmid == 0.0)) return rtb;
	412	}
	413	errorMsg::reportError("Error in function rtbis..."); // also quit the program...
	414	return -1.0;
	415	}
	416
	417	//Given a function func and an initial guessed range (x1,x2), the routine expands the range
	418	//geometrically until a root is bracketed by the returned values x1 and x2 (in which case zbrac retruns true)
	419	//or until the range becomes large unacceptably large (in which case zbrac return false).
	420	template <typename regF>
	421	bool zbrac(regF func, MDOUBLE &x1, MDOUBLE &x2) {
	422	const int NTRY=50;
	423	const MDOUBLE FACTOR= 1.6;
	424	int j;
	425	MDOUBLE f1,f2;
	426
	427	if (x1 == x2)
	428	errorMsg::reportError("Bad initial range in zbrac");
	429	f1 = func(x1);
	430	f2 = func(x2);
	431	for (j = 0; j < NTRY; j++)
	432	{
	433	if (f1 * f2 < 0.0)
	434	return true;
	435	if (fabs(f1) < fabs(f2))
	436	f1=func(x1 += FACTOR*(x1-x2));
	437	else
	438	f2=func(x2 += FACTOR*(x2-x1));
	439	}
	440	return false;
	441	}
	442
	443	// ================================ function brent new ======================================
	444
	445	int MyJacobi(VVdouble &Insym, VVdouble &RightEigenV, Vdouble &EigenValues);
	446	MDOUBLE sign(MDOUBLE a,MDOUBLE b);
	447	MDOUBLE pythag(const MDOUBLE a, const MDOUBLE b);
	448	void houseHolder(VVdouble &mat,VVdouble &Q);
	449	void tred2(VVdouble &a, Vdouble &d, Vdouble &e);
	450	void QL(Vdouble &d, Vdouble &e, VVdouble &z);
	451	void computeEigenSystem(VVdouble &symmetricMatrix,VVdouble &eigenVectros,Vdouble &diagonal);
	452	MDOUBLE performKSTest(const uniformDistribution& empiricalDist, Vdouble& observedDist); // perform Kolomogorov-Smirnoff test
	453	MDOUBLE computeProbForKS (const MDOUBLE QsParam); // function called only by performKSTest
	454
	455
	456
	457	#endif
	458

+271

-0

libs/phylogeny/oneTwoMoreModel.cpp less more

	0	#include "oneTwoMoreModel.h"
	1	#include "matrixUtils.h"
	2	#include "someUtil.h"
	3
	4	///////////////////////////////////////////////////////////
	5	//non reversible model
	6	///////////////////////////////////////////////////////////
	7
	8	const MDOUBLE EPSILON_3STATEMODEL = 1e-04;
	9
	10
	11	oneTwoMoreModel::oneTwoMoreModel(const MDOUBLE m1, const MDOUBLE m2,const MDOUBLE m3, const MDOUBLE m4
	12	,const Vdouble &freq, bool useMarkovLimiting)
	13	:_gain(m1),_more(m2), _less(m3),_loss(m4),_freq(freq),_useMarkovLimiting(useMarkovLimiting){
	14	resizeMatrix(_Q,alphabetSize(),alphabetSize());
	15	resizeMatrix(_lastPtCalculated, alphabetSize(), alphabetSize());
	16	updateQ();
	17	}
	18
	19	oneTwoMoreModel& oneTwoMoreModel::operator=(const oneTwoMoreModel &other){
	20	_gain = other._gain;
	21	_more = other._more;
	22	_less = other._less;
	23	_loss = other._loss;
	24	_freq = other._freq;
	25	_useMarkovLimiting = other._useMarkovLimiting;
	26	_Q = other._Q;
	27	_bQchanged = other._bQchanged;
	28	_lastPtCalculated = other._lastPtCalculated;
	29	_lastTcalculated = other._lastTcalculated;
	30
	31	return *this;
	32	}
	33
	34	void oneTwoMoreModel::updateQ(){
	35	setEpsilonForZeroParams();
	36	_Q[0][0] = -_gain;
	37	_Q[0][1] = _gain;
	38	_Q[0][2] = 0;
	39	_Q[1][0] = _loss;
	40	_Q[1][1] = -_more-_loss;
	41	_Q[1][2] = _more;
	42	_Q[2][0] = 0;
	43	_Q[2][1] = _less;
	44	_Q[2][2] = -_less;
	45	for (int i=0; i<_Q.size();i++) {
	46	MDOUBLE sum = _Q[i][0]+_Q[i][1]+_Q[i][2];
	47	if ((abs(sum)>err_allow_for_pijt_function()))
	48	errorMsg::reportError("Error in oneTwoMoreModel::updateQ, sum of row is not 0");
	49	}
	50	if ((!checkIsNullModel()) && (_useMarkovLimiting))
	51	computeMarkovLimitingDistribution();
	52	_bQchanged = true;
	53	}
	54
	55	// when Q matrix parameters are zero the lib code underflows and the likelihood is set to EPSILON
	56	void oneTwoMoreModel::setEpsilonForZeroParams(){
	57	if (DEQUAL(_more,0.0,EPSILON_3STATEMODEL))
	58	_more = EPSILON_3STATEMODEL;
	59	if (DEQUAL(_gain,0.0,EPSILON_3STATEMODEL))
	60	_gain = EPSILON_3STATEMODEL;
	61	if (DEQUAL(_loss,0.0,EPSILON_3STATEMODEL))
	62	_loss = EPSILON_3STATEMODEL;
	63	if (DEQUAL(_less,0.0,EPSILON_3STATEMODEL))
	64	_less = EPSILON_3STATEMODEL;
	65	}
	66
// Setters for the four rates. Each one stores the new value and immediately rebuilds Q
// through updateQ(). Mapping: Mu1 = _gain, Mu2 = _more, Mu3 = _less, Mu4 = _loss.

// Sets the gain rate (Q[0][1]) and rebuilds Q.
void oneTwoMoreModel::setMu1(const MDOUBLE val) {
	_gain = val;
	updateQ();
}

// Sets the "more" rate (Q[1][2]) and rebuilds Q.
void oneTwoMoreModel::setMu2(const MDOUBLE val) {
	_more = val;
	updateQ();
}

// Sets the "less" rate (Q[2][1]) and rebuilds Q.
void oneTwoMoreModel::setMu3(const MDOUBLE val) {
	_less = val;
	updateQ();
}

// Sets the loss rate (Q[1][0]) and rebuilds Q.
void oneTwoMoreModel::setMu4(const MDOUBLE val) {
	_loss = val;
	updateQ();
}
	86
	87
	88
	89	bool oneTwoMoreModel::pijt_is_prob_value(MDOUBLE val) const {
	90	if ((abs(val)+err_allow_for_pijt_function()<0) \|\| (val>1+err_allow_for_pijt_function()))
	91	return false;
	92	else
	93	return true;
	94	}
	95
	96	bool oneTwoMoreModel::areFreqsValid(Vdouble freq) const{
	97	MDOUBLE sum=0.0;
	98	for (int i=0; i<freq.size(); ++i){
	99	if (freq[i]<0.0)
	100	return false;
	101	sum+=freq[i];
	102	}
	103	if (!DEQUAL(sum,1.0)) {
	104	return false;
	105	}
	106	return true;
	107	}
	108
// Returns true only when _more is still exactly EPSILON_3STATEMODEL (the value
// setEpsilonForZeroParams() substitutes for a zero rate) and _freq[2] equals 1.0
// within EPSILON_3STATEMODEL. updateQ() skips the Markov limiting distribution in that case.
// NOTE(review): the _more test appears twice in a row; the second copy was presumably
// meant to test a different rate parameter - confirm which one before changing it.
bool oneTwoMoreModel::checkIsNullModel(){
	if (_more!=EPSILON_3STATEMODEL)
		return false;
	if (_more!=EPSILON_3STATEMODEL)
		return false;
	if (!(DEQUAL(_freq[2],1.0,EPSILON_3STATEMODEL)))
		return false;
	return true;
}
	118
	119	void oneTwoMoreModel::setFreq(const Vdouble &freq){
	120	if (freq.size()!=_freq.size()) {
	121	errorMsg::reportError("Error in oneTwoMoreModel::setFreq, size of freq is different than member");
	122	}
	123
	124	if (!areFreqsValid(freq)) {
	125	string strErr = "Error in oneTwoMoreModel::setFreq, sum of freq is different than 1 or negative freq value";
	126	errorMsg::reportError(strErr);
	127	}
	128	for (int i=0; i<freq.size(); ++i){
	129	_freq[i] = freq[i];
	130	}
	131	}
	132
	133
	134
	135
	136
	137
	138	void oneTwoMoreModel::computeMarkovLimitingDistribution(){
	139
	140	VVdouble P;
	141	int as = alphabetSize();
	142	resizeMatrix(P,as, as);
	143	// initializing P with P at time 1
	144	for (int i=0; i< as; ++i) {
	145	for (int j=0; j< as; ++j) {
	146	P[i][j]=Pij_t(i,j,1.0);
	147	}
	148	}
	149	VVdouble previous_P = P;
	150	int numIterations = 0;
	151	Vdouble freqs(3,-1.0);
	152	bool converged = false;
	153	MDOUBLE epsilon=0.000001;
	154	int row, col;
	155
	156	while ( converged==false ) {
	157	previous_P = P;
	158	P = multiplyMatrixes(P,P);
	159	// due to rounding errors, we set the diagonal to be 1-(the rest)
	160	P[0][0]=1.0-P[0][1]-P[0][2];
	161	P[1][1]=1.0-P[1][0]-P[1][2];
	162	P[2][2]=1.0-P[2][0]-P[2][1];
	163	for (int d=0; d<as;++d){
	164	freqs[d] = P[0][d];// ** taking the freqs as the first row; this is not necessarily correct if 3 rows are different
	165	}
	166	converged = true;
	167	for (row = 0; row < P.size(); ++row) {
	168	for (col = 0; col < P.size(); ++col)
	169	{
	170	MDOUBLE diff = abs(convert(previous_P[row][col] - P[row][col]));
	171	if ( ( ( ( !DEQUAL(diff,0.0,epsilon) ) \|\| (!areFreqsValid(freqs) ) ) )){
	172	converged = false;
	173	}
	174	}
	175	}
	176	numIterations++;
	177	if (numIterations>100) {
	178	string err = "Error in oneTwoMoreModel::computeMarkovLimitingDistribution, too many iterations =" + double2string(numIterations);
	179	errorMsg::reportError(err);
	180	}
	181
	182	}
	183	//making sure that the three rows are the same
	184	for (row =1; row < P.size(); ++row) {
	185	for (col = 0; col < P.size(); ++col)
	186	{
	187	if (!(DEQUAL(P[row][col],P[row-1][col],epsilon))) {
	188	errorMsg::reportError("Error in oneTwoMoreModel::computeMarkovLimitingDistribution, rows are not equal" );
	189
	190	}
	191
	192	}
	193
	194	}
	195
	196	setFreq(freqs);
	197	}
	198
	199	// new implementation copied from Itay Mayrose which saves the last values of t computed
	200	const MDOUBLE oneTwoMoreModel::Pij_t(const int i,const int j, const MDOUBLE d) const
	201	{
	202	if (!_bQchanged && DEQUAL(d, _lastTcalculated))
	203	return convert(_lastPtCalculated[i][j]);
	204	// converting Q into doubleRep format
	205	VVdouble QdblRep;
	206	resizeMatrix(QdblRep,_Q.size(),_Q.size());
	207	for (int row=0;row<_Q.size();row++){
	208	for (int col=0;col<_Q[row].size();col++)
	209	QdblRep[row][col]=convert(_Q[row][col]);
	210	}
	211
	212	VVdouble Qt = multiplyMatrixByScalar(QdblRep, d);
	213	VVdouble unit;
	214	unitMatrix(unit,_Q.size());
	215	_lastPtCalculated = add(unit,Qt) ; // I + Qt
	216	VVdouble Qt_power = Qt;
	217	VVdouble prevIter_matrix = _lastPtCalculated;
	218	VVdouble diffM = _lastPtCalculated; //init to whatever
	219	int n=2;
	220	bool bConverged = false;
	221	while (bConverged == false)
	222	{
	223	prevIter_matrix = _lastPtCalculated;
	224	VVdouble tempQ = multiplyMatrixByScalar(Qt,1.0/n);
	225	Qt_power = multiplyMatrixes(Qt_power,tempQ);
	226	_lastPtCalculated = add(_lastPtCalculated,Qt_power); // I + Qt + Qt^2/2! + .... + Qt^n/n!
	227	//check if the difference between the cur and prev iteration is smaller than the allowed error of all matrix entries
	228	bConverged = true;
	229	for (int row = 0; row < _lastPtCalculated.size(); ++row) {
	230	for (int col = 0; col < _lastPtCalculated.size(); ++col)
	231	{
	232	MDOUBLE diff = abs(convert(_lastPtCalculated[row][col] - prevIter_matrix[row][col]));
	233	if ((diff > err_allow_for_pijt_function()) \|\| (!pijt_is_prob_value(convert(_lastPtCalculated[i][j]))))
	234	bConverged = false;
	235	}
	236	}
	237	n++;
	238	if (n>150) {
	239	string err = "Error in oneTwoMoreModel::Pij_t, too many iterations for t = " + double2string(d);
	240	//cerr<<diff<<endl;
	241	errorMsg::reportError(err);
	242	}
	243	}
	244	MDOUBLE val = convert(_lastPtCalculated[i][j]);
	245	if (!pijt_is_prob_value(val))
	246	errorMsg::reportError("Error in oneTwoMoreModel::Pij_t, pijt <0 or >1");
	247	if (val<0.0)
	248	val = EPSILON; // absolute zero creates a problem later on in computations
	249	if (val>1.0)
	250	val = 1.0;
	251	_bQchanged = false;
	252	return val;
	253	}
	254	//////////////////////////////////////////////////////////////////////////
	255	MDOUBLE oneTwoMoreModel::sumPijQij(){
	256	MDOUBLE sum=0.0;
	257	for (int i=0; i < _Q.size(); ++i) {
	258	sum -= (_Q[i][i])*_freq[i];
	259	}
	260	return sum;
	261	}
	262	//////////////////////////////////////////////////////////////////////////
	263	void oneTwoMoreModel::norm(const MDOUBLE scale)
	264	{
	265	for (int i=0; i < _Q.size(); ++i) {
	266	for (int j=0; j < _Q.size(); ++j) {
	267	_Q[i][j] *= scale;
	268	}
	269	}
	270	}

+133

-0

libs/phylogeny/oneTwoMoreModel.h less more

	0	#ifndef ___1_2_more_STATE_MODEL
	1	#define ___1_2_more_STATE_MODEL
	2
	3	#include "definitions.h"
	4	#include "replacementModel.h"
	5	#include "fromQtoPt.h"
	6	#include "errorMsg.h"
	7	#include "matrixUtils.h"
	8
// Three-state replacement model driven by four rates (gain, more, less, loss).
// Q is rebuilt by updateQ() whenever a rate changes; Pij_t() evaluates exp(Q*t) numerically
// and keeps the last matrix it computed.
class oneTwoMoreModel : public replacementModel {
public:
	// m1..m4 are stored as _gain, _more, _less, _loss (in that order); freq are the state frequencies.
	explicit oneTwoMoreModel(const MDOUBLE m1, const MDOUBLE m2,
		const MDOUBLE m3, const MDOUBLE m4,const Vdouble &freq, bool useMarkovLimiting = true);
	// copy construction is implemented through operator=
	oneTwoMoreModel(const oneTwoMoreModel& other) {*this = other;}
	virtual oneTwoMoreModel& operator=(const oneTwoMoreModel &other);
	virtual oneTwoMoreModel* clone() const { return new oneTwoMoreModel(*this); }
	virtual ~oneTwoMoreModel() {}
	const int alphabetSize() const {return 3;} // three states (indices 0, 1, 2 of Q)
	const MDOUBLE err_allow_for_pijt_function() const {return 1e-4;} // same as q2p definitions
	const MDOUBLE Pij_t(const int i,const int j, const MDOUBLE d) const ;
	// Only the derivative at d==0 is available (it is Q[i][j]); any other d is reported as an error.
	const MDOUBLE dPij_dt(const int i,const int j, const MDOUBLE d) const{
		if (d==0.0)
			return _Q[i][j];
		errorMsg::reportError("Error in oneTwoMoreModel, dPij_dt called");
		return 0.0; // not supposed to be here
	}
	// Not implemented: every call is reported as an error.
	const MDOUBLE d2Pij_dt2(const int i,const int j, const MDOUBLE d) const{
		errorMsg::reportError("Error in oneTwoMoreModel, d2Pij_dt2 called");
		return 0.0; // not supposed to be here
	}
	// Frequency of state i; an index past the end of the vector is reported as an error.
	const MDOUBLE freq(const int i) const {
		if (i >= _freq.size())
			errorMsg::reportError("Error in oneTwoMoreModel::freq, i > size of frequency vector");
		return _freq[i];
	}
	const Vdouble getFreqs() const {return _freq;}
	void setFreq(const Vdouble &freq);
	// Mu1..Mu4 are the rates _gain, _more, _less, _loss; each setter rebuilds Q.
	void setMu1(const MDOUBLE val) ;
	void setMu2(const MDOUBLE val) ;
	void setMu3(const MDOUBLE val) ;
	void setMu4(const MDOUBLE val) ;
	const MDOUBLE getMu1() const {return _gain;}
	const MDOUBLE getMu2() const {return _more;}
	const MDOUBLE getMu3() const {return _less;}
	const MDOUBLE getMu4() const {return _loss;}
	void computeMarkovLimitingDistribution(); // compute P(infinity), which specifies the stationary distribution
	MDOUBLE sumPijQij();
	void norm(const MDOUBLE scale);

private:
	virtual void updateQ();
	void setEpsilonForZeroParams();
	bool checkIsNullModel();
	bool pijt_is_prob_value(MDOUBLE val) const;
	bool areFreqsValid(Vdouble freq) const; // tests if frequencies are valid (>=0, sum=1)

private:

	// positions below are the ones updateQ() writes
	MDOUBLE _gain; // _Q[0][1] (and -_gain on the diagonal of row 0)
	MDOUBLE _more; // _Q[1][2]
	MDOUBLE _less; // _Q[2][1] (and -_less on the diagonal of row 2)
	MDOUBLE _loss; // _Q[1][0]
	VVdouble _Q;
	Vdouble _freq;
	bool _useMarkovLimiting; // should the markov limiting distribution be used to estimate the root frequencies
	mutable bool _bQchanged; //indicates whether the Q matrix was changed after the last Pij_t call
	mutable MDOUBLE _lastTcalculated; // time compared against d by the Pij_t shortcut
	mutable VVdouble _lastPtCalculated; // last P(t) matrix computed by Pij_t



};
	72
	73	/*class gainLossModel : public replacementModel {
	74	public:
	75	explicit gainLossModel(const MDOUBLE m1, const MDOUBLE m2, const Vdouble freq);
	76	virtual replacementModel* clone() const { return new gainLossModel(*this); }
	77	gainLossModel(const gainLossModel& other): _q2pt(NULL) {*this = other;}
	78	virtual gainLossModel& operator=(const gainLossModel &other);
	79
	80	virtual ~gainLossModel() {if (_q2pt) delete _q2pt;}
	81	const int alphabetSize() const {return 3;} // two states and an intermediate (both states at once)
	82	const MDOUBLE err_allow_for_pijt_function() const {return 1e-4;} // same as q2p definitions
	83	const MDOUBLE Pij_t(const int i,const int j, const MDOUBLE d) const {
	84	return _q2pt->Pij_t(i,j,d);
	85	}
	86	const MDOUBLE dPij_dt(const int i,const int j, const MDOUBLE d) const{
	87	return _q2pt->dPij_dt(i,j,d);
	88	}
	89	const MDOUBLE d2Pij_dt2(const int i,const int j, const MDOUBLE d) const{
	90	return _q2pt->d2Pij_dt2(i,j,d);
	91	}
	92	const MDOUBLE freq(const int i) const {
	93	if (i >= _freq.size())
	94	errorMsg::reportError("Error in gainLossModel::freq, i > size of frequency vector");
	95	return _freq[i];
	96	}
	97	void setMu1(const MDOUBLE val, bool isReversible=true) { _gain = val; updateQ(isReversible);}
	98	void setMu2(const MDOUBLE val,bool isReversible=true) { _more = val; updateQ(isReversible);}
	99	const MDOUBLE getMu1() const {return _gain;}
	100	const MDOUBLE getMu2() const {return _more;}
	101
	102
	103	protected:
	104	virtual void updateQ(bool isReversible=true);
	105	virtual void normalizeQ();
	106
	107
	108	protected:
	109	Vdouble _freq;
	110	MDOUBLE _gain;
	111	MDOUBLE _more;
	112	VVdouble _Q;
	113	q2pt *_q2pt;
	114
	115
	116
	117	};
	118	*/
/*
Q is a 3x3 rate matrix of the following form (row = from-state, column = to-state),
as filled in by oneTwoMoreModel::updateQ():

        0           1            2
0    -gain        gain           0
1     loss    -more-loss       more
2      0          less        -less

i.e. no direct change between state 0 and state 2 is allowed, and every row sums to 0.
*/
	129
#endif // ___1_2_more_STATE_MODEL
	131
	132

+291

-0

libs/phylogeny/optGammaMixtureEM.cpp less more

	0	#include "optGammaMixtureEM.h"
	1	#include "likelihoodComputation.h"
	2	#include "numRec.h"
	3	#include "uniDistribution.h"
	4
	5	#include <fstream>
	6	#include <algorithm>
	7	#include <ctime>
	8	using namespace std;
	9	using namespace likelihoodComputation;
	10
	11	optGammaMixtureEM::optGammaMixtureEM(const stochasticProcess& cur_sp, const sequenceContainer& sc, const tree& inTree)
	12	{
	13	_pSc = &sc;
	14	_pTree = &inTree;
	15	_pSp = new stochasticProcess(cur_sp);
	16	}
	17
	18	optGammaMixtureEM::~optGammaMixtureEM()
	19	{
	20	if (_pSp != NULL)
	21	{
	22	delete _pSp;
	23	_pSp = NULL;
	24	}
	25	}
	26
	27	///////////////////////////////////////////////////////////////////////////////////////////////////////////
	28	//findBestParamManyStarts: Finds the best gammaMixture from many starting points.
	29	//The function starts form few starting points.
	30	//For each point it tries to optimize the likellihood doing only a small number of iterations.
	31	//It then picks the best points (highest likelihood) and continue the maximization for these points only.
	32	//The best gammaMixture is stored in _sp and the best likelihood is returned.
	33	//input Parameters:
	34	//startPointsNum = the number of starting points.
	35	//bestStartsNum = the number of best points to continue with the full optimization.
	36	//startIter = the number of iterations to perform with all starting points.
	37	//maxIterations = the maximum number of iterations to continue with the best points
	38	//epsilon = for determining convergence in the maximization process.
	39	MDOUBLE optGammaMixtureEM::findBestParamManyStarts(const int startPointsNum, const int bestStartsNum, const int startIter, const int maxIterations, const MDOUBLE epsilon, const MDOUBLE epsilomQopt, ofstream* pOutF)
	40	{
	41	vector<mixtureDistribution> distVec;
	42	Vdouble likelihoodVec(startPointsNum);
	43	mixtureDistribution * pMixture = static_cast<mixtureDistribution*>(_pSp->distr());
	44	//create starting distributions
	45	int i;
	46	for (i = 0; i < startPointsNum; ++i)
	47	{
	48	//the first distribution will be the current one
	49	if (i == 0)
	50	distVec.push_back(*pMixture);
	51	else
	52	distVec.push_back(mixtureDistribution(pMixture->getComponentsNum(), pMixture->categoriesForOneComponent(), LAGUERRE, 15, 15));
	53	}
	54
	55	//make a small number of iterations for all random starts
	56	for (i = 0; i < distVec.size(); ++i)
	57	{
	58	likelihoodVec[i] = optimizeParam(&distVec[i], startIter, epsilon, epsilomQopt, pOutF);
	59	}
	60
	61	//sort results and make full optimization only on the best starts
	62	Vdouble sortedL = likelihoodVec;
	63	sort(sortedL.begin(),sortedL.end());
	64	MDOUBLE threshold = sortedL[sortedL.size()- bestStartsNum];
	65	MDOUBLE bestL = sortedL[0];
	66	int bestDistNum = 0;
	67	for (i = 0; i < distVec.size(); ++i)
	68	{
	69	if (likelihoodVec[i] >= threshold)
	70	{
	71	MDOUBLE newL = optimizeParam(&distVec[i], maxIterations, epsilon, epsilomQopt, pOutF);
	72	if (newL > bestL)
	73	{
	74	bestL = newL;
	75	bestDistNum = i;
	76	}
	77	}
	78	}
	79	_pSp->setDistribution(&distVec[bestDistNum]);
	80	distVec.clear();
	81	return bestL;
	82	}
	83
	84
	85	MDOUBLE optGammaMixtureEM::optimizeParam(mixtureDistribution* pInDistribution, const int maxIterations, const MDOUBLE epsilon, const MDOUBLE epsilomQopt, ofstream* pOutF)
	86	{
	87	stochasticProcess inSp(pInDistribution, _pSp->getPijAccelerator());
	88	MDOUBLE curL = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(_pTree, _pSc, inSp, NULL);
	89
	90	/////compute piHomPos as in getTreeLikelihoodAllPosAlphTheSame
	91	//computePijGam pi;
	92	//pi.fillPij(*_pTree, inSp);
	93	//MDOUBLE res =0;
	94	//doubleRep LofPos;
	95	//int k;
	96	//for (k=0; k < _pSc->seqLen(); ++k)
	97	//{
	98	// doubleRep tmp=0;
	99	// for (int i=0; i < inSp.categories();++i)
	100	// {
	101	// tmp += getLofPos(k, _pTree, _pSc, pi[i], inSp)* inSp.ratesProb(i);
	102	// /MDOUBLE Pr = pDist->ratesProb(cat) likelihoodComputation::getLofPos(pos, _pTree, _pSc, cpgVec[comp][cat], spVec[comp]); */
	103	// }
	104	// LofPos = tmp;
	105	// res += log(LofPos);
	106	//}
	107	//
	108
	109
	110
	111
	112
	113
	114	//int componentNum = pInDistribution->getComponentsNum();
	115	////compute Pij for each component
	116	//vector<computePijGam> cpgVec(componentNum);
	117	//vector<stochasticProcess> spVec;
	118	//for (int comp = 0; comp < componentNum; ++comp) {
	119	// //create a local sp so to compute likelihoods of this component only
	120	// stochasticProcess compSp(pInDistribution->getComponent(comp), _pSp->getPijAccelerator());
	121	// cpgVec[comp].fillPij(*_pTree, compSp);
	122	// spVec.push_back(compSp);
	123	//}
	124
	125
	126
	127	//for (int pos = 0; pos < _pSc->seqLen(); ++pos)
	128	//{
	129	// int comp;
	130	// for (comp = 0; comp < componentNum; ++comp)
	131	// {
	132	// const generalGammaDistribution* pDist = pInDistribution->getComponent(comp);
	133	// for (int cat=0; cat < pDist->categories(); ++cat)
	134	// {
	135	// doubleRep LofPos = likelihoodComputation::getLofPos(pos, _pTree, _pSc, cpgVec[comp][cat], spVec[comp]);
	136	// L2 += log(LofPos);
	137	// }
	138	// }
	139	//}
	140
	141
	142
	143	if (maxIterations == 0)
	144	{
	145	return curL;
	146	LOG(4,<<endl<<endl<<"starting Gamma Mixture EM optimization..."<<endl);
	147	printIter(inSp, 0, curL);
	148	}
	149
	150	MDOUBLE newL = curL;
	151	int it;
	152	for (it = 0; it < maxIterations; ++it)
	153	{
	154	stochasticProcess oldSp(inSp);
	155	maximizeGammaParam(&inSp, epsilomQopt);
	156	newL = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(_pTree, _pSc, inSp, NULL);
	157	if (newL < curL + epsilon)
	158	{
	159	//the improvemnt in Likelihood is smaller than epsilon
	160	if (newL < curL)
	161	{ //ERROR - L went Down!
	162	cerr<<"likelihood went down!"<<endl<<"oldL = "<<curL<<" newL= "<<newL<<" Diff= "<<newL-curL<<endl;
	163	if (pOutF != NULL) *pOutF <<"likelihood went down!"<<endl<<"oldL = "<<curL<<" newL= "<<newL<<endl;
	164	pInDistribution = (static_cast<mixtureDistribution*>(oldSp.distr()));
	165	if (pOutF != NULL) *pOutF <<"after Gamma Mixture EM optimization..."<<endl;
	166	printIter(inSp, it, curL);
	167	return curL;
	168	}
	169	else
	170	{
	171	cerr<<"converged!"<<endl;
	172	pInDistribution = (static_cast<mixtureDistribution*>(inSp.distr()));
	173	if (pOutF != NULL) *pOutF <<"after Gamma Mixture EM optimization..."<<endl;
	174	printIter(inSp, it, newL);
	175	return newL;
	176	}
	177	}
	178	cerr << "iter " << it <<": cur likelihood= " << curL <<" new likelihood= " << newL <<endl;
	179	curL = newL;
	180	}
	181
	182	pInDistribution = (static_cast<mixtureDistribution*>(inSp.distr()));
	183	if (pOutF != NULL) *pOutF <<"after Gamma Mixture EM optimization..."<<endl;
	184	printIter(inSp, it, newL);
	185	return newL;
	186	}
	187
	188
	189	void optGammaMixtureEM::maximizeGammaParam(stochasticProcess* pNewSp, MDOUBLE accuracyRtbis)
	190	{
	191	suffStatGammaMixture stats(pNewSp, _pSc, *_pTree);
	192	stats.computeStatistics();
	193	//cerr << "Q BEFORE IS: " << stats.computeQ()<<endl;
	194	maximizeGammaParam(stats, pNewSp, accuracyRtbis);
	195	//cerr << "Q AFTER IS: " << stats.computeQ()<<endl;
	196	}
	197
	198	void optGammaMixtureEM::maximizeGammaParam(const suffStatGammaMixture & stats,
	199	stochasticProcess* pNewSp, MDOUBLE accuracyRtbis)
	200	{
	201	MDOUBLE upperBoundAlpha = 15.0;
	202	mixtureDistribution * pMixture = static_cast<mixtureDistribution*>(pNewSp->distr());
	203	int numComponents = pMixture->getComponentsNum();
	204	Vdouble compProb(numComponents), alphaVec(numComponents), betaVec(numComponents);
	205	for (int k = 0; k < numComponents; ++k)
	206	{
	207	alphaVec[k] = findBestAlpha(stats, k, accuracyRtbis, upperBoundAlpha);
	208	betaVec[k] = alphaVec[k] * (stats.getMk(k) / stats.getAk(k));
	209	compProb[k] = stats.getMk(k) / _pSc->seqLen();
	210	}
	211	pMixture->setMixtureParameters(alphaVec, betaVec, compProb);
	212	}
	213
	214	void optGammaMixtureEM::printIter(const stochasticProcess& inSp, const int it, const MDOUBLE curL)
	215	{
	216	LOG(4, << "iter " << it <<": cur likelihood= " << curL <<endl);
	217	mixtureDistribution * pMixture = static_cast<mixtureDistribution*>(inSp.distr());
	218	for (int k = 0; k < pMixture->getComponentsNum(); ++k)
	219	{
	220	LOG(4, << "comp="<<k<<" Alp/Beta= "<<pMixture->getAlpha(k)/pMixture->getBeta(k)<<" alpha= "<<pMixture->getAlpha(k) << " beta= " <<pMixture->getBeta(k)<<" Prob= "<<pMixture->getComponentProb(k)<<endl);
	221	}
	222	}
	223
	224
	225	//findBestAlpha: this function finds the alpha which is the root of the function C_evalAlphaEM().
	226	//BUT - if there is no root in the range (lowerBoundAlpha, upperBoundAlpha)
	227	//or - the root is higher than upperBoundAlpha - the function returns upperBoundAlpha
	228	MDOUBLE optGammaMixtureEM::findBestAlpha(const suffStatGammaMixture& stats, const int compNum, const MDOUBLE accuracyRtbis, const MDOUBLE upperBoundAlpha) const
	229	{
	230	MDOUBLE res = upperBoundAlpha;
	231	MDOUBLE lowerBoundAlpha = MINIMUM_ALPHA_PARAM;
	232	MDOUBLE upperRange = upperBoundAlpha;
	233	MDOUBLE lowerRange = lowerBoundAlpha;
	234	bool haveRoot = zbrac(C_evalAlphaEM(stats, compNum), lowerRange, upperRange);
	235	if (haveRoot == true)
	236	res = rtbis(C_evalAlphaEM(stats, compNum), lowerRange, upperRange, accuracyRtbis); ;
	237	if (res > upperBoundAlpha)
	238	res = upperBoundAlpha;
	239	else if (res < lowerBoundAlpha)
	240	res = lowerBoundAlpha;
	241	return res;
	242	}
	243
	244
// Debugging aid that is currently a no-op: the whole body is disabled.
// The disabled code computed, over all positions and rate categories r,
//   entropy = sum_r P(r|D,oldSp) * log( P(r|D,oldSp) / P(r|D,newSp) )
// from the rate posteriors under oldSp and newSp (likelihoodComputation::getPosteriorOfRates),
// together with the "second term" sum_r P(r|D,oldSp) * log(P(r|D,newSp)), and the EM objective
//   Q = sum_r P(r|D,oldSp) * log( L(pos | rate r, newSp) * newSp.ratesProb(r) ),
// printing all three to cerr.
void optGammaMixtureEM::checkEntropy(stochasticProcess & oldSp, stochasticProcess & newSp)
{
}

+102

-0

libs/phylogeny/optGammaMixtureEM.h less more

	0	#ifndef ___OPT_GAMMA_MIXTURE_EM
	1	#define ___OPT_GAMMA_MIXTURE_EM
/************************************************************
optGammaMixtureEM class is used to maximize the gammaMixture parameters.
The parameters to be optimized are the alpha and beta of each component and the component probabilities.
In each iteration:
(1) The sufficient statistics are calculated.
(2) Based on these statistics the parameters are optimized.
The procedure stops when no improvement in the tree likelihood is achieved.
************************************************************/
	10	#include "definitions.h"
	11	#include "suffStatGammaMixture.h"
	12	#include "stochasticProcess.h"
	13	#include "sequenceContainer.h"
	14	#include "tree.h"
	15	#include "gammaUtilities.h"
	16
	17	#include <cmath>
	18
// EM optimizer for the parameters of a gamma-mixture rate distribution.
// Holds its own copy of the stochastic process plus pointers to the caller's
// sequence container and tree.
class optGammaMixtureEM{

public:
	// copies cur_sp; sc and inTree are only pointed to and must outlive this object
	explicit optGammaMixtureEM(const stochasticProcess& cur_sp, const sequenceContainer& sc, const tree& inTree);
	virtual ~optGammaMixtureEM();

	//return the logLikelihood. the final distribution is stored in the stochasticProcess
	MDOUBLE optimizeParam(mixtureDistribution* pInDistribution, const int maxIterations, const MDOUBLE epsilon, const MDOUBLE epsilomQopt, ofstream* pOutF);

	// read-only access to the internal stochastic process
	const stochasticProcess* getSp() const {return _pSp;}

	// tries several starting distributions, fully optimizes the best ones and returns the best likelihood
	MDOUBLE findBestParamManyStarts(const int startPointsNum, const int bestStartsNum, const int startIter, const int maxIterations, const MDOUBLE epsilon, const MDOUBLE epsilomQopt, ofstream* pOutF = NULL);


	// one EM step on pNewSp; the second overload reuses statistics computed by the caller
	void maximizeGammaParam(stochasticProcess* pNewSp, MDOUBLE accuracy);
	void maximizeGammaParam(const suffStatGammaMixture & stats, stochasticProcess* pNewSp, MDOUBLE accuracy);
private:
	// logs the likelihood and the per-component parameters of one iteration
	void printIter(const stochasticProcess& pInSp, const int it, const MDOUBLE curL);


	// root search for the alpha of component compNum, clamped to the allowed alpha range
	MDOUBLE findBestAlpha(const suffStatGammaMixture& stats, const int compNum, const MDOUBLE accuracy, const MDOUBLE upperBoundAlpha) const;

	// debugging hook; its implementation is currently disabled
	void checkEntropy(stochasticProcess & oldSp, stochasticProcess & inSp);


private:
	stochasticProcess* _pSp;        // owned copy, deleted in the destructor
	const sequenceContainer* _pSc;  // not owned
	const tree* _pTree;             // not owned
};
	49
	50
	51
	52
	53	class C_evalAlphaEM{
	54	public:
	55	explicit C_evalAlphaEM(const suffStatGammaMixture& stats, const int compNum)
	56	:_compNum(compNum) {_pStats = &stats;}
	57
	58	public:
	59	MDOUBLE operator() (const MDOUBLE x)
	60	{
	61	MDOUBLE Ak = _pStats->getAk(_compNum);
	62	MDOUBLE Bk = _pStats->getBk(_compNum);
	63	MDOUBLE Mk = _pStats->getMk(_compNum);
	64
	65	MDOUBLE res = log(x) - gammaDerivative(x) + log(Mk) - log(Ak) + (Bk / Mk);
	66	//cerr<<"+++++++ x = "<<x<<" Ak = "<<Ak<<" Bk = "<<Bk<<" Mk = "<<Mk<<" RES = "<<res<<endl;
	67	// when x is beta (checking)
	68	// MDOUBLE res = Mk * log(x) - Mk * diGamma(Ak * x / Mk) + Bk;
	69	return res;
	70	}
	71
	72	private:
	73	MDOUBLE diGammaPlus(MDOUBLE x) const
	74	{
	75	MDOUBLE res1 = log(x) + (1/(2x)) - (1/(12xx)) + (1/(120pow(x, 4))) - (1/(252*pow(x, 6)));
	76	MDOUBLE res = log(x) + (0.5/x) - (0.083333333333333333333333333333333/(xx)) + (0.0083333333333333333333333333333333/(xxxx)) - (0.003968253968253968253968253968254/(pow(x, 6)));
	77	return res;
	78	}
	79	MDOUBLE diGamma(MDOUBLE x) const
	80	{
	81	//if x<1: use the identity digamma(Z) = digamma(z+1)- (1/z) see http://mathworld.wolfram.com/DigammaFunction.html
	82	if (x < 1)
	83	return (diGamma(x+1) - (1.0 / x));
	84	MDOUBLE res = log(x) - (1/(2x)) - (1/(12xx)) + (1/(120pow(x, 4))) - (1/(252*pow(x, 6)));
	85	//using more terms in the series expansion:
	86	MDOUBLE debugRes = log(x) - (1/(2x)) - (1/(12xx)) + (1/(120pow(x, 4))) - (1/(252pow(x, 6))) + (1/(240pow(x, 8))) - (1/(132*pow(x, 10)));
	87	return res;
	88	}
	89
	90	MDOUBLE gammaDerivative(MDOUBLE x) const
	91	{
	92	//MDOUBLE resCheck = (gammln(x+0.001) - gammln(x)) /0.001;
	93	MDOUBLE res = diGamma(x);
	94	return res;
	95	}
	96	private:
	97	const suffStatGammaMixture* _pStats;
	98	const int _compNum;
	99	};
	100	#endif
	101

+262

-0

libs/phylogeny/optGammaMixtureLS.cpp less more

	0	#include "optGammaMixtureLS.h"
	1	#include "likelihoodComputation.h"
	2	#include "numRec.h"
	3	//#include "optimizer.h"
	4	//#include "NRconjugateGradient.h"
	5
	6	#include <fstream>
	7	#include <algorithm>
	8	#include <ctime>
	9	using namespace std;
	10	using namespace likelihoodComputation;
	11
	12	optGammaMixtureLS::optGammaMixtureLS(stochasticProcess* pSp, const sequenceContainer& sc, const tree& inTree, MDOUBLE upperBoundAlpha/=15.0/, MDOUBLE upperBoundBeta/=15.0/,unObservableData* unObservableData_p)
	13	{
	14	_pSc = &sc;
	15	_pTree = &inTree;
	16	_pSp = pSp;
	17	_upperBoundAlpha = upperBoundAlpha;
	18	_upperBoundBeta = upperBoundBeta;
	19	_unObservableData_p = unObservableData_p;
	20	}
	21
	22
	23	optGammaMixtureLS::~optGammaMixtureLS()
	24	{
	25	}
	26
	27	MDOUBLE optGammaMixtureLS::optimizeParam(const int maxIterations, const MDOUBLE tol, const Vdouble * pWeights, optAlg optType)
	28	{
	29	mixtureDistribution * pMixture = static_cast<mixtureDistribution*>(_pSp->distr());
	30	return optimizeParam(pMixture, maxIterations, tol, pWeights, optType);
	31	}
	32
	33
	34	MDOUBLE optGammaMixtureLS::optimizeParam(mixtureDistribution * pMixture, const int maxIterations, const MDOUBLE tol, const Vdouble * pWeights, optAlg optType)
	35	{
	36	switch (optType)
	37	{
	38	case ONE_DIM:
	39	return optimizeParamOneDim(pMixture, maxIterations, tol, pWeights);
	40	break;
	41	//case POWELL:
	42	// return optimizeParamPowell(pMixture, maxIterations, tol, pWeights, pOutF);
	43	// break;
	44	//case CONJUGATE_DERIVATIVES:
	45	// return optimizeParamConjugateDeriv(pMixture, maxIterations, tol, pWeights, pOutF);
	46	// break;
	47	default:
	48	errorMsg::reportError("unknown optimization algorithm in optGammaMixtureLS::optimizeParam()");
	49	return -1;
	50	}
	51	}
	52
	53
	54	//this function finds the best mixture param using a line search maximization. Each time only one parameter is optimized using the regular brent algorithm.
	55	//CAN BE USED FOR 2 COMPONENTS ONLY (the maximization on components probabilities maximize only P1, the prob of the first component, while the prob of the second is set to 1-P1)
	56	// ...Note: if more than 2 components, all the others are scaled by P1
	57	//total there are 5 parameters to optimize: alpha1, beta1, alpha2, beta2, and P1
	58	MDOUBLE optGammaMixtureLS::optimizeParamOneDim(mixtureDistribution * pMixture, const int maxIterations, const MDOUBLE tol, const Vdouble * pWeights)
	59	{
	60	MDOUBLE lowerBound = 0.0;
	61
	62	MDOUBLE newL = VERYSMALL; //newL is the LL after a single param optimization.
	63	//MDOUBLE curL = VERYSMALL; //the current LL.
	64	MDOUBLE curL = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(_pTree,_pSc,*_pSp,pWeights,_unObservableData_p); //the current LL.
	65	MDOUBLE prevIterL = VERYSMALL; //The LL of the previous iteration. the loop quit if the increase in LL between iterations is smaller than tol
	66	MDOUBLE bestA=0, bestB=0, bestW = 0;
	67
	68	for (int it = 0; it < maxIterations; ++it)
	69	{
	70	//prevIterL = newL;
	71	prevIterL = curL;
	72
	73	for (int comp = 0; comp < pMixture->getComponentsNum(); ++comp)
	74	{
	75	//optimize alpha
	76	MDOUBLE oldAlpha = pMixture->getAlpha(comp);
	77	newL = -brent(lowerBound,oldAlpha, _upperBoundAlpha, C_evalAlphaMixture(_pTree,_pSc,_pSp,comp,pWeights,_unObservableData_p), tol, &bestA);
	78	if (newL < curL)
	79	{
	80	//the Likelihood wend down
	81	pMixture->setAlpha(oldAlpha, comp);
	82	if(_unObservableData_p){
	83	_unObservableData_p->setLforMissingData(*_pTree,_pSp);
	84	}
	85	LOG(5, <<"likelihood went down in optGammaMixtureLS::optimizeParam()"<<endl<<"old L= "<<curL<<" newL = "<<newL<<endl);
	86	}
	87	else
	88	{
	89	pMixture->setAlpha(bestA, comp);
	90	if(_unObservableData_p){
	91	_unObservableData_p->setLforMissingData(*_pTree,_pSp);
	92	}
	93	curL = newL;
	94	LOG(7, <<"iteration: "<<it<<" Optimize alpha comp"<<comp<<" new Likelihood = "<<curL<<endl);
	95	}
	96
	97	//optimize beta
	98	MDOUBLE oldBeta = pMixture->getBeta(comp);
	99	newL = -brent(lowerBound,oldBeta,_upperBoundBeta, C_evalBetaMixture(_pTree,_pSc,_pSp,comp,pWeights,_unObservableData_p), tol, &bestB);
	100	if (newL < curL)
	101	{
	102	//the Likelihood wend down
	103	pMixture->setBeta(oldBeta, comp);
	104	if(_unObservableData_p){
	105	_unObservableData_p->setLforMissingData(*_pTree,_pSp);
	106	}
	107	LOG(5, <<"likelihood went down in optGammaMixtureLS::optimizeParam()"<<endl<<"old L= "<<curL<<" newL = "<<newL<<endl);
	108	}
	109	else
	110	{
	111	pMixture->setBeta(bestB, comp);
	112	if(_unObservableData_p){
	113	_unObservableData_p->setLforMissingData(*_pTree,_pSp);
	114	}
	115	curL = newL;
	116	LOG(7, <<"iteration: "<<it<<" Optimize beta comp"<<comp<<" new Likelihood = "<<curL<<endl);
	117	}
	118	//optimize components probability.
	119	if (pMixture->getComponentsNum() == 1)
	120	continue;
	121
	122	MDOUBLE upperBound = 0.0;
	123	MDOUBLE lowerBound = 1.0;
	124	MDOUBLE oldWeight = pMixture->getComponentWeight(comp);
	125	newL = -brent(lowerBound, oldWeight, upperBound, C_evalProbMixture(_pTree,_pSc, _pSp, comp, pWeights), tol, &bestW);
	126	if (newL < curL)
	127	{
	128	//the Likelihood wend down
	129	pMixture->setComponentWeight(oldWeight, comp);
	130	if(_unObservableData_p){
	131	_unObservableData_p->setLforMissingData(*_pTree,_pSp);
	132	}
	133	LOG(5, <<"likelihood went down in optGammaMixtureLS::optimizeParam()"<<endl<<"old L= "<<curL<<" newL = "<<newL<<endl);
	134	}
	135	else
	136	{
	137	pMixture->setComponentWeight(bestW, comp);
	138	if(_unObservableData_p){
	139	_unObservableData_p->setLforMissingData(*_pTree,_pSp);
	140	}
	141	curL = newL;
	142	LOG(7, <<"iteration: "<<it<<" Optimize Prob"<<" new Likelihood = "<<curL<<endl);
	143	}
	144	}
	145	pMixture->normalizeProbabilities(); // why again ???
	146	printIter(pMixture, it, curL);
	147	if (curL < prevIterL + tol){
	148	//if(_unObservableData_p){
	149	// _unObservableData_p->setLforMissingData(*_pTree,_pSp);
	150	//}
	151	return max(curL,prevIterL); // not to reduce likelihood
	152	}
	153	}
	154	return curL;
	155	}
	156
	157
	158
	159	/*
	160	//this function uses a line search maximization. The difference is that it does not use the naive method (optimize each parameter separately until convergence)
	161	//but uses Powell's quadratically convergent method (Numerical Recipes pp 420).
	162	//CAN BE USED FOR 2 COMPONENTS ONLY (the maximization on components probabilities maximize only P1, the prob of the first component, while the prob of the second is set to 1-P1)
	163	//total there are 5 parameters to optimize: alpha1, beta1, alpha2, beta2, and P1
	164	MDOUBLE optGammaMixtureLS::optimizeParamPowell(mixtureDistribution* pMixture, const int maxIterations, const MDOUBLE tol, const Vdouble * pWeights, ofstream* pOutF)
	165	{
	166	if (pMixture->getComponentsNum() == 1)
	167	return optimizeParam1CompPowel(pMixture, maxIterations, tol, pWeights, pOutF);
	168	else return optimizeParamManyCompPowel(pMixture, maxIterations, tol, pWeights, pOutF);
	169	}
	170
	171
	172	MDOUBLE optGammaMixtureLS::optimizeParam1CompPowel(mixtureDistribution * pMixture, const int maxIterations, const MDOUBLE tol, const Vdouble * pWeights, ofstream* pOutF)
	173	{
	174	tree tree1(*_pTree);
	175	sequenceContainer sc1(*_pSc);
	176
	177	C_evalGammaMixture optPowell(&tree1, &sc1, _pSp, NULL);
	178	optimizer<C_evalGammaMixture> opt(optPowell);
	179	Vdouble param(2);
	180	param[0] = pMixture->getAlpha(0);
	181	param[1] = pMixture->getBeta(0);
	182
	183	MDOUBLE res = opt.findmin(param);
	184	return res;
	185	}
	186
	187	MDOUBLE optGammaMixtureLS::optimizeParamManyCompPowel(mixtureDistribution * pMixture, const int maxIterations, const MDOUBLE tol, const Vdouble * pWeights, ofstream* pOutF)
	188	{
	189	tree tree1(*_pTree);
	190	sequenceContainer sc1(*_pSc);
	191
	192	Vdouble param(pMixture->getComponentsNum() * 3 - 1);
	193	int paramNum = 0;
	194	for (int comp = 0; comp < pMixture->getComponentsNum(); ++comp)
	195	{
	196	param[paramNum++] = pMixture->getAlpha(comp);
	197	param[paramNum++] = pMixture->getBeta(comp);
	198	param[paramNum++] = pMixture->getComponentWeight(comp);
	199	}
	200	C_evalGammaMixture optPowell(&tree1, &sc1, _pSp, NULL);
	201	optimizer<C_evalGammaMixture> opt(optPowell);
	202	MDOUBLE res = opt.findmin(param);
	203	cerr <<"optimized Powell result = "<< res<<endl;
	204	return res;
	205	}
	206	*/
	207
	208	/*
	209	MDOUBLE optGammaMixtureLS::optimizeParamConjugateDeriv(
	210	mixtureDistribution * pMixture, const int maxIterations,
	211	const MDOUBLE tol, const Vdouble * pWeights, ofstream* pOutF)
	212	{
	213	tree tree1(*_pTree);
	214	sequenceContainer sc1(*_pSc);
	215
	216	Vdouble param(pMixture->getComponentsNum() * 3);
	217	int paramNum = 0;
	218	int comp;
	219	for (comp = 0; comp < pMixture->getComponentsNum(); ++comp)
	220	{
	221	param[paramNum++] = pMixture->getAlpha(comp);
	222	param[paramNum++] = pMixture->getBeta(comp);
	223	param[paramNum++] = pMixture->getComponentWeight(comp);
	224	}
	225	C_evalGammaMixture func(&tree1, &sc1, _pSp, pWeights);
	226	NRconjugateGradient<C_evalGammaMixture> opt;
	227	if (pOutF != NULL)
	228	{
	229	*pOutF <<endl<<endl<<"starting NRconjugateGradient optimization..."<<endl;
	230	printIter(pMixture, 0, 0.0, pOutF);
	231	}
	232
	233	MDOUBLE res = opt.findmin(param, &func, tol);
	234
	235	paramNum = 0;
	236	for (comp = 0; comp < pMixture->getComponentsNum(); ++comp)
	237	{
	238	pMixture->setAlpha(param[paramNum++], comp);
	239	pMixture->setBeta(param[paramNum++], comp);
	240	pMixture->setComponentWeight(param[paramNum++], comp);
	241	}
	242	pMixture->normalizeProbabilities();
	243	if (pOutF != NULL)
	244	{
	245	*pOutF <<endl<<endl<<"after NRconjugateGradient optimization"<<endl;
	246	printIter(pMixture, 0, res, pOutF);
	247	}
	248	cerr <<"optimized Conjugate Deriv result = "<< res<<endl;
	249	return res;
	250	}
	251	*/
	252
	253
	254	void optGammaMixtureLS::printIter(const mixtureDistribution * pMixture, const int it, const MDOUBLE curL)
	255	{
	256	LOG(4,<< "iter " << it <<": cur likelihood= " << curL <<endl);
	257	for (int k = 0; k < pMixture->getComponentsNum(); ++k)
	258	{
	259	LOG(4, << "comp="<<k<<" Alp/Beta= "<<pMixture->getAlpha(k)/pMixture->getBeta(k)<<" alpha= "<<pMixture->getAlpha(k) << " beta= " <<pMixture->getBeta(k)<<" Prob= "<<pMixture->getComponentProb(k)<<endl);
	260	}
	261	}

+280

-0

libs/phylogeny/optGammaMixtureLS.h less more

	0	#ifndef ___OPT_GAMMA_MIXTURE_LS
	1	#define ___OPT_GAMMA_MIXTURE_LS
	2	/************************************************************
	3	optGammaMixtureLS class is used to maximize the gammaMixture parameters via a line search maximization.
	4	The parameters to optimize are the alpha and beta of each component and the component probabilities.
	5	In each iteration:
	6	all parameters are optimized one at a time
	7	The procedure stops when no improvement in the tree likelihood is achieved
	8	************************************************************/
	9	#include "definitions.h"
	10	#include "suffStatGammaMixture.h"
	11	#include "stochasticProcess.h"
	12	#include "sequenceContainer.h"
	13	#include "tree.h"
	14	#include "gammaUtilities.h"
	15	#include "likelihoodComputation.h"
	16	#include "unObservableData.h"
	17
	18
	19
	20	#include <cmath>
	21
	22	class optGammaMixtureLS{
	23	public:
	24	enum optAlg {ONE_DIM/, POWELL, CONJUGATE_DERIVATIVES/};
	25
	26	public:
	27	explicit optGammaMixtureLS(stochasticProcess* pSp, const sequenceContainer& sc, const tree& inTree, MDOUBLE upperBoundAlpha =15.0, MDOUBLE upperBoundBeta =15.0, unObservableData* unObservableData_p=NULL);
	28	virtual ~optGammaMixtureLS();
	29
	30	//return the logLikelihood. the final distribution is stored in the stochasticProcess
	31	MDOUBLE optimizeParam(const int maxIterations, const MDOUBLE tol, const Vdouble * pWeights, optAlg optType);
	32	MDOUBLE optimizeParam(mixtureDistribution * pMixture, const int maxIterations, const MDOUBLE tol, const Vdouble * pWeights, optAlg optType);
	33
	34
	35	private:
	36	void printIter(const mixtureDistribution * pMixture, const int it, const MDOUBLE curL);
	37
	38	MDOUBLE optimizeParamOneDim(mixtureDistribution * pMixture, const int maxIterations, const MDOUBLE tol, const Vdouble * pWeights);
	39	//MDOUBLE optimizeParamPowell(mixtureDistribution * pMixture, const int maxIterations, const MDOUBLE tol, const Vdouble * pWeights, ofstream* pOutF=NULL);
	40	//MDOUBLE optimizeParamConjugateDeriv(mixtureDistribution *pMixture,
	41	// const int maxIterations, const MDOUBLE tol, const Vdouble pWeights, ofstream pOutF);
	42
	43	//MDOUBLE optimizeParam1CompPowel(mixtureDistribution * pMixture, const int maxIterations, const MDOUBLE tol, const Vdouble * pWeights, ofstream* pOutF=NULL);
	44	//MDOUBLE optimizeParamManyCompPowel(mixtureDistribution * pMixture, const int maxIterations, const MDOUBLE tol, const Vdouble * pWeights, ofstream* pOutF=NULL);
	45
	46	private:
	47	stochasticProcess* _pSp;
	48	const sequenceContainer* _pSc;
	49	const tree* _pTree;
	50	unObservableData* _unObservableData_p;
	51
	52	MDOUBLE _upperBoundAlpha;
	53	MDOUBLE _upperBoundBeta;
	54	};
	55
	56
	57
	58
	59	//line search classes for brent
	60	class C_evalAlphaMixture{
	61	public:
	62	C_evalAlphaMixture(const tree& et,
	63	const sequenceContainer& sc,
	64	stochasticProcess* pSp,
	65	const int componetNumber,
	66	const Vdouble * weights = NULL,
	67	unObservableData* unObservableData_p=NULL)
	68	: _et(et),_sc(sc),_weights(weights),_pSp(pSp), _compNum(componetNumber)
	69	{
	70	if(unObservableData_p)
	71	_unObservableData_p = unObservableData_p->clone();
	72	else
	73	_unObservableData_p = NULL;
	74	};
	75	virtual ~C_evalAlphaMixture(){
	76	if(_unObservableData_p) delete _unObservableData_p;
	77	}
	78
	79	private:
	80	const tree& _et;
	81	const sequenceContainer& _sc;
	82	const Vdouble * _weights;
	83	unObservableData* _unObservableData_p;
	84	stochasticProcess* _pSp;
	85	const int _compNum;
	86	public:
	87	MDOUBLE operator() (MDOUBLE alpha) {
	88	if (_pSp->categories() == 1) {
	89	errorMsg::reportError(" one category when trying to optimize alpha");
	90	}
	91	mixtureDistribution * pMixture = static_cast<mixtureDistribution*>(_pSp->distr());
	92	pMixture->setAlpha(alpha, _compNum);
	93	if(_unObservableData_p){
	94	_unObservableData_p->setLforMissingData(_et,_pSp);
	95	}
	96	MDOUBLE res = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(_et,_sc,*_pSp,_weights,_unObservableData_p);
	97	#ifdef VERBOS
	98	cerr<<"Component = "<<_compNum<<" with alpha = "<<alpha<<" logL = "<<res<<endl;
	99	#endif
	100	return -res;
	101	}
	102	};
	103
	104
	105	//////////////////////////////////////////////////////////////////////////
	106	class C_evalBetaMixture{
	107	public:
	108	C_evalBetaMixture(const tree& et,
	109	const sequenceContainer& sc,
	110	stochasticProcess* pSp,
	111	const int componetNumber,
	112	const Vdouble * weights = NULL,
	113	unObservableData* unObservableData_p=NULL)
	114	: _et(et),_sc(sc),_weights(weights),_pSp(pSp), _compNum(componetNumber)
	115	{
	116	if(unObservableData_p)
	117	_unObservableData_p = unObservableData_p->clone();
	118	else
	119	_unObservableData_p = NULL;
	120	};
	121	virtual ~C_evalBetaMixture(){
	122	if(_unObservableData_p) delete _unObservableData_p;
	123	}
	124
	125	private:
	126	const tree& _et;
	127	const sequenceContainer& _sc;
	128	const Vdouble * _weights;
	129	unObservableData* _unObservableData_p;
	130	stochasticProcess* _pSp;
	131	const int _compNum;
	132	public:
	133	MDOUBLE operator() (MDOUBLE beta) {
	134	if (_pSp->categories() == 1) {
	135	errorMsg::reportError(" one category when trying to optimize beta");
	136	}
	137	mixtureDistribution * pMixture = static_cast<mixtureDistribution*>(_pSp->distr());
	138	pMixture->setBeta(beta, _compNum);
	139	if(_unObservableData_p){
	140	_unObservableData_p->setLforMissingData(_et,_pSp);
	141	}
	142	MDOUBLE res = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(_et,_sc,*_pSp,_weights,_unObservableData_p);
	143	#ifdef VERBOS
	144	cerr<<"Component = "<<_compNum<<" with beta = "<<beta<<" logL = "<<res<<endl;
	145	#endif
	146	return -res;
	147	}
	148	};
	149
	150
	151	class C_evalProbMixture{
	152	public:
	153	C_evalProbMixture(const tree& et,
	154	const sequenceContainer& sc,
	155	stochasticProcess* pSp,
	156	const int componetNumber,
	157	const Vdouble * weights = NULL,
	158	unObservableData* unObservableData_p=NULL)
	159	: _et(et),_sc(sc),_weights(weights),_pSp(pSp), _compNum(componetNumber)
	160	{
	161	if(unObservableData_p)
	162	_unObservableData_p = unObservableData_p->clone();
	163	else
	164	_unObservableData_p = NULL;
	165	}
	166	virtual ~C_evalProbMixture(){
	167	if(_unObservableData_p) delete _unObservableData_p;
	168	}
	169
	170	private:
	171	const tree& _et;
	172	const sequenceContainer& _sc;
	173	const Vdouble * _weights;
	174	stochasticProcess* _pSp;
	175	const int _compNum;
	176	unObservableData* _unObservableData_p;
	177	public:
	178	MDOUBLE operator() (MDOUBLE w) {
	179	mixtureDistribution * pMixture = static_cast<mixtureDistribution*>(_pSp->distr());
	180	pMixture->setComponentWeight(w, _compNum);
	181	if(_unObservableData_p){
	182	_unObservableData_p->setLforMissingData(_et,_pSp);
	183	}
	184	MDOUBLE res = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(_et,_sc,*_pSp,_weights,_unObservableData_p);
	185	return -res;
	186	}
	187	};
	188
	189
	190	/*
	191	//the function to optimize using the conjugate Gradient algorithm
	192	class C_evalGammaMixture {
	193	public:
	194	C_evalGammaMixture(tree* pT,
	195	sequenceContainer* pSc,
	196	stochasticProcess* pSp,
	197	const Vdouble * weights = NULL,
	198	const MDOUBLE gradEps = 0.001)
	199	: _pTree(pT),_pSc(pSc),_pWeights(weights),_pSp(pSp), _gradEpsilon(gradEps)
	200	{};
	201
	202
	203	C_evalGammaMixture() {}
	204
	205	C_evalGammaMixture& operator= (const C_evalGammaMixture &other)
	206	{
	207	_pTree = other._pTree;
	208	_pSc = other._pSc;
	209	_pWeights = other._pWeights;
	210	_pSp = other._pSp;
	211	_gradEpsilon = other._gradEpsilon;
	212	return *this;
	213	}
	214
	215	MDOUBLE operator () (Vdouble &param){
	216	mixtureDistribution * pMixture = static_cast<mixtureDistribution*>(_pSp->distr());
	217
	218	int paramNum = 0;
	219	for (int comp = 0; comp < pMixture->getComponentsNum(); ++comp)
	220	{
	221	pMixture->setAlpha(param[paramNum++], comp);
	222	pMixture->setBeta(param[paramNum++], comp);
	223	pMixture->setComponentWeight(param[paramNum++], comp);
	224	}
	225	pMixture->normalizeProbabilities();
	226
	227	if (checkOutOfBounds(pMixture) == true)
	228	return 1000000000;
	229
	230	MDOUBLE res = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(*_pTree,*_pSc,*_pSp,_pWeights);
	231	return -res;
	232	}
	233
	234	void dfunc(const Vdouble &paramsIn, Vdouble& grads){
	235	if (paramsIn.size() != grads.size())
	236	errorMsg::reportError("C_evalGammaMixture::dfunc(): vectors of prameters and gradients are not the same size");
	237	Vdouble myx = paramsIn; // temporary vector, since x is const.
	238
	239	// calc the likelihood at the current point
	240	MDOUBLE fa = (*this)(myx);
	241
	242	// then calc likelihood at param+deltah for each param to approximate the derivative.
	243	int curParam;
	244	for(curParam=0; curParam < paramsIn.size(); curParam++)
	245	{
	246	myx[curParam] += _gradEpsilon;
	247	MDOUBLE fb = (*this)(myx);
	248	grads[curParam] = (fb - fa)/_gradEpsilon;
	249	myx[curParam] -= _gradEpsilon;
	250	}
	251	}
	252
	253	private:
	254	bool checkOutOfBounds(mixtureDistribution * pMixture) {
	255	for (int comp = 0; comp < pMixture->getComponentsNum(); ++comp)
	256	{
	257	if ((pMixture->getAlpha(comp) >= 15) || (pMixture->getAlpha(comp) <= 0.05))
	258	return true;
	259	if ((pMixture->getBeta(comp) >= 15) || (pMixture->getBeta(comp) <= 0.05))
	260	return true;
	261	if ((pMixture->getComponentProb(comp) > 1.0) || (pMixture->getComponentProb(comp) < 0.0))
	262	return true;
	263	}
	264	return false;
	265	}
	266
	267	private:
	268	tree* _pTree;
	269	sequenceContainer* _pSc;
	270	const Vdouble * _pWeights;
	271	stochasticProcess* _pSp;
	272	MDOUBLE _gradEpsilon; //the epsilon to calculate the gradiante
	273	};
	274	*/
	275
	276
	277
	278	#endif
	279

+37

-0

libs/phylogeny/pDistance.h less more

	0	// $Id: pDistance.h 962 2006-11-07 15:13:34Z privmane $
	1
	2	#ifndef ___P_DISTANCE
	3	#define ___P_DISTANCE
	4
	5	#include "definitions.h"
	6	#include "distanceMethod.h"
	7	/*********************************************************
	8	p distance computes distance by counting number of differences and dividing by length of seq.
	9	Weights are an input vector for giving additional weight to positions in the sequences.
	10	*******************************************************/
	11	class pDistance : public distanceMethod {
	12	public:
	13	explicit pDistance(){}
	14	const MDOUBLE giveDistance( const sequence& s1,
	15	const sequence& s2,
	16	const vector<MDOUBLE> * weights,
	17	MDOUBLE* score=NULL) const {//score is not used here
	18	MDOUBLE p =0;
	19	if (weights == NULL) {
	20	for (int i = 0; i < s1.seqLen() ; ++i) if (s1[i] != s2[i]) p++;
	21	p = p/s1.seqLen();
	22	} else {
	23	MDOUBLE len=0;
	24	for (int i = 0; i < s1.seqLen() ; ++i) {
	25	len +=((*weights)[i]);
	26	if (s1[i] != s2[i]) p+=((*weights)[i]);
	27	}
	28	p = p/len;
	29	}
	30	return p;
	31	}
	32	virtual pDistance* clone() const {return new pDistance(*this);}
	33
	34	};
	35
	36	#endif

+158

-0

libs/phylogeny/pairwiseGammaDistance.cpp less more

	0	// $Id: pairwiseGammaDistance.cpp 962 2006-11-07 15:13:34Z privmane $
	1
	2	#include "pairwiseGammaDistance.h"
	3	#include "numRec.h"
	4	#include "countTableComponent.h"
	5	#include "likeDist.h"
	6	#include "uniDistribution.h"
	7	#include <cmath>
	8
	9	// Local utility functions
	10	MDOUBLE pairwiseGammaDistance::giveInitialGuessOfDistance(
	11	const sequence& s1,
	12	const sequence& s2,
	13	const vector<MDOUBLE> * weights,
	14	MDOUBLE* score) const {
	15	uniDistribution ud;
	16	stochasticProcess uniSp(&ud,_sp.getPijAccelerator());
	17	likeDist ld(uniSp);
	18	return (ld.giveDistance(s1,s2,weights,score));
	19	}
	20
	21	class C_eval_gammaMLAlpha{
	22	private:
	23	const stochasticProcess& _sp;
	24	const sequence& _s1;
	25	const sequence& _s2;
	26	const MDOUBLE _distance;
	27	const Vdouble* _weights;
	28	// const VVdouble& _posteriorProb; // pos, rate
	29	public:
	30	C_eval_gammaMLAlpha(const stochasticProcess& sp,
	31	const sequence& s1,
	32	const sequence& s2,
	33	const MDOUBLE distance,
	34	// const VVdouble& posteriorProb,
	35	const Vdouble * weights): _sp(sp),
	36	_s1(s1),
	37	_s2(s2),
	38	_distance(distance),
	39	_weights(weights)
	40	// _posteriorProb(posteriorProb)
	41	{};
	42
	43	// this cast is required as the distribution within the
	44	// stochasticProcess is kept as the parent "distribution" class that
	45	// knows nothing of Alpha
	46	void setAlpha(MDOUBLE alpha) {
	47	(static_cast<gammaDistribution*>(_sp.distr()))->setAlpha(alpha);
	48	}
	49
	50
	51	MDOUBLE operator() (MDOUBLE alpha) {
	52	setAlpha(alpha);
	53	MDOUBLE likelihood = likeDist::evalLikelihoodForDistance(_sp,_s1,_s2,_distance,_weights);
	54	LOG(11,<<"check alpha="<<alpha<<", bl="<<_distance<<" gives "<<likelihood<<endl);
	55	return -likelihood;
	56	};
	57	};
	58
	59	// returns the best alpha for a given distance
	60	MDOUBLE pairwiseGammaDistance::optimizeAlphaFixedDist(const sequence& s1,
	61	const sequence& s2,
	62	stochasticProcess & sp,
	63	const MDOUBLE branchL,
	64	const vector<MDOUBLE> * weights,
	65	MDOUBLE* score) const { // changes sp.
	66	MDOUBLE bestA=0.0;
	67	MDOUBLE bestQ=0.0;
	68	const MDOUBLE upperBoundOnAlpha = 15.0;
	69	const MDOUBLE epsilonAlphaOptimization = 0.01;
	70	const MDOUBLE cx=upperBoundOnAlpha;// left, midle, right limit on alpha
	71	const MDOUBLE bx=cx*0.3;
	72	const MDOUBLE ax=0.0;
	73
	74
	75	bestQ = -brent(ax,bx,cx,
	76	C_eval_gammaMLAlpha(sp,s1,s2,branchL,weights),
	77	epsilonAlphaOptimization,
	78	&bestA);
	79	(static_cast<gammaDistribution*>(sp.distr()))->setAlpha(bestA);
	80	if (score) *score = bestQ;
	81	return bestA;
	82	}
	83
	84	class C_evalAlphaForPairOfSeq{
	85	private:
	86	const countTableComponentGam& _ctc;
	87	stochasticProcess& _sp;
	88	const MDOUBLE _branchL;
	89	public:
	90	C_evalAlphaForPairOfSeq(const countTableComponentGam& ctc,
	91	const MDOUBLE branchL,
	92	stochasticProcess& sp):_ctc(ctc), _sp(sp), _branchL(branchL) {};
	93
	94	MDOUBLE operator() (MDOUBLE alpha) {
	95	(static_cast<gammaDistribution*>(_sp.distr()))->setAlpha(alpha);
	96	C_evalLikeDist cev(_ctc,_sp);
	97	MDOUBLE L=cev(_branchL);
	98	LOG(10,<<"check alpha="<<alpha<<", bl="<<_branchL<<" gives "<<L<<endl);
	99	return L;
	100	};
	101	};
	102
	103	// returns the best alpha for a given distance
	104	MDOUBLE pairwiseGammaDistance::optimizeAlphaFixedDist(stochasticProcess & sp,
	105	const countTableComponentGam & ctc,
	106	const MDOUBLE branchL,
	107	const vector<MDOUBLE> * weights,
	108	MDOUBLE* score) const { // changes sp.
	109	MDOUBLE bestA=0.0;
	110	MDOUBLE bestQ=0.0;
	111	const MDOUBLE upperBoundOnAlpha = 15.0;
	112	const MDOUBLE epsilonAlphaOptimization = 0.01;
	113	const MDOUBLE cx=upperBoundOnAlpha;// left, midle, right limit on alpha
	114	const MDOUBLE bx=cx*0.3;
	115	const MDOUBLE ax=0.0;
	116
	117
	118	bestQ = -brent(ax,bx,cx,
	119	C_evalAlphaForPairOfSeq(ctc,branchL,sp),
	120	epsilonAlphaOptimization,
	121	&bestA);
	122	(static_cast<gammaDistribution*>(sp.distr()))->setAlpha(bestA);
	123	if (score) *score = bestQ;
	124	return bestA;
	125	}
	126
	127	const MDOUBLE pairwiseGammaDistance::giveDistance(const sequence& s1,
	128	const sequence& s2,
	129	const vector<MDOUBLE> * weights,
	130	MDOUBLE* score,
	131	MDOUBLE* alpha) const {
	132
	133	MDOUBLE resL = 0.0;
	134	MDOUBLE currentDistance = giveInitialGuessOfDistance(s1,s2,weights,&resL);
	135
	136	countTableComponentGam ctc; // from technical reasons.
	137
	138	stochasticProcess tmpSp(_sp);
	139
	140	const int maxIter = 30;
	141	MDOUBLE newDist = 0.0;
	142	MDOUBLE lastBestAlpha = 0.0;
	143	for (int i=0; i < maxIter; ++i) {
	144	lastBestAlpha = optimizeAlphaFixedDist(s1, s2, tmpSp, currentDistance, weights, &resL); // changes sp.
	145	LOG(8,<<"lastBestAlpha="<<lastBestAlpha<<"("<<"\t L="<<resL<<"\t");
	146	likeDist tmpld(tmpSp); // we must create a new ld, that will include the stochastic process with the new alpha
	147	newDist = tmpld.giveDistance(s1, s2, weights, &resL);
	148	LOG(8,<<"dist="<<newDist<<"(L="<<resL<<")"<<endl);
	149	if (fabs(newDist-currentDistance)<_toll) break;
	150	currentDistance = newDist;
	151	}
	152	if (score) *score = resL;
	153	if (alpha) *alpha = lastBestAlpha;
	154	assert (newDist >=0);
	155	return newDist;
	156	}
	157

+63

-0

libs/phylogeny/pairwiseGammaDistance.h less more

	0	// $Id: pairwiseGammaDistance.h 962 2006-11-07 15:13:34Z privmane $
	1
	2	#ifndef PAIRWISE_GAMMA_DISTANCE_H
	3	#define PAIRWISE_GAMMA_DISTANCE_H
	4
	5	#include "likeDist.h"
	6	#include "stochasticProcess.h"
	7	#include "definitions.h"
	8	#include "sequence.h"
	9	#include "gammaDistribution.h"
	10	#include "logFile.h"
	11
	12	#include <cmath>
	13	using namespace std;
	14
	15	// Finds ML distance with a gamma-ASRV stochasticProcess for a pair of
	16	// sequences while optimizing the alpha parameter for the given pair of
	17	// sequences.
	18	// Was called "njGamma::giveDistanceOptAlphaForPairOfSequences"
	19	class pairwiseGammaDistance : public likeDist {
	20	public:
	21	explicit pairwiseGammaDistance(const stochasticProcess & sp,
	22	const MDOUBLE toll =0.0001,
	23	const MDOUBLE maxPairwiseDistance = 5.0)
	24	: likeDist(sp,toll,maxPairwiseDistance) {}
	25
	26	explicit pairwiseGammaDistance(stochasticProcess & sp,
	27	const MDOUBLE toll =0.0001,
	28	const MDOUBLE maxPairwiseDistance = 5.0)
	29	: likeDist(sp,toll,maxPairwiseDistance) {}
	30
	31	const MDOUBLE giveDistance(const sequence& s1,
	32	const sequence& s2,
	33	const vector<MDOUBLE> * weights = NULL,
	34	MDOUBLE* score=NULL,
	35	MDOUBLE* alpha=NULL) const;
	36
	37	virtual pairwiseGammaDistance* clone() const {return new pairwiseGammaDistance(*this);}
	38
	39	void setAlpha(MDOUBLE alpha) {
	40	(static_cast<gammaDistribution*>(_sp.distr()))->setAlpha(alpha);
	41	}
	42
	43
	44	protected:
	45	MDOUBLE giveInitialGuessOfDistance(const sequence& s1,
	46	const sequence& s2,
	47	const vector<MDOUBLE> * weights,
	48	MDOUBLE* score) const;
	49	MDOUBLE optimizeAlphaFixedDist(const sequence& s1,
	50	const sequence& s2,
	51	stochasticProcess & sp,
	52	const MDOUBLE branchL,
	53	const vector<MDOUBLE> * weights,
	54	MDOUBLE* score=NULL) const;
	55	MDOUBLE optimizeAlphaFixedDist(stochasticProcess & sp,
	56	const countTableComponentGam & ctc,
	57	const MDOUBLE branchL,
	58	const vector<MDOUBLE> * weights,
	59	MDOUBLE* score=NULL) const;
	60	};
	61
	62	#endif

+138

-0

libs/phylogeny/phylipFormat.cpp less more

	0	// $Id: phylipFormat.cpp 962 2006-11-07 15:13:34Z privmane $
	1
	2	#include "phylipFormat.h"
	3	#include "someUtil.h"
	4	#include "errorMsg.h"
	5	#include "logFile.h"
	6
	7	sequenceContainer phylipFormat::read(istream &infile, const alphabet* alph){
	8	sequenceContainer mySeqData = readUnAligned(infile, alph);
	9	mySeqData.makeSureAllSeqAreSameLengthAndGetLen();
	10	return mySeqData;
	11	}
	12	sequenceContainer phylipFormat::readUnAligned(istream &infile, const alphabet* alph){
	13	sequenceContainer mySeqData;
	14
	15	vector<string> seqFileData;
	16	putFileIntoVectorStringArray(infile,seqFileData);
	17
	18	vector<string>::const_iterator currentLinePosition = seqFileData.begin();
	19	string::const_iterator itStr = seqFileData.begin()->begin();
	20	string::const_iterator itStrEnd = seqFileData.begin()->end();
	21
	22	int f_numSeq;
	23	bool readSeqNum= fromStringIterToInt(itStr,itStrEnd,f_numSeq);
	24	if (readSeqNum == false) errorMsg::reportError("Error reading number of sequences while reading PHYLIP sequence format");
	25	int f_seqLength;
	26	bool readSeqLen= fromStringIterToInt(itStr,itStrEnd,f_seqLength);
	27	if (readSeqLen == false) errorMsg::reportError("Error reading the sequences length while reading PHYLIP sequence format");
	28	currentLinePosition++; // we read the first line.
	29
	30	int localid=0;
	31	for (; currentLinePosition != seqFileData.end() ; ) {
	32	if (currentLinePosition->empty()) {++currentLinePosition;continue;} // empty line constinue
	33	string remark;
	34	string name;
	35	sequence seq(alph);
	36
	37
	38
	39	if (mySeqData.numberOfSeqs() < f_numSeq ) {//get from the line a name and a sequence;
	40
	41	string name1;
	42	string stringSeq1;
	43	string::const_iterator it2 = (currentLinePosition)->begin();
	44	for (; it2 != (currentLinePosition)->end();++it2) {
	45	if ((*it2)==' ') break;
	46	else name1+=(*it2);
	47	}
	48	for (; it2 != (currentLinePosition)->end();++it2) {
	49	if ((*it2)==' ') continue;
	50	else stringSeq1+=(*it2);
	51	}
	52	mySeqData.add(sequence(stringSeq1,name1,remark,localid,alph));
	53	currentLinePosition++;
	54	localid++;
	55	}
	56	else { // adding to the
	57	string stringSeq1;
	58	string::const_iterator it2 = (currentLinePosition)->begin();
	59	int sequenceId=localid%f_numSeq;
	60	for (; it2 != (currentLinePosition)->end() &&
	61	mySeqData[sequenceId].seqLen() <f_seqLength;++it2) {
	62	if ((*it2)==' ') continue;
	63	else stringSeq1+=(*it2);
	64
	65	}
	66	sequence tmp(stringSeq1,"","",sequenceId,alph);
	67	mySeqData[sequenceId].operator += (tmp);
	68	currentLinePosition++;
	69	localid++;
	70	}
	71	}
	72	return mySeqData;
	73	}
	74
	75	void phylipFormat::write(ostream &out, const sequenceContainer& sd,
	76	const int numOfPositionInLine,
	77	const int spaceEvery) {
	78	sequenceContainer::constTaxaIterator it5=sd.constTaxaBegin();
	79	for (;it5!=sd.constTaxaEnd();++it5) {
	80	if (it5->name().size() > 10) break;
	81	}
	82	if (it5 != sd.constTaxaEnd()) {
	83	LOG(1,<<"you asked to print in phylip format\n");
	84	LOG(1,<<"however, the names in phylip format\n");
	85	LOG(1,<<"must be no more than 10 characters.\n");
	86	LOG(1,<<"Names are hence trancated to ten \n");
	87	LOG(1,<<"characters. Notice, that this might\n");
	88	LOG(1,<<"result in a two or more sequences \n");
	89	LOG(1,<<"having the same name \n");
	90	}
	91
	92	// vector<const sequenceContainer::sequenceDatum*> vec;
	93	// sd.getSequenceDatumPtrVector(vec);
	94	out<<sd.numberOfSeqs()<<" "<<sd.seqLen();
	95	if (sd.constTaxaBegin()==sd.constTaxaEnd()) return;
	96
	97	int maxLengthOfSeqName =0;
	98	maxLengthOfSeqName=10; // all this maxLengthOfSeqName is the
	99
	100	int currentPosition = 0;
	101	while (currentPosition < sd.seqLen() ) {
	102	out<<endl;
	103	out.flush();
	104	// for (vector<const sequenceContainer::sequenceDatum*>::const_iterator it5= vec.begin(); it5!=vec.end(); ++ it5) {
	105	for (sequenceContainer::constTaxaIterator it5=sd.constTaxaBegin();it5!=sd.constTaxaEnd();++it5) {
	106
	107	for (int iName = 0 ;iName<maxLengthOfSeqName; ++iName) {
	108	if (iName<it5->name().size()) {
	109	if (currentPosition<numOfPositionInLine) {
	110	out<<it5->name()[iName];
	111	}
	112	else out<<" ";
	113	out.flush();
	114	}
	115	else out<<" ";
	116	}
	117	out.flush();
	118	out<<" ";
	119
	120	if (it5->seqLen()<numOfPositionInLine)
	121	out<<it5->toString()<<endl;
	122	else {
	123	for (int k=currentPosition; k < currentPosition+numOfPositionInLine; ++k) {
	124	if (k>=it5->seqLen()) break;
	125	out<<it5->toString(k);
	126	if (((k+1)%spaceEvery==0) && (((k+1)%numOfPositionInLine!=0))) out<<" ";
	127	}
	128	out<<endl;
	129	}
	130	}
	131	currentPosition +=numOfPositionInLine;
	132
	133	}
	134	return;
	135	}
	136
	137

+47

-0

libs/phylogeny/phylipFormat.h less more

	0	// $Id: phylipFormat.h 1812 2007-03-01 09:29:12Z adist $
	1
	2	#ifndef ___PHYLIP_FORMAT
	3	#define ___PHYLIP_FORMAT
	4
	5	#include "definitions.h"
	6	#include "sequenceContainer.h"
	7
	8	class phylipFormat {
	9	public:
	10	static sequenceContainer read(istream &infile, const alphabet* alph);
	11	static void write(ostream &out, const sequenceContainer& sd,
	12	const int numOfPositionInLine = 50,
	13	const int spaceEvery = 10);
	14	//readUnAligned: the input sequences do not need to be aligned (not all sequences are the same length).
	15	static sequenceContainer readUnAligned(istream &infile, const alphabet* alph);
	16	};
	17
	18	#endif
	19
	20	/* EXAMPLE OF PHYLIP FORMAT (interleaved):
	21
	22	6 128
	23	Langur KIFERCELAR TLKKLGLDGY KGVSLANWVC LAKWESGYNT EATNYNPGDE
	24	Baboon KIFERCELAR TLKRLGLDGY RGISLANWVC LAKWESDYNT QATNYNPGDQ
	25	Human KVFERCELAR TLKRLGMDGY RGISLANWMC LAKWESGYNT RATNYNAGDR
	26	Rat KTYERCEFAR TLKRNGMSGY YGVSLADWVC LAQHESNYNT QARNYDPGDQ
	27	Cow KVFERCELAR TLKKLGLDGY KGVSLANWLC LTKWESSYNT KATNYNPSSE
	28	Horse KVFSKCELAH KLKAQEMDGF GGYSLANWVC MAEYESNFNT RAFNGKNANG
	29
	30	STDYGIFQIN SRYWCNNGKP GAVDACHISC SALLQNNIAD AVACAKRVVS
	31	STDYGIFQIN SHYWCNDGKP GAVNACHISC NALLQDNITD AVACAKRVVS
	32	STDYGIFQIN SRYWCNDGKP GAVNACHLSC SALLQDNIAD AVACAKRVVR
	33	STDYGIFQIN SRYWCNDGKP RAKNACGIPC SALLQDDITQ AIQCAKRVVR
	34	STDYGIFQIN SKWWCNDGKP NAVDGCHVSC SELMENDIAK AVACAKKIVS
	35	SSDYGLFQLN NKWWCKDNKR SSSNACNIMC SKLLDENIDD DISCAKRVVR
	36
	37	DQGIRAWVAW RNHCQNKDVS QYVKGCGV
	38	DQGIRAWVAW RNHCQNRDVS QYVQGCGV
	39	DQGIRAWVAW RNRCQNRDVR QYVQGCGV
	40	DQGIRAWVAW QRHCKNRDLS GYIRNCGV
	41	EQGITAWVAW KSHCRDHDVS SYVEGCTL
	42	DKGMSAWKAW VKHCKDKDLS EYLASCNL
	43
	44
	45	*/
	46

+130

-0

libs/phylogeny/phylipSequentialFormat.cpp less more

	0	// $Id: phylipFormat.cpp 962 2006-11-07 15:13:34Z privmane $
	1
	2	#include "phylipSequentialFormat.h"
	3	#include "someUtil.h"
	4	#include "errorMsg.h"
	5	#include "logFile.h"
	6
	7	sequenceContainer phylipSequentialFormat::read(istream &infile, const alphabet* alph){
	8	sequenceContainer mySeqData = readUnAligned(infile, alph);
	9	mySeqData.makeSureAllSeqAreSameLengthAndGetLen();
	10	return mySeqData;
	11	}
	12	sequenceContainer phylipSequentialFormat::readUnAligned(istream &infile, const alphabet* alph){
	13	sequenceContainer mySeqData;
	14
	15	vector<string> seqFileData;
	16	putFileIntoVectorStringArray(infile,seqFileData);
	17
	18	vector<string>::const_iterator currentLinePosition = seqFileData.begin();
	19	string::const_iterator itStr = seqFileData.begin()->begin();
	20	string::const_iterator itStrEnd = seqFileData.begin()->end();
	21
	22	int f_numSeq;
	23	bool readSeqNum= fromStringIterToInt(itStr,itStrEnd,f_numSeq);
	24	if (readSeqNum == false) errorMsg::reportError("Error reading number of sequences while reading PHYLIP sequence format");
	25	int f_seqLength;
	26	bool readSeqLen= fromStringIterToInt(itStr,itStrEnd,f_seqLength);
	27	if (readSeqLen == false) errorMsg::reportError("Error reading the sequences length while reading PHYLIP sequence format");
	28	currentLinePosition++; // we read the first line.
	29
	30	int localid=0;
	31	for (; currentLinePosition != seqFileData.end() ; ) {
	32	if (currentLinePosition->empty()) {++currentLinePosition;continue;} // empty line continue
	33	string stringSeq1;
	34	string name1;
	35	while (stringSeq1.length() < f_seqLength ) { // adding a new seq
	36	string::const_iterator it2 = (currentLinePosition)->begin();
	37	if ((*it2)==' ') { // line without seq. name, read seq. content only
	38	for (; it2 != (currentLinePosition)->end();++it2) {
	39	if ((*it2)==' ') continue;
	40	else stringSeq1+=(*it2);
	41	}
	42	}
	43	else { // first read sequence name, then read seq itself
	44	for (; it2 != (currentLinePosition)->end();++it2) {
	45	if ((*it2)==' ') break;
	46	else name1+=(*it2);
	47	}
	48	for (; it2 != (currentLinePosition)->end();++it2) {
	49	if ((*it2)==' ') continue;
	50	else stringSeq1+=(*it2);
	51	}
	52	}
	53
	54	currentLinePosition++;
	55	}
	56	mySeqData.add(sequence(stringSeq1,name1,"",localid,alph));
	57	localid++;
	58
	59	}
	60	return mySeqData;
	61	}
	62
	63	void phylipSequentialFormat::write(ostream &out, const sequenceContainer& sd,
	64	const int numOfPositionInLine,
	65	const int spaceEvery) {
	66	sequenceContainer::constTaxaIterator it5=sd.constTaxaBegin();
	67	for (;it5!=sd.constTaxaEnd();++it5) {
	68	if (it5->name().size() > 10) break;
	69	}
	70	if (it5 != sd.constTaxaEnd()) {
	71	LOG(1,<<"you asked to print in phylip format\n");
	72	LOG(1,<<"however, the names in phylip format\n");
	73	LOG(1,<<"must be no more than 10 characters.\n");
	74	LOG(1,<<"Names are hence trancated to ten \n");
	75	LOG(1,<<"characters. Notice, that this might\n");
	76	LOG(1,<<"result in a two or more sequences \n");
	77	LOG(1,<<"having the same name \n");
	78	}
	79
	80	// vector<const sequenceContainer::sequenceDatum*> vec;
	81	// sd.getSequenceDatumPtrVector(vec);
	82	out<<sd.numberOfSeqs()<<" "<<sd.seqLen();
	83	if (sd.constTaxaBegin()==sd.constTaxaEnd()) return;
	84
	85	int maxLengthOfSeqName =0;
	86	maxLengthOfSeqName=10; // all this maxLengthOfSeqName is the
	87
	88
	89	for (sequenceContainer::constTaxaIterator it5=sd.constTaxaBegin();it5!=sd.constTaxaEnd();++it5) {
	90	int currentPosition = 0;
	91	out<<endl;
	92	out.flush();
	93	// first - print name of sequence
	94	for (int iName = 0 ;iName<maxLengthOfSeqName; ++iName) {
	95	if (iName<it5->name().size()) {
	96	if (currentPosition<numOfPositionInLine) {
	97	out<<it5->name()[iName];
	98	}
	99	else out<<" ";
	100	out.flush();
	101	}
	102	else out<<" ";
	103	}
	104	out.flush();
	105	out<<" ";
	106	// next - print sequence itself
	107	while (currentPosition < sd.seqLen() ) {
	108	if (it5->seqLen()<numOfPositionInLine)
	109	out<<it5->toString()<<endl;
	110	else {
	111	for (int k=currentPosition; k < currentPosition+numOfPositionInLine; ++k) {
	112	if (k>=it5->seqLen()) break;
	113	out<<it5->toString(k);
	114	if (((k+1)%spaceEvery==0) && (((k+1)%numOfPositionInLine!=0))) out<<" ";
	115	}
	116	out<<endl;
	117	if (currentPosition+numOfPositionInLine < sd.seqLen()) {
	118	for (int i = 0; i < spaceEvery +1; i++) // creates spaces to align properly
	119	out << " ";
	120	}
	121	}
	122	currentPosition +=numOfPositionInLine;
	123	}
	124
	125	}
	126
	127	}
	128
	129

+35

-0

libs/phylogeny/phylipSequentialFormat.h less more

	0	// $Id: phylipFormat.h 962 2006-11-07 15:13:34Z privmane $
	1
	2	#ifndef ___PHYLIP_INTERLEAVED_FORMAT
	3	#define ___PHYLIP_INTERLEAVED_FORMAT
	4
	5	#include "definitions.h"
	6	#include "sequenceContainer.h"
	7
	8	class phylipSequentialFormat {
	9	public:
	10	static sequenceContainer read(istream &infile, const alphabet* alph);
	11	static void write(ostream &out, const sequenceContainer& sd,
	12	const int numOfPositionInLine = 50,
	13	const int spaceEvery = 10);
	14	//readUnAligned: the input sequences do not need to be aligned (not all sequences are the same length).
	15	static sequenceContainer readUnAligned(istream &infile, const alphabet* alph);
	16	};
	17
	18	#endif
	19
	20	/* EXAMPLE OF PHYLIP FORMAT (sequential):
	21
	22	3 128
	23	Langur KIFERCELAR TLKKLGLDGY KGVSLANWVC LAKWESGYNT EATNYNPGDE
	24	STDYGIFQIN SRYWCNNGKP GAVDACHISC SALLQNNIAD AVACAKRVVS
	25	DQGIRAWVAW RNHCQNKDVS QYVKGCGV
	26	Baboon KIFERCELAR TLKRLGLDGY RGISLANWVC LAKWESDYNT QATNYNPGDQ
	27	STDYGIFQIN SHYWCNDGKP GAVNACHISC NALLQDNITD AVACAKRVVS
	28	DQGIRAWVAW RNHCQNRDVS QYVQGCGV
	29	Human KVFERCELAR TLKRLGMDGY RGISLANWMC LAKWESGYNT RATNYNAGDR
	30	STDYGIFQIN SRYWCNDGKP GAVNACHLSC SALLQDNIAD AVACAKRVVR
	31	DQGIRAWVAW RNRCQNRDVR QYVQGCGV
	32
	33	*/
	34

libs/phylogeny/phylogeny.ncb less more

Binary diff not shown

+21

-0

libs/phylogeny/phylogeny.sln less more

	0	Microsoft Visual Studio Solution File, Format Version 8.00
	1	Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "phylogenyLib", "phylogeny.vcproj", "{BEB52DB0-2B2A-41F0-BB49-9EC9817ACBEE}"
	2	ProjectSection(ProjectDependencies) = postProject
	3	EndProjectSection
	4	EndProject
	5	Global
	6	GlobalSection(SolutionConfiguration) = preSolution
	7	Debug = Debug
	8	Release = Release
	9	EndGlobalSection
	10	GlobalSection(ProjectConfiguration) = postSolution
	11	{BEB52DB0-2B2A-41F0-BB49-9EC9817ACBEE}.Debug.ActiveCfg = Debug\|Win32
	12	{BEB52DB0-2B2A-41F0-BB49-9EC9817ACBEE}.Debug.Build.0 = Debug\|Win32
	13	{BEB52DB0-2B2A-41F0-BB49-9EC9817ACBEE}.Release.ActiveCfg = Release\|Win32
	14	{BEB52DB0-2B2A-41F0-BB49-9EC9817ACBEE}.Release.Build.0 = Release\|Win32
	15	EndGlobalSection
	16	GlobalSection(ExtensibilityGlobals) = postSolution
	17	EndGlobalSection
	18	GlobalSection(ExtensibilityAddIns) = postSolution
	19	EndGlobalSection
	20	EndGlobal

libs/phylogeny/phylogeny.suo less more

Binary diff not shown

+1238

-0

libs/phylogeny/phylogeny.vcproj less more

	0	<?xml version="1.0" encoding="windows-1255"?>
	1	<VisualStudioProject
	2	ProjectType="Visual C++"
	3	Version="7.10"
	4	Name="phylogenyLib"
	5	ProjectGUID="{BEB52DB0-2B2A-41F0-BB49-9EC9817ACBEE}"
	6	Keyword="Win32Proj">
	7	<Platforms>
	8	<Platform
	9	Name="Win32"/>
	10	</Platforms>
	11	<Configurations>
	12	<Configuration
	13	Name="Debug\|Win32"
	14	OutputDirectory="Debug"
	15	IntermediateDirectory="Debug"
	16	ConfigurationType="4"
	17	CharacterSet="2">
	18	<Tool
	19	Name="VCCLCompilerTool"
	20	Optimization="0"
	21	PreprocessorDefinitions="WIN32;_DEBUG;_LIB"
	22	MinimalRebuild="TRUE"
	23	BasicRuntimeChecks="3"
	24	RuntimeLibrary="5"
	25	RuntimeTypeInfo="TRUE"
	26	UsePrecompiledHeader="0"
	27	WarningLevel="3"
	28	Detect64BitPortabilityProblems="TRUE"
	29	DebugInformationFormat="4"/>
	30	<Tool
	31	Name="VCCustomBuildTool"/>
	32	<Tool
	33	Name="VCLibrarianTool"
	34	OutputFile="$(OutDir)/phylogeny.lib"/>
	35	<Tool
	36	Name="VCMIDLTool"/>
	37	<Tool
	38	Name="VCPostBuildEventTool"/>
	39	<Tool
	40	Name="VCPreBuildEventTool"/>
	41	<Tool
	42	Name="VCPreLinkEventTool"/>
	43	<Tool
	44	Name="VCResourceCompilerTool"/>
	45	<Tool
	46	Name="VCWebServiceProxyGeneratorTool"/>
	47	<Tool
	48	Name="VCXMLDataGeneratorTool"/>
	49	<Tool
	50	Name="VCManagedWrapperGeneratorTool"/>
	51	<Tool
	52	Name="VCAuxiliaryManagedWrapperGeneratorTool"/>
	53	</Configuration>
	54	<Configuration
	55	Name="Release\|Win32"
	56	OutputDirectory="Release"
	57	IntermediateDirectory="Release"
	58	ConfigurationType="4"
	59	CharacterSet="2">
	60	<Tool
	61	Name="VCCLCompilerTool"
	62	PreprocessorDefinitions="WIN32;NDEBUG;_LIB"
	63	RuntimeLibrary="4"
	64	RuntimeTypeInfo="TRUE"
	65	UsePrecompiledHeader="0"
	66	WarningLevel="3"
	67	Detect64BitPortabilityProblems="TRUE"
	68	DebugInformationFormat="3"/>
	69	<Tool
	70	Name="VCCustomBuildTool"/>
	71	<Tool
	72	Name="VCLibrarianTool"
	73	OutputFile="$(OutDir)/phylogeny.lib"/>
	74	<Tool
	75	Name="VCMIDLTool"/>
	76	<Tool
	77	Name="VCPostBuildEventTool"/>
	78	<Tool
	79	Name="VCPreBuildEventTool"/>
	80	<Tool
	81	Name="VCPreLinkEventTool"/>
	82	<Tool
	83	Name="VCResourceCompilerTool"/>
	84	<Tool
	85	Name="VCWebServiceProxyGeneratorTool"/>
	86	<Tool
	87	Name="VCXMLDataGeneratorTool"/>
	88	<Tool
	89	Name="VCManagedWrapperGeneratorTool"/>
	90	<Tool
	91	Name="VCAuxiliaryManagedWrapperGeneratorTool"/>
	92	</Configuration>
	93	</Configurations>
	94	<References>
	95	</References>
	96	<Files>
	97	<Filter
	98	Name="matrices"
	99	Filter="">
	100	<File
	101	RelativePath=".\adrianCodon.dat.q">
	102	</File>
	103	<File
	104	RelativePath=".\cpREV45.dat.q">
	105	</File>
	106	<File
	107	RelativePath=".\dayhoff.dat.q">
	108	</File>
	109	<File
	110	RelativePath=".\HIVb.dat.q">
	111	</File>
	112	<File
	113	RelativePath=".\HIVw.dat.q">
	114	</File>
	115	<File
	116	RelativePath=".\jones.dat.q">
	117	</File>
	118	<File
	119	RelativePath=".\LG.dat.q">
	120	</File>
	121	<File
	122	RelativePath=".\mtREV24.dat.q">
	123	</File>
	124	<File
	125	RelativePath=".\wag.dat.q">
	126	</File>
	127	</Filter>
	128	<Filter
	129	Name="alphabet"
	130	Filter="">
	131	<File
	132	RelativePath=".\alphabet.cpp">
	133	</File>
	134	<File
	135	RelativePath=".\alphabet.h">
	136	</File>
	137	<File
	138	RelativePath=".\amino.cpp">
	139	</File>
	140	<File
	141	RelativePath=".\amino.h">
	142	</File>
	143	<File
	144	RelativePath=".\codon.cpp">
	145	</File>
	146	<File
	147	RelativePath=".\codon.h">
	148	</File>
	149	<File
	150	RelativePath=".\gainLossAlphabet.cpp">
	151	</File>
	152	<File
	153	RelativePath=".\gainLossAlphabet.h">
	154	</File>
	155	<File
	156	RelativePath=".\geneticCodeHolder.cpp">
	157	</File>
	158	<File
	159	RelativePath=".\geneticCodeHolder.h">
	160	</File>
	161	<File
	162	RelativePath=".\indel.cpp">
	163	</File>
	164	<File
	165	RelativePath=".\indel.h">
	166	</File>
	167	<File
	168	RelativePath=".\integerAlphabet.cpp">
	169	</File>
	170	<File
	171	RelativePath=".\integerAlphabet.h">
	172	</File>
	173	<File
	174	RelativePath=".\mulAlphabet.cpp">
	175	</File>
	176	<File
	177	RelativePath=".\mulAlphabet.h">
	178	</File>
	179	<File
	180	RelativePath=".\nucleotide.cpp">
	181	</File>
	182	<File
	183	RelativePath=".\nucleotide.h">
	184	</File>
	185	<File
	186	RelativePath=".\oneTwoMoreModel.cpp">
	187	</File>
	188	<File
	189	RelativePath=".\oneTwoMoreModel.h">
	190	</File>
	191	<File
	192	RelativePath=".\threeStateAlphabet.cpp">
	193	</File>
	194	<File
	195	RelativePath=".\threeStateAlphabet.h">
	196	</File>
	197	</Filter>
	198	<Filter
	199	Name="sequence-related"
	200	Filter="">
	201	<File
	202	RelativePath=".\evaluateCharacterFreq.cpp">
	203	</File>
	204	<File
	205	RelativePath=".\evaluateCharacterFreq.h">
	206	</File>
	207	<File
	208	RelativePath=".\samplingSequences.cpp">
	209	</File>
	210	<File
	211	RelativePath=".\samplingSequences.h">
	212	</File>
	213	<File
	214	RelativePath=".\seqContainerTreeMap.cpp">
	215	</File>
	216	<File
	217	RelativePath=".\seqContainerTreeMap.h">
	218	</File>
	219	<File
	220	RelativePath=".\seqeuncesFilter.cpp">
	221	</File>
	222	<File
	223	RelativePath=".\seqeuncesFilter.h">
	224	</File>
	225	<File
	226	RelativePath=".\sequence.cpp">
	227	</File>
	228	<File
	229	RelativePath=".\sequence.h">
	230	</File>
	231	<File
	232	RelativePath=".\sequenceContainer.cpp">
	233	</File>
	234	<File
	235	RelativePath=".\sequenceContainer.h">
	236	</File>
	237	</Filter>
	238	<Filter
	239	Name="distribution"
	240	Filter="">
	241	<File
	242	RelativePath=".\betaDistribution.cpp">
	243	</File>
	244	<File
	245	RelativePath=".\betaDistribution.h">
	246	</File>
	247	<File
	248	RelativePath=".\betaDistributionFixedCategories.cpp">
	249	</File>
	250	<File
	251	RelativePath=".\betaDistributionFixedCategories.h">
	252	</File>
	253	<File
	254	RelativePath=".\betaDistributionFixedCategoriesWithOmegaUniform.cpp">
	255	</File>
	256	<File
	257	RelativePath=".\betaDistributionFixedCategoriesWithOmegaUniform.h">
	258	</File>
	259	<File
	260	RelativePath=".\betaOmegaDistribution.cpp">
	261	</File>
	262	<File
	263	RelativePath=".\betaOmegaDistribution.h">
	264	</File>
	265	<File
	266	RelativePath=".\betaUtilities.cpp">
	267	</File>
	268	<File
	269	RelativePath=".\betaUtilities.h">
	270	</File>
	271	<File
	272	RelativePath=".\distribution.cpp">
	273	</File>
	274	<File
	275	RelativePath=".\distribution.h">
	276	</File>
	277	<File
	278	RelativePath=".\distributionPlusCategory.cpp">
	279	</File>
	280	<File
	281	RelativePath=".\distributionPlusCategory.h">
	282	</File>
	283	<File
	284	RelativePath=".\distributionPlusInvariant.cpp">
	285	</File>
	286	<File
	287	RelativePath=".\distributionPlusInvariant.h">
	288	</File>
	289	<File
	290	RelativePath=".\gammaDistribution.cpp">
	291	</File>
	292	<File
	293	RelativePath=".\gammaDistribution.h">
	294	</File>
	295	<File
	296	RelativePath=".\gammaDistributionFixedCategories.cpp">
	297	</File>
	298	<File
	299	RelativePath=".\gammaDistributionFixedCategories.h">
	300	</File>
	301	<File
	302	RelativePath=".\gammaDistributionLaguerre.cpp">
	303	</File>
	304	<File
	305	RelativePath=".\gammaDistributionLaguerre.h">
	306	</File>
	307	<File
	308	RelativePath=".\gammaDistributionPlusInvariant.cpp">
	309	</File>
	310	<File
	311	RelativePath=".\gammaDistributionPlusInvariant.h">
	312	</File>
	313	<File
	314	RelativePath=".\gammaUtilities.cpp">
	315	</File>
	316	<File
	317	RelativePath=".\gammaUtilities.h">
	318	</File>
	319	<File
	320	RelativePath=".\GamMixtureOptimizer.cpp">
	321	</File>
	322	<File
	323	RelativePath=".\GamMixtureOptimizer.h">
	324	</File>
	325	<File
	326	RelativePath=".\generalGammaDistribution.cpp">
	327	</File>
	328	<File
	329	RelativePath=".\generalGammaDistribution.h">
	330	</File>
	331	<File
	332	RelativePath=".\generalGammaDistributionFixedCategories.cpp">
	333	</File>
	334	<File
	335	RelativePath=".\generalGammaDistributionFixedCategories.h">
	336	</File>
	337	<File
	338	RelativePath=".\generalGammaDistributionLaguerre.cpp">
	339	</File>
	340	<File
	341	RelativePath=".\generalGammaDistributionLaguerre.h">
	342	</File>
	343	<File
	344	RelativePath=".\generalGammaDistributionPlusInvariant.cpp">
	345	</File>
	346	<File
	347	RelativePath=".\generalGammaDistributionPlusInvariant.h">
	348	</File>
	349	<File
	350	RelativePath=".\mixtureDistribution.cpp">
	351	</File>
	352	<File
	353	RelativePath=".\mixtureDistribution.h">
	354	</File>
	355	<File
	356	RelativePath=".\optGammaMixtureEM.cpp">
	357	</File>
	358	<File
	359	RelativePath=".\optGammaMixtureEM.h">
	360	</File>
	361	<File
	362	RelativePath=".\optGammaMixtureLS.cpp">
	363	</File>
	364	<File
	365	RelativePath=".\optGammaMixtureLS.h">
	366	</File>
	367	<File
	368	RelativePath=".\suffStatGammaMixture.cpp">
	369	</File>
	370	<File
	371	RelativePath=".\suffStatGammaMixture.h">
	372	</File>
	373	<File
	374	RelativePath=".\uniDistribution.cpp">
	375	</File>
	376	<File
	377	RelativePath=".\uniDistribution.h">
	378	</File>
	379	<File
	380	RelativePath=".\uniformDistribution.cpp">
	381	</File>
	382	<File
	383	RelativePath=".\uniformDistribution.h">
	384	</File>
	385	</Filter>
	386	<Filter
	387	Name="stochastic&model-stuff"
	388	Filter="">
	389	<File
	390	RelativePath=".\datMatrixHolder.cpp">
	391	</File>
	392	<File
	393	RelativePath=".\datMatrixHolder.h">
	394	</File>
	395	<File
	396	RelativePath=".\fromQtoPt.cpp">
	397	</File>
	398	<File
	399	RelativePath=".\fromQtoPt.h">
	400	</File>
	401	<File
	402	RelativePath=".\granthamChemicalDistances.cpp">
	403	</File>
	404	<File
	405	RelativePath=".\granthamChemicalDistances.h">
	406	</File>
	407	<File
	408	RelativePath=".\readDatMatrix.cpp">
	409	</File>
	410	<File
	411	RelativePath=".\readDatMatrix.h">
	412	</File>
	413	<File
	414	RelativePath=".\ussrvModel.cpp">
	415	</File>
	416	<File
	417	RelativePath=".\ussrvModel.h">
	418	</File>
	419	<Filter
	420	Name="Accelerator"
	421	Filter="">
	422	<File
	423	RelativePath=".\alphaTrivialAccelerator.h">
	424	</File>
	425	<File
	426	RelativePath=".\chebyshevAccelerator.cpp">
	427	</File>
	428	<File
	429	RelativePath=".\chebyshevAccelerator.h">
	430	</File>
	431	<File
	432	RelativePath=".\pijAccelerator.cpp">
	433	</File>
	434	<File
	435	RelativePath=".\pijAccelerator.h">
	436	</File>
	437	<File
	438	RelativePath=".\trivialAccelerator.h">
	439	</File>
	440	</Filter>
	441	<Filter
	442	Name="ReplacementModel"
	443	Filter="">
	444	<File
	445	RelativePath=".\aaJC.cpp">
	446	</File>
	447	<File
	448	RelativePath=".\aaJC.h">
	449	</File>
	450	<File
	451	RelativePath=".\codonJC.cpp">
	452	</File>
	453	<File
	454	RelativePath=".\codonJC.h">
	455	</File>
	456	<File
	457	RelativePath=".\goldmanYangModel.cpp">
	458	</File>
	459	<File
	460	RelativePath=".\goldmanYangModel.h">
	461	</File>
	462	<File
	463	RelativePath=".\gtrModel.cpp">
	464	</File>
	465	<File
	466	RelativePath=".\gtrModel.h">
	467	</File>
	468	<File
	469	RelativePath=".\hky.cpp">
	470	</File>
	471	<File
	472	RelativePath=".\hky.h">
	473	</File>
	474	<File
	475	RelativePath=".\indelModel.cpp">
	476	</File>
	477	<File
	478	RelativePath=".\indelModel.h">
	479	</File>
	480	<File
	481	RelativePath=".\nucJC.cpp">
	482	</File>
	483	<File
	484	RelativePath=".\nucJC.h">
	485	</File>
	486	<File
	487	RelativePath=".\replacementModel.cpp">
	488	</File>
	489	<File
	490	RelativePath=".\replacementModel.h">
	491	</File>
	492	<File
	493	RelativePath=".\replacementModelSSRV.cpp">
	494	</File>
	495	<File
	496	RelativePath=".\replacementModelSSRV.h">
	497	</File>
	498	<File
	499	RelativePath=".\tamura92.cpp">
	500	</File>
	501	<File
	502	RelativePath=".\tamura92.h">
	503	</File>
	504	<File
	505	RelativePath=".\threeStateModel.cpp">
	506	</File>
	507	<File
	508	RelativePath=".\threeStateModel.h">
	509	</File>
	510	<File
	511	RelativePath=".\wYangModel.cpp">
	512	</File>
	513	<File
	514	RelativePath=".\wYangModel.h">
	515	</File>
	516	</Filter>
	517	<Filter
	518	Name="StochasticProcess"
	519	Filter="">
	520	<File
	521	RelativePath=".\multipleStochasticProcess.cpp">
	522	</File>
	523	<File
	524	RelativePath=".\multipleStochasticProcess.h">
	525	</File>
	526	<File
	527	RelativePath=".\stochasticProcess.cpp">
	528	</File>
	529	<File
	530	RelativePath=".\stochasticProcess.h">
	531	</File>
	532	<File
	533	RelativePath=".\stochasticProcessSSRV.cpp">
	534	</File>
	535	<File
	536	RelativePath=".\stochasticProcessSSRV.h">
	537	</File>
	538	</Filter>
	539	</Filter>
	540	<Filter
	541	Name="Tree-related"
	542	Filter="">
	543	<File
	544	RelativePath=".\allTrees.cpp">
	545	</File>
	546	<File
	547	RelativePath=".\allTrees.h">
	548	</File>
	549	<File
	550	RelativePath=".\allTreesSeparateModel.cpp">
	551	</File>
	552	<File
	553	RelativePath=".\allTreesSeparateModel.h">
	554	</File>
	555	<File
	556	RelativePath=".\bootstrap.cpp">
	557	</File>
	558	<File
	559	RelativePath=".\bootstrap.h">
	560	</File>
	561	<File
	562	RelativePath=".\fastStartTree.cpp">
	563	</File>
	564	<File
	565	RelativePath=".\fastStartTree.h">
	566	</File>
	567	<File
	568	RelativePath=".\readTree.cpp">
	569	</File>
	570	<File
	571	RelativePath=".\readTree.h">
	572	</File>
	573	<File
	574	RelativePath=".\simulateTree.cpp">
	575	</File>
	576	<File
	577	RelativePath=".\simulateTree.h">
	578	</File>
	579	<File
	580	RelativePath=".\tree.cpp">
	581	</File>
	582	<File
	583	RelativePath=".\tree.h">
	584	</File>
	585	<File
	586	RelativePath=".\treeInference.cpp">
	587	</File>
	588	<File
	589	RelativePath=".\treeInference.h">
	590	</File>
	591	<File
	592	RelativePath=".\treeIt.cpp">
	593	</File>
	594	<File
	595	RelativePath=".\treeIt.h">
	596	</File>
	597	<File
	598	RelativePath=".\treeUtil.cpp">
	599	</File>
	600	<File
	601	RelativePath=".\treeUtil.h">
	602	</File>
	603	<Filter
	604	Name="NNI"
	605	Filter="">
	606	<File
	607	RelativePath=".\Nni.cpp">
	608	</File>
	609	<File
	610	RelativePath=".\Nni.h">
	611	</File>
	612	<File
	613	RelativePath=".\NNiProp.cpp">
	614	</File>
	615	<File
	616	RelativePath=".\NNiProp.h">
	617	</File>
	618	<File
	619	RelativePath=".\NNiSep.cpp">
	620	</File>
	621	<File
	622	RelativePath=".\NNiSep.h">
	623	</File>
	624	</Filter>
	625	</Filter>
	626	<Filter
	627	Name="Formats"
	628	Filter="">
	629	<File
	630	RelativePath=".\clustalFormat.cpp">
	631	</File>
	632	<File
	633	RelativePath=".\clustalFormat.h">
	634	</File>
	635	<File
	636	RelativePath=".\fastaFormat.cpp">
	637	</File>
	638	<File
	639	RelativePath=".\fastaFormat.h">
	640	</File>
	641	<File
	642	RelativePath=".\maseFormat.cpp">
	643	</File>
	644	<File
	645	RelativePath=".\maseFormat.h">
	646	</File>
	647	<File
	648	RelativePath=".\molphyFormat.cpp">
	649	</File>
	650	<File
	651	RelativePath=".\molphyFormat.h">
	652	</File>
	653	<File
	654	RelativePath=".\nexusFormat.cpp">
	655	</File>
	656	<File
	657	RelativePath=".\nexusFormat.h">
	658	</File>
	659	<File
	660	RelativePath=".\phylipFormat.cpp">
	661	</File>
	662	<File
	663	RelativePath=".\phylipFormat.h">
	664	</File>
	665	<File
	666	RelativePath=".\phylipSequentialFormat.cpp">
	667	</File>
	668	<File
	669	RelativePath=".\phylipSequentialFormat.h">
	670	</File>
	671	<File
	672	RelativePath=".\recognizeFormat.cpp">
	673	</File>
	674	<File
	675	RelativePath=".\recognizeFormat.h">
	676	</File>
	677	</Filter>
	678	<Filter
	679	Name="Likelihood"
	680	Filter="">
	681	<File
	682	RelativePath=".\computeDownAlg.cpp">
	683	</File>
	684	<File
	685	RelativePath=".\computeDownAlg.h">
	686	</File>
	687	<File
	688	RelativePath=".\computeMarginalAlg.cpp">
	689	</File>
	690	<File
	691	RelativePath=".\computeMarginalAlg.h">
	692	</File>
	693	<File
	694	RelativePath=".\computePijComponent.cpp">
	695	</File>
	696	<File
	697	RelativePath=".\computePijComponent.h">
	698	</File>
	699	<File
	700	RelativePath=".\computeUpAlg.cpp">
	701	</File>
	702	<File
	703	RelativePath=".\computeUpAlg.h">
	704	</File>
	705	<File
	706	RelativePath=".\computeUpAlgFactors.cpp">
	707	</File>
	708	<File
	709	RelativePath=".\likeDist2Codon.cpp">
	710	</File>
	711	<File
	712	RelativePath=".\likeDist2Codon.h">
	713	</File>
	714	<File
	715	RelativePath=".\likelihoodComputation.cpp">
	716	</File>
	717	<File
	718	RelativePath=".\likelihoodComputation.h">
	719	</File>
	720	<File
	721	RelativePath=".\likelihoodComputation2Codon.cpp">
	722	</File>
	723	<File
	724	RelativePath=".\likelihoodComputation2Codon.h">
	725	</File>
	726	<File
	727	RelativePath=".\likelihoodComputation2USSRV.cpp">
	728	</File>
	729	<File
	730	RelativePath=".\likelihoodComputation2USSRV.h">
	731	</File>
	732	<File
	733	RelativePath=".\likelihoodComputationFactors.cpp">
	734	</File>
	735	<File
	736	RelativePath=".\likelihoodComputationFactors.h">
	737	</File>
	738	<File
	739	RelativePath=".\likelihoodComputationGL.cpp">
	740	</File>
	741	<File
	742	RelativePath=".\likelihoodComputationGL.h">
	743	</File>
	744	<File
	745	RelativePath=".\suffStatComponent.cpp">
	746	</File>
	747	<File
	748	RelativePath=".\suffStatComponent.h">
	749	</File>
	750	</Filter>
	751	<Filter
	752	Name="Optimization"
	753	Filter="">
	754	<File
	755	RelativePath=".\bblEM.cpp">
	756	</File>
	757	<File
	758	RelativePath=".\bblEM.h">
	759	</File>
	760	<File
	761	RelativePath=".\bblEM2codon.cpp">
	762	</File>
	763	<File
	764	RelativePath=".\bblEM2codon.h">
	765	</File>
	766	<File
	767	RelativePath=".\bblEM2USSRV.cpp">
	768	</File>
	769	<File
	770	RelativePath=".\bblEM2USSRV.h">
	771	</File>
	772	<File
	773	RelativePath=".\bblEMfixRoot.cpp">
	774	</File>
	775	<File
	776	RelativePath=".\bblEMfixRoot.h">
	777	</File>
	778	<File
	779	RelativePath=".\bblEMProportional.h">
	780	</File>
	781	<File
	782	RelativePath=".\bblEMProportionalEB.cpp">
	783	</File>
	784	<File
	785	RelativePath=".\bblEMProportionalEB.h">
	786	</File>
	787	<File
	788	RelativePath=".\bblEMProprtional.cpp">
	789	</File>
	790	<File
	791	RelativePath=".\bblEMSeperate.cpp">
	792	</File>
	793	<File
	794	RelativePath=".\bblEMSeperate.h">
	795	</File>
	796	<File
	797	RelativePath=".\bestAlpha.cpp">
	798	</File>
	799	<File
	800	RelativePath=".\bestAlpha.h">
	801	</File>
	802	<File
	803	RelativePath=".\bestAlphaAndK.cpp">
	804	</File>
	805	<File
	806	RelativePath=".\bestAlphaAndK.h">
	807	</File>
	808	<File
	809	RelativePath=".\bestAlphaAndNu.cpp">
	810	</File>
	811	<File
	812	RelativePath=".\bestAlphaAndNu.h">
	813	</File>
	814	<File
	815	RelativePath=".\bestAlphaManyTrees.cpp">
	816	</File>
	817	<File
	818	RelativePath=".\bestAlphaManyTrees.h">
	819	</File>
	820	<File
	821	RelativePath=".\bestGtrModelParams.cpp">
	822	</File>
	823	<File
	824	RelativePath=".\bestGtrModelParams.h">
	825	</File>
	826	<File
	827	RelativePath=".\bestHKYparam.cpp">
	828	</File>
	829	<File
	830	RelativePath=".\bestHKYparam.h">
	831	</File>
	832	<File
	833	RelativePath=".\bestParamUSSRV.cpp">
	834	</File>
	835	<File
	836	RelativePath=".\bestParamUSSRV.h">
	837	</File>
	838	<File
	839	RelativePath=".\bestTamura92param.cpp">
	840	</File>
	841	<File
	842	RelativePath=".\bestTamura92param.h">
	843	</File>
	844	<File
	845	RelativePath=".\C_evalParamUSSRV.cpp">
	846	</File>
	847	<File
	848	RelativePath=".\C_evalParamUSSRV.h">
	849	</File>
	850	<File
	851	RelativePath=".\computeCounts.cpp">
	852	</File>
	853	<File
	854	RelativePath=".\computeCounts.h">
	855	</File>
	856	<File
	857	RelativePath=".\countTableComponent.cpp">
	858	</File>
	859	<File
	860	RelativePath=".\countTableComponent.h">
	861	</File>
	862	<File
	863	RelativePath=".\fromCountTableComponentToDistance2Codon.cpp">
	864	</File>
	865	<File
	866	RelativePath=".\fromCountTableComponentToDistance2Codon.h">
	867	</File>
	868	<File
	869	RelativePath=".\fromCountTableComponentToDistancefixRoot.cpp">
	870	</File>
	871	<File
	872	RelativePath=".\fromCountTableComponentToDistancefixRoot.h">
	873	</File>
	874	</Filter>
	875	<Filter
	876	Name="Splits"
	877	Filter="">
	878	<File
	879	RelativePath=".\getRandomWeights.cpp">
	880	</File>
	881	<File
	882	RelativePath=".\getRandomWeights.h">
	883	</File>
	884	<File
	885	RelativePath=".\split.cpp">
	886	</File>
	887	<File
	888	RelativePath=".\split.h">
	889	</File>
	890	<File
	891	RelativePath=".\splitMap.cpp">
	892	</File>
	893	<File
	894	RelativePath=".\splitMap.h">
	895	</File>
	896	<File
	897	RelativePath=".\splitTreeUtil.cpp">
	898	</File>
	899	<File
	900	RelativePath=".\splitTreeUtil.h">
	901	</File>
	902	</Filter>
	903	<Filter
	904	Name="distance-methods"
	905	Filter="">
	906	<File
	907	RelativePath=".\distanceBasedSeqs2Tree.cpp">
	908	</File>
	909	<File
	910	RelativePath=".\distanceBasedSeqs2Tree.h">
	911	</File>
	912	<File
	913	RelativePath=".\distanceMethod.h">
	914	</File>
	915	<File
	916	RelativePath=".\distances2Tree.h">
	917	</File>
	918	<File
	919	RelativePath=".\distanceTable.cpp">
	920	</File>
	921	<File
	922	RelativePath=".\distanceTable.h">
	923	</File>
	924	<File
	925	RelativePath=".\fromCountTableComponentToDistance.cpp">
	926	</File>
	927	<File
	928	RelativePath=".\fromCountTableComponentToDistance.h">
	929	</File>
	930	<File
	931	RelativePath=".\fromCountTableComponentToDistance2USSRV.cpp">
	932	</File>
	933	<File
	934	RelativePath=".\fromCountTableComponentToDistance2USSRV.h">
	935	</File>
	936	<File
	937	RelativePath=".\fromCountTableComponentToDistanceProp.cpp">
	938	</File>
	939	<File
	940	RelativePath=".\fromCountTableComponentToDistanceProp.h">
	941	</File>
	942	<File
	943	RelativePath=".\fromCountTableComponentToDistancePropEB.cpp">
	944	</File>
	945	<File
	946	RelativePath=".\fromCountTableComponentToDistancePropEB.h">
	947	</File>
	948	<File
	949	RelativePath=".\givenRatesMLDistance.cpp">
	950	</File>
	951	<File
	952	RelativePath=".\givenRatesMLDistance.h">
	953	</File>
	954	<File
	955	RelativePath=".\jcDistance.h">
	956	</File>
	957	<File
	958	RelativePath=".\likeDist.cpp">
	959	</File>
	960	<File
	961	RelativePath=".\likeDist.h">
	962	</File>
	963	<File
	964	RelativePath=".\likeDist2USSRV.cpp">
	965	</File>
	966	<File
	967	RelativePath=".\likeDist2USSRV.h">
	968	</File>
	969	<File
	970	RelativePath=".\likeDistfixRoot.cpp">
	971	</File>
	972	<File
	973	RelativePath=".\likeDistfixRoot.h">
	974	</File>
	975	<File
	976	RelativePath=".\likeDistProp.cpp">
	977	</File>
	978	<File
	979	RelativePath=".\likeDistProp.h">
	980	</File>
	981	<File
	982	RelativePath=".\likeDistPropEB.cpp">
	983	</File>
	984	<File
	985	RelativePath=".\likeDistPropEB.h">
	986	</File>
	987	<File
	988	RelativePath=".\nj.cpp">
	989	</File>
	990	<File
	991	RelativePath=".\nj.h">
	992	</File>
	993	<File
	994	RelativePath=".\njConstrain.cpp">
	995	</File>
	996	<File
	997	RelativePath=".\njConstrain.h">
	998	</File>
	999	<File
	1000	RelativePath=".\pairwiseGammaDistance.cpp">
	1001	</File>
	1002	<File
	1003	RelativePath=".\pairwiseGammaDistance.h">
	1004	</File>
	1005	<File
	1006	RelativePath=".\pDistance.h">
	1007	</File>
	1008	<File
	1009	RelativePath=".\posteriorDistance.cpp">
	1010	</File>
	1011	<File
	1012	RelativePath=".\posteriorDistance.h">
	1013	</File>
	1014	<File
	1015	RelativePath=".\ssrvDistanceSeqs2Tree.cpp">
	1016	</File>
	1017	<File
	1018	RelativePath=".\ssrvDistanceSeqs2Tree.h">
	1019	</File>
	1020	</Filter>
	1021	<Filter
	1022	Name="Utils-stuff"
	1023	Filter="">
	1024	<File
	1025	RelativePath=".\AddLog.cpp">
	1026	</File>
	1027	<File
	1028	RelativePath=".\AddLog.h">
	1029	</File>
	1030	<File
	1031	RelativePath=".\codonUtils.cpp">
	1032	</File>
	1033	<File
	1034	RelativePath=".\codonUtils.h">
	1035	</File>
	1036	<File
	1037	RelativePath=".\computePosteriorExpectationOfSubstitutions.cpp">
	1038	</File>
	1039	<File
	1040	RelativePath=".\computePosteriorExpectationOfSubstitutions.h">
	1041	</File>
	1042	<File
	1043	RelativePath=".\computePosteriorExpectationOfSubstitutions_nonReversibleSp.cpp">
	1044	</File>
	1045	<File
	1046	RelativePath=".\computePosteriorExpectationOfSubstitutions_nonReversibleSp.h">
	1047	</File>
	1048	<File
	1049	RelativePath=".\computeSubstitutionCounts.cpp">
	1050	</File>
	1051	<File
	1052	RelativePath=".\computeSubstitutionCounts.h">
	1053	</File>
	1054	<File
	1055	RelativePath=".\ConversionUtils.cpp">
	1056	</File>
	1057	<File
	1058	RelativePath=".\ConversionUtils.h">
	1059	</File>
	1060	<File
	1061	RelativePath=".\definitions.h">
	1062	</File>
	1063	<File
	1064	RelativePath=".\errorMsg.cpp">
	1065	</File>
	1066	<File
	1067	RelativePath=".\errorMsg.h">
	1068	</File>
	1069	<File
	1070	RelativePath=".\fromInstructionFile.cpp">
	1071	</File>
	1072	<File
	1073	RelativePath=".\fromInstructionFile.h">
	1074	</File>
	1075	<File
	1076	RelativePath=".\getopt.c">
	1077	</File>
	1078	<File
	1079	RelativePath=".\getopt.h">
	1080	</File>
	1081	<File
	1082	RelativePath=".\getopt1.c">
	1083	</File>
	1084	<File
	1085	RelativePath=".\logFile.cpp">
	1086	</File>
	1087	<File
	1088	RelativePath=".\logFile.h">
	1089	</File>
	1090	<File
	1091	RelativePath=".\matrixUtils.cpp">
	1092	</File>
	1093	<File
	1094	RelativePath=".\matrixUtils.h">
	1095	</File>
	1096	<File
	1097	RelativePath=".\normalDist.cpp">
	1098	</File>
	1099	<File
	1100	RelativePath=".\normalDist.h">
	1101	</File>
	1102	<File
	1103	RelativePath=".\numRec.cpp">
	1104	</File>
	1105	<File
	1106	RelativePath=".\numRec.h">
	1107	</File>
	1108	<File
	1109	RelativePath=".\simulateCodonsJumps.cpp">
	1110	</File>
	1111	<File
	1112	RelativePath=".\simulateCodonsJumps.h">
	1113	</File>
	1114	<File
	1115	RelativePath=".\simulateJumps.cpp">
	1116	</File>
	1117	<File
	1118	RelativePath=".\simulateJumps.h">
	1119	</File>
	1120	<File
	1121	RelativePath=".\simulateJumpsAbstract.cpp">
	1122	</File>
	1123	<File
	1124	RelativePath=".\simulateJumpsAbstract.h">
	1125	</File>
	1126	<File
	1127	RelativePath=".\simulateRateShiftJumps.cpp">
	1128	</File>
	1129	<File
	1130	RelativePath=".\simulateRateShiftJumps.h">
	1131	</File>
	1132	<File
	1133	RelativePath=".\someUtil.cpp">
	1134	</File>
	1135	<File
	1136	RelativePath=".\someUtil.h">
	1137	</File>
	1138	<File
	1139	RelativePath=".\talRandom.cpp">
	1140	</File>
	1141	<File
	1142	RelativePath=".\talRandom.h">
	1143	</File>
	1144	</Filter>
	1145	<Filter
	1146	Name="Rates"
	1147	Filter="">
	1148	<File
	1149	RelativePath=".\siteSpecificRate.cpp">
	1150	</File>
	1151	<File
	1152	RelativePath=".\siteSpecificRate.h">
	1153	</File>
	1154	</Filter>
	1155	<Filter
	1156	Name="Misc"
	1157	Filter="">
	1158	<File
	1159	RelativePath=".\checkcovFanctors.h">
	1160	</File>
	1161	<File
	1162	RelativePath=".\checkcovFanctorsWithFactors.h">
	1163	</File>
	1164	<File
	1165	RelativePath=".\cmdline2EvolObjs.cpp">
	1166	</File>
	1167	<File
	1168	RelativePath=".\cmdline2EvolObjs.h">
	1169	</File>
	1170	<File
	1171	RelativePath=".\cmdline2EvolObjs.separate_template_classes.h">
	1172	</File>
	1173	<File
	1174	RelativePath=".\createSPFromArgsInfo.h">
	1175	</File>
	1176	<File
	1177	RelativePath=".\doubleRep.cpp">
	1178	</File>
	1179	<File
	1180	RelativePath=".\doubleRep.h">
	1181	</File>
	1182	<File
	1183	RelativePath=".\findRateOfGene.cpp">
	1184	</File>
	1185	<File
	1186	RelativePath=".\findRateOfGene.h">
	1187	</File>
	1188	<File
	1189	RelativePath=".\GLaguer.cpp">
	1190	</File>
	1191	<File
	1192	RelativePath=".\GLaguer.h">
	1193	</File>
	1194	<File
	1195	RelativePath=".\khTest.cpp">
	1196	</File>
	1197	<File
	1198	RelativePath=".\khTest.h">
	1199	</File>
	1200	<File
	1201	RelativePath=".\logRep.cpp">
	1202	</File>
	1203	<File
	1204	RelativePath=".\logRep.h">
	1205	</File>
	1206	<File
	1207	RelativePath=".\Parameters.cpp">
	1208	</File>
	1209	<File
	1210	RelativePath=".\Parameters.h">
	1211	</File>
	1212	<File
	1213	RelativePath=".\searchStatus.cpp">
	1214	</File>
	1215	<File
	1216	RelativePath=".\searchStatus.h">
	1217	</File>
	1218	</Filter>
	1219	<File
	1220	RelativePath=".\computeJumps.cpp">
	1221	</File>
	1222	<File
	1223	RelativePath=".\computeJumps.h">
	1224	</File>
	1225	<File
	1226	RelativePath=".\Makefile">
	1227	</File>
	1228	<File
	1229	RelativePath=".\unObservableData.cpp">
	1230	</File>
	1231	<File
	1232	RelativePath=".\unObservableData.h">
	1233	</File>
	1234	</Files>
	1235	<Globals>
	1236	</Globals>
	1237	</VisualStudioProject>

+449

-0

libs/phylogeny/phylogeny.vcxproj less more

	0	<?xml version="1.0" encoding="utf-8"?>
	1	<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
	2	<ItemGroup Label="ProjectConfigurations">
	3	<ProjectConfiguration Include="Debug|Win32">
	4	<Configuration>Debug</Configuration>
	5	<Platform>Win32</Platform>
	6	</ProjectConfiguration>
	7	<ProjectConfiguration Include="Release|Win32">
	8	<Configuration>Release</Configuration>
	9	<Platform>Win32</Platform>
	10	</ProjectConfiguration>
	11	</ItemGroup>
	12	<PropertyGroup Label="Globals">
	13	<ProjectName>phylogenyLib</ProjectName>
	14	<ProjectGuid>{BEB52DB0-2B2A-41F0-BB49-9EC9817ACBEE}</ProjectGuid>
	15	<Keyword>Win32Proj</Keyword>
	16	</PropertyGroup>
	17	<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
	18	<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
	19	<ConfigurationType>StaticLibrary</ConfigurationType>
	20	<CharacterSet>MultiByte</CharacterSet>
	21	</PropertyGroup>
	22	<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
	23	<ConfigurationType>StaticLibrary</ConfigurationType>
	24	<CharacterSet>MultiByte</CharacterSet>
	25	</PropertyGroup>
	26	<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
	27	<ImportGroup Label="ExtensionSettings">
	28	</ImportGroup>
	29	<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
	30	<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
	31	</ImportGroup>
	32	<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
	33	<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
	34	</ImportGroup>
	35	<PropertyGroup Label="UserMacros" />
	36	<PropertyGroup>
	37	<_ProjectFileVersion>10.0.30319.1</_ProjectFileVersion>
	38	<OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Debug\</OutDir>
	39	<IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Debug\</IntDir>
	40	<OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Release\</OutDir>
	41	<IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Release\</IntDir>
	42	<CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
	43	<CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
	44	<CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
	45	<CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
	46	<CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
	47	<CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
	48	</PropertyGroup>
	49	<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
	50	<ClCompile>
	51	<Optimization>Disabled</Optimization>
	52	<PreprocessorDefinitions>WIN32;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
	53	<MinimalRebuild>true</MinimalRebuild>
	54	<BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
	55	<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
	56	<RuntimeTypeInfo>true</RuntimeTypeInfo>
	57	<PrecompiledHeader>
	58	</PrecompiledHeader>
	59	<WarningLevel>Level3</WarningLevel>
	60	<DebugInformationFormat>EditAndContinue</DebugInformationFormat>
	61	</ClCompile>
	62	<Lib>
	63	<OutputFile>$(OutDir)phylogeny.lib</OutputFile>
	64	</Lib>
	65	</ItemDefinitionGroup>
	66	<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
	67	<ClCompile>
	68	<PreprocessorDefinitions>WIN32;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
	69	<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
	70	<RuntimeTypeInfo>true</RuntimeTypeInfo>
	71	<PrecompiledHeader>
	72	</PrecompiledHeader>
	73	<WarningLevel>Level3</WarningLevel>
	74	<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
	75	</ClCompile>
	76	<Lib>
	77	<OutputFile>$(OutDir)phylogeny.lib</OutputFile>
	78	</Lib>
	79	</ItemDefinitionGroup>
	80	<ItemGroup>
	81	<None Include="adrianCodon.dat.q" />
	82	<None Include="cpREV45.dat.q" />
	83	<None Include="dayhoff.dat.q" />
	84	<None Include="HIVb.dat.q" />
	85	<None Include="HIVw.dat.q" />
	86	<None Include="jones.dat.q" />
	87	<None Include="LG.dat.q" />
	88	<None Include="mtREV24.dat.q" />
	89	<None Include="wag.dat.q" />
	90	<None Include="Makefile" />
	91	</ItemGroup>
	92	<ItemGroup>
	93	<ClCompile Include="alphabet.cpp" />
	94	<ClCompile Include="amino.cpp" />
	95	<ClCompile Include="bblLSProportionalEB.cpp" />
	96	<ClCompile Include="codon.cpp" />
	97	<ClCompile Include="extremeValDistribution.cpp" />
	98	<ClCompile Include="gainLossAlphabet.cpp" />
	99	<ClCompile Include="geneticCodeHolder.cpp" />
	100	<ClCompile Include="indel.cpp" />
	101	<ClCompile Include="integerAlphabet.cpp" />
	102	<ClCompile Include="mulAlphabet.cpp" />
	103	<ClCompile Include="nucleotide.cpp" />
	104	<ClCompile Include="oneTwoMoreModel.cpp" />
	105	<ClCompile Include="threeStateAlphabet.cpp" />
	106	<ClCompile Include="evaluateCharacterFreq.cpp" />
	107	<ClCompile Include="samplingSequences.cpp" />
	108	<ClCompile Include="seqContainerTreeMap.cpp" />
	109	<ClCompile Include="seqeuncesFilter.cpp" />
	110	<ClCompile Include="sequence.cpp" />
	111	<ClCompile Include="sequenceContainer.cpp" />
	112	<ClCompile Include="betaDistribution.cpp" />
	113	<ClCompile Include="betaDistributionFixedCategories.cpp" />
	114	<ClCompile Include="betaDistributionFixedCategoriesWithOmegaUniform.cpp" />
	115	<ClCompile Include="betaOmegaDistribution.cpp" />
	116	<ClCompile Include="betaUtilities.cpp" />
	117	<ClCompile Include="distribution.cpp" />
	118	<ClCompile Include="distributionPlusCategory.cpp" />
	119	<ClCompile Include="distributionPlusInvariant.cpp" />
	120	<ClCompile Include="gammaDistribution.cpp" />
	121	<ClCompile Include="gammaDistributionFixedCategories.cpp" />
	122	<ClCompile Include="gammaDistributionLaguerre.cpp" />
	123	<ClCompile Include="gammaDistributionPlusInvariant.cpp" />
	124	<ClCompile Include="gammaUtilities.cpp" />
	125	<ClCompile Include="GamMixtureOptimizer.cpp" />
	126	<ClCompile Include="generalGammaDistribution.cpp" />
	127	<ClCompile Include="generalGammaDistributionFixedCategories.cpp" />
	128	<ClCompile Include="generalGammaDistributionLaguerre.cpp" />
	129	<ClCompile Include="generalGammaDistributionPlusInvariant.cpp" />
	130	<ClCompile Include="mixtureDistribution.cpp" />
	131	<ClCompile Include="optGammaMixtureEM.cpp" />
	132	<ClCompile Include="optGammaMixtureLS.cpp" />
	133	<ClCompile Include="suffStatGammaMixture.cpp" />
	134	<ClCompile Include="uniDistribution.cpp" />
	135	<ClCompile Include="uniformDistribution.cpp" />
	136	<ClCompile Include="datMatrixHolder.cpp" />
	137	<ClCompile Include="fromQtoPt.cpp" />
	138	<ClCompile Include="granthamChemicalDistances.cpp" />
	139	<ClCompile Include="readDatMatrix.cpp" />
	140	<ClCompile Include="ussrvModel.cpp" />
	141	<ClCompile Include="chebyshevAccelerator.cpp" />
	142	<ClCompile Include="pijAccelerator.cpp" />
	143	<ClCompile Include="aaJC.cpp" />
	144	<ClCompile Include="codonJC.cpp" />
	145	<ClCompile Include="goldmanYangModel.cpp" />
	146	<ClCompile Include="gtrModel.cpp" />
	147	<ClCompile Include="hky.cpp" />
	148	<ClCompile Include="indelModel.cpp" />
	149	<ClCompile Include="nucJC.cpp" />
	150	<ClCompile Include="replacementModel.cpp" />
	151	<ClCompile Include="replacementModelSSRV.cpp" />
	152	<ClCompile Include="tamura92.cpp" />
	153	<ClCompile Include="threeStateModel.cpp" />
	154	<ClCompile Include="wYangModel.cpp" />
	155	<ClCompile Include="multipleStochasticProcess.cpp" />
	156	<ClCompile Include="stochasticProcess.cpp" />
	157	<ClCompile Include="stochasticProcessSSRV.cpp" />
	158	<ClCompile Include="allTrees.cpp" />
	159	<ClCompile Include="allTreesSeparateModel.cpp" />
	160	<ClCompile Include="bootstrap.cpp" />
	161	<ClCompile Include="fastStartTree.cpp" />
	162	<ClCompile Include="readTree.cpp" />
	163	<ClCompile Include="simulateTree.cpp" />
	164	<ClCompile Include="tree.cpp" />
	165	<ClCompile Include="treeInference.cpp" />
	166	<ClCompile Include="treeIt.cpp" />
	167	<ClCompile Include="treeUtil.cpp" />
	168	<ClCompile Include="Nni.cpp" />
	169	<ClCompile Include="NNiProp.cpp" />
	170	<ClCompile Include="NNiSep.cpp" />
	171	<ClCompile Include="clustalFormat.cpp" />
	172	<ClCompile Include="fastaFormat.cpp" />
	173	<ClCompile Include="maseFormat.cpp" />
	174	<ClCompile Include="molphyFormat.cpp" />
	175	<ClCompile Include="nexusFormat.cpp" />
	176	<ClCompile Include="phylipFormat.cpp" />
	177	<ClCompile Include="phylipSequentialFormat.cpp" />
	178	<ClCompile Include="recognizeFormat.cpp" />
	179	<ClCompile Include="computeDownAlg.cpp" />
	180	<ClCompile Include="computeMarginalAlg.cpp" />
	181	<ClCompile Include="computePijComponent.cpp" />
	182	<ClCompile Include="computeUpAlg.cpp" />
	183	<ClCompile Include="computeUpAlgFactors.cpp" />
	184	<ClCompile Include="likeDist2Codon.cpp" />
	185	<ClCompile Include="likelihoodComputation.cpp" />
	186	<ClCompile Include="likelihoodComputation2Codon.cpp" />
	187	<ClCompile Include="likelihoodComputation2USSRV.cpp" />
	188	<ClCompile Include="likelihoodComputationFactors.cpp" />
	189	<ClCompile Include="likelihoodComputationGL.cpp" />
	190	<ClCompile Include="suffStatComponent.cpp" />
	191	<ClCompile Include="bblEM.cpp" />
	192	<ClCompile Include="bblEM2codon.cpp" />
	193	<ClCompile Include="bblEM2USSRV.cpp" />
	194	<ClCompile Include="bblEMfixRoot.cpp" />
	195	<ClCompile Include="bblEMProportionalEB.cpp" />
	196	<ClCompile Include="bblEMProprtional.cpp" />
	197	<ClCompile Include="bblEMSeperate.cpp" />
	198	<ClCompile Include="bestAlpha.cpp" />
	199	<ClCompile Include="bestAlphaAndK.cpp" />
	200	<ClCompile Include="bestAlphaAndNu.cpp" />
	201	<ClCompile Include="bestAlphaManyTrees.cpp" />
	202	<ClCompile Include="bestGtrModelParams.cpp" />
	203	<ClCompile Include="bestHKYparam.cpp" />
	204	<ClCompile Include="bestParamUSSRV.cpp" />
	205	<ClCompile Include="bestTamura92param.cpp" />
	206	<ClCompile Include="C_evalParamUSSRV.cpp" />
	207	<ClCompile Include="computeCounts.cpp" />
	208	<ClCompile Include="countTableComponent.cpp" />
	209	<ClCompile Include="fromCountTableComponentToDistance2Codon.cpp" />
	210	<ClCompile Include="fromCountTableComponentToDistancefixRoot.cpp" />
	211	<ClCompile Include="getRandomWeights.cpp" />
	212	<ClCompile Include="split.cpp" />
	213	<ClCompile Include="splitMap.cpp" />
	214	<ClCompile Include="splitTreeUtil.cpp" />
	215	<ClCompile Include="distanceBasedSeqs2Tree.cpp" />
	216	<ClCompile Include="distanceTable.cpp" />
	217	<ClCompile Include="fromCountTableComponentToDistance.cpp" />
	218	<ClCompile Include="fromCountTableComponentToDistance2USSRV.cpp" />
	219	<ClCompile Include="fromCountTableComponentToDistanceProp.cpp" />
	220	<ClCompile Include="fromCountTableComponentToDistancePropEB.cpp" />
	221	<ClCompile Include="givenRatesMLDistance.cpp" />
	222	<ClCompile Include="likeDist.cpp" />
	223	<ClCompile Include="likeDist2USSRV.cpp" />
	224	<ClCompile Include="likeDistfixRoot.cpp" />
	225	<ClCompile Include="likeDistProp.cpp" />
	226	<ClCompile Include="likeDistPropEB.cpp" />
	227	<ClCompile Include="nj.cpp" />
	228	<ClCompile Include="njConstrain.cpp" />
	229	<ClCompile Include="pairwiseGammaDistance.cpp" />
	230	<ClCompile Include="posteriorDistance.cpp" />
	231	<ClCompile Include="ssrvDistanceSeqs2Tree.cpp" />
	232	<ClCompile Include="AddLog.cpp" />
	233	<ClCompile Include="codonUtils.cpp" />
	234	<ClCompile Include="computePosteriorExpectationOfSubstitutions.cpp" />
	235	<ClCompile Include="computePosteriorExpectationOfSubstitutions_nonReversibleSp.cpp" />
	236	<ClCompile Include="computeSubstitutionCounts.cpp" />
	237	<ClCompile Include="ConversionUtils.cpp" />
	238	<ClCompile Include="errorMsg.cpp" />
	239	<ClCompile Include="fromInstructionFile.cpp" />
	240	<ClCompile Include="getopt.c" />
	241	<ClCompile Include="getopt1.c" />
	242	<ClCompile Include="logFile.cpp" />
	243	<ClCompile Include="matrixUtils.cpp" />
	244	<ClCompile Include="normalDist.cpp" />
	245	<ClCompile Include="numRec.cpp" />
	246	<ClCompile Include="simulateCodonsJumps.cpp" />
	247	<ClCompile Include="simulateJumps.cpp" />
	248	<ClCompile Include="simulateJumpsAbstract.cpp" />
	249	<ClCompile Include="simulateRateShiftJumps.cpp" />
	250	<ClCompile Include="someUtil.cpp" />
	251	<ClCompile Include="talRandom.cpp" />
	252	<ClCompile Include="siteSpecificRate.cpp" />
	253	<ClCompile Include="cmdline2EvolObjs.cpp" />
	254	<ClCompile Include="doubleRep.cpp" />
	255	<ClCompile Include="findRateOfGene.cpp" />
	256	<ClCompile Include="GLaguer.cpp" />
	257	<ClCompile Include="khTest.cpp" />
	258	<ClCompile Include="logRep.cpp" />
	259	<ClCompile Include="Parameters.cpp" />
	260	<ClCompile Include="searchStatus.cpp" />
	261	<ClCompile Include="computeJumps.cpp" />
	262	<ClCompile Include="unObservableData.cpp" />
	263	</ItemGroup>
	264	<ItemGroup>
	265	<ClInclude Include="alphabet.h" />
	266	<ClInclude Include="amino.h" />
	267	<ClInclude Include="bblLSProportionalEB.h" />
	268	<ClInclude Include="codon.h" />
	269	<ClInclude Include="extremeValDistribution.h" />
	270	<ClInclude Include="gainLossAlphabet.h" />
	271	<ClInclude Include="geneticCodeHolder.h" />
	272	<ClInclude Include="indel.h" />
	273	<ClInclude Include="integerAlphabet.h" />
	274	<ClInclude Include="mulAlphabet.h" />
	275	<ClInclude Include="nucleotide.h" />
	276	<ClInclude Include="oneTwoMoreModel.h" />
	277	<ClInclude Include="threeStateAlphabet.h" />
	278	<ClInclude Include="evaluateCharacterFreq.h" />
	279	<ClInclude Include="samplingSequences.h" />
	280	<ClInclude Include="seqContainerTreeMap.h" />
	281	<ClInclude Include="seqeuncesFilter.h" />
	282	<ClInclude Include="sequence.h" />
	283	<ClInclude Include="sequenceContainer.h" />
	284	<ClInclude Include="betaDistribution.h" />
	285	<ClInclude Include="betaDistributionFixedCategories.h" />
	286	<ClInclude Include="betaDistributionFixedCategoriesWithOmegaUniform.h" />
	287	<ClInclude Include="betaOmegaDistribution.h" />
	288	<ClInclude Include="betaUtilities.h" />
	289	<ClInclude Include="distribution.h" />
	290	<ClInclude Include="distributionPlusCategory.h" />
	291	<ClInclude Include="distributionPlusInvariant.h" />
	292	<ClInclude Include="gammaDistribution.h" />
	293	<ClInclude Include="gammaDistributionFixedCategories.h" />
	294	<ClInclude Include="gammaDistributionLaguerre.h" />
	295	<ClInclude Include="gammaDistributionPlusInvariant.h" />
	296	<ClInclude Include="gammaUtilities.h" />
	297	<ClInclude Include="GamMixtureOptimizer.h" />
	298	<ClInclude Include="generalGammaDistribution.h" />
	299	<ClInclude Include="generalGammaDistributionFixedCategories.h" />
	300	<ClInclude Include="generalGammaDistributionLaguerre.h" />
	301	<ClInclude Include="generalGammaDistributionPlusInvariant.h" />
	302	<ClInclude Include="mixtureDistribution.h" />
	303	<ClInclude Include="optGammaMixtureEM.h" />
	304	<ClInclude Include="optGammaMixtureLS.h" />
	305	<ClInclude Include="suffStatGammaMixture.h" />
	306	<ClInclude Include="uniDistribution.h" />
	307	<ClInclude Include="uniformDistribution.h" />
	308	<ClInclude Include="datMatrixHolder.h" />
	309	<ClInclude Include="fromQtoPt.h" />
	310	<ClInclude Include="granthamChemicalDistances.h" />
	311	<ClInclude Include="readDatMatrix.h" />
	312	<ClInclude Include="ussrvModel.h" />
	313	<ClInclude Include="alphaTrivialAccelerator.h" />
	314	<ClInclude Include="chebyshevAccelerator.h" />
	315	<ClInclude Include="pijAccelerator.h" />
	316	<ClInclude Include="trivialAccelerator.h" />
	317	<ClInclude Include="aaJC.h" />
	318	<ClInclude Include="codonJC.h" />
	319	<ClInclude Include="goldmanYangModel.h" />
	320	<ClInclude Include="gtrModel.h" />
	321	<ClInclude Include="hky.h" />
	322	<ClInclude Include="indelModel.h" />
	323	<ClInclude Include="nucJC.h" />
	324	<ClInclude Include="replacementModel.h" />
	325	<ClInclude Include="replacementModelSSRV.h" />
	326	<ClInclude Include="tamura92.h" />
	327	<ClInclude Include="threeStateModel.h" />
	328	<ClInclude Include="wYangModel.h" />
	329	<ClInclude Include="multipleStochasticProcess.h" />
	330	<ClInclude Include="stochasticProcess.h" />
	331	<ClInclude Include="stochasticProcessSSRV.h" />
	332	<ClInclude Include="allTrees.h" />
	333	<ClInclude Include="allTreesSeparateModel.h" />
	334	<ClInclude Include="bootstrap.h" />
	335	<ClInclude Include="fastStartTree.h" />
	336	<ClInclude Include="readTree.h" />
	337	<ClInclude Include="simulateTree.h" />
	338	<ClInclude Include="tree.h" />
	339	<ClInclude Include="treeInference.h" />
	340	<ClInclude Include="treeIt.h" />
	341	<ClInclude Include="treeUtil.h" />
	342	<ClInclude Include="Nni.h" />
	343	<ClInclude Include="NNiProp.h" />
	344	<ClInclude Include="NNiSep.h" />
	345	<ClInclude Include="clustalFormat.h" />
	346	<ClInclude Include="fastaFormat.h" />
	347	<ClInclude Include="maseFormat.h" />
	348	<ClInclude Include="molphyFormat.h" />
	349	<ClInclude Include="nexusFormat.h" />
	350	<ClInclude Include="phylipFormat.h" />
	351	<ClInclude Include="phylipSequentialFormat.h" />
	352	<ClInclude Include="recognizeFormat.h" />
	353	<ClInclude Include="computeDownAlg.h" />
	354	<ClInclude Include="computeMarginalAlg.h" />
	355	<ClInclude Include="computePijComponent.h" />
	356	<ClInclude Include="computeUpAlg.h" />
	357	<ClInclude Include="likeDist2Codon.h" />
	358	<ClInclude Include="likelihoodComputation.h" />
	359	<ClInclude Include="likelihoodComputation2Codon.h" />
	360	<ClInclude Include="likelihoodComputation2USSRV.h" />
	361	<ClInclude Include="likelihoodComputationFactors.h" />
	362	<ClInclude Include="likelihoodComputationGL.h" />
	363	<ClInclude Include="suffStatComponent.h" />
	364	<ClInclude Include="bblEM.h" />
	365	<ClInclude Include="bblEM2codon.h" />
	366	<ClInclude Include="bblEM2USSRV.h" />
	367	<ClInclude Include="bblEMfixRoot.h" />
	368	<ClInclude Include="bblEMProportional.h" />
	369	<ClInclude Include="bblEMProportionalEB.h" />
	370	<ClInclude Include="bblEMSeperate.h" />
	371	<ClInclude Include="bestAlpha.h" />
	372	<ClInclude Include="bestAlphaAndK.h" />
	373	<ClInclude Include="bestAlphaAndNu.h" />
	374	<ClInclude Include="bestAlphaManyTrees.h" />
	375	<ClInclude Include="bestGtrModelParams.h" />
	376	<ClInclude Include="bestHKYparam.h" />
	377	<ClInclude Include="bestParamUSSRV.h" />
	378	<ClInclude Include="bestTamura92param.h" />
	379	<ClInclude Include="C_evalParamUSSRV.h" />
	380	<ClInclude Include="computeCounts.h" />
	381	<ClInclude Include="countTableComponent.h" />
	382	<ClInclude Include="fromCountTableComponentToDistance2Codon.h" />
	383	<ClInclude Include="fromCountTableComponentToDistancefixRoot.h" />
	384	<ClInclude Include="getRandomWeights.h" />
	385	<ClInclude Include="split.h" />
	386	<ClInclude Include="splitMap.h" />
	387	<ClInclude Include="splitTreeUtil.h" />
	388	<ClInclude Include="distanceBasedSeqs2Tree.h" />
	389	<ClInclude Include="distanceMethod.h" />
	390	<ClInclude Include="distances2Tree.h" />
	391	<ClInclude Include="distanceTable.h" />
	392	<ClInclude Include="fromCountTableComponentToDistance.h" />
	393	<ClInclude Include="fromCountTableComponentToDistance2USSRV.h" />
	394	<ClInclude Include="fromCountTableComponentToDistanceProp.h" />
	395	<ClInclude Include="fromCountTableComponentToDistancePropEB.h" />
	396	<ClInclude Include="givenRatesMLDistance.h" />
	397	<ClInclude Include="jcDistance.h" />
	398	<ClInclude Include="likeDist.h" />
	399	<ClInclude Include="likeDist2USSRV.h" />
	400	<ClInclude Include="likeDistfixRoot.h" />
	401	<ClInclude Include="likeDistProp.h" />
	402	<ClInclude Include="likeDistPropEB.h" />
	403	<ClInclude Include="nj.h" />
	404	<ClInclude Include="njConstrain.h" />
	405	<ClInclude Include="pairwiseGammaDistance.h" />
	406	<ClInclude Include="pDistance.h" />
	407	<ClInclude Include="posteriorDistance.h" />
	408	<ClInclude Include="ssrvDistanceSeqs2Tree.h" />
	409	<ClInclude Include="AddLog.h" />
	410	<ClInclude Include="codonUtils.h" />
	411	<ClInclude Include="computePosteriorExpectationOfSubstitutions.h" />
	412	<ClInclude Include="computePosteriorExpectationOfSubstitutions_nonReversibleSp.h" />
	413	<ClInclude Include="computeSubstitutionCounts.h" />
	414	<ClInclude Include="ConversionUtils.h" />
	415	<ClInclude Include="definitions.h" />
	416	<ClInclude Include="errorMsg.h" />
	417	<ClInclude Include="fromInstructionFile.h" />
	418	<ClInclude Include="getopt.h" />
	419	<ClInclude Include="logFile.h" />
	420	<ClInclude Include="matrixUtils.h" />
	421	<ClInclude Include="normalDist.h" />
	422	<ClInclude Include="numRec.h" />
	423	<ClInclude Include="simulateCodonsJumps.h" />
	424	<ClInclude Include="simulateJumps.h" />
	425	<ClInclude Include="simulateJumpsAbstract.h" />
	426	<ClInclude Include="simulateRateShiftJumps.h" />
	427	<ClInclude Include="someUtil.h" />
	428	<ClInclude Include="talRandom.h" />
	429	<ClInclude Include="siteSpecificRate.h" />
	430	<ClInclude Include="checkcovFanctors.h" />
	431	<ClInclude Include="checkcovFanctorsWithFactors.h" />
	432	<ClInclude Include="cmdline2EvolObjs.h" />
	433	<ClInclude Include="cmdline2EvolObjs.separate_template_classes.h" />
	434	<ClInclude Include="createSPFromArgsInfo.h" />
	435	<ClInclude Include="doubleRep.h" />
	436	<ClInclude Include="findRateOfGene.h" />
	437	<ClInclude Include="GLaguer.h" />
	438	<ClInclude Include="khTest.h" />
	439	<ClInclude Include="logRep.h" />
	440	<ClInclude Include="Parameters.h" />
	441	<ClInclude Include="searchStatus.h" />
	442	<ClInclude Include="computeJumps.h" />
	443	<ClInclude Include="unObservableData.h" />
	444	</ItemGroup>
	445	<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
	446	<ImportGroup Label="ExtensionTargets">
	447	</ImportGroup>
	448	</Project>⏎

+9

-0

libs/phylogeny/pijAccelerator.cpp less more

	0	// $Id: pijAccelerator.cpp 962 2006-11-07 15:13:34Z privmane $
	1
	2	#include "pijAccelerator.h"
	3
	4	pijAccelerator::~pijAccelerator(){}
	5	// This empty definition must be here: the destructor is declared pure virtual (= 0) in
	6	// pijAccelerator.h. See Effective C++ page 63 (item 14: constructors, destructors, assignment).
	7
	8

+26

-0

libs/phylogeny/pijAccelerator.h less more

	0	// $Id: pijAccelerator.h 962 2006-11-07 15:13:34Z privmane $
	1
	2	#ifndef ___PIJ_ACCELERATOR
	3	#define ___PIJ_ACCELERATOR
	4
	5	#include "definitions.h"
	6	#include "replacementModel.h"
	7
	8	class pijAccelerator { // abstract interface: every member function below is pure virtual
	9	public:
	10	virtual pijAccelerator* clone() const = 0; // returns a pijAccelerator*; presumably a polymorphic copy owned by the caller -- TODO confirm
	11	virtual ~pijAccelerator() = 0; // pure virtual destructor; its empty body is defined in pijAccelerator.cpp
	12	virtual const MDOUBLE Pij_t(const int i, const int j, const MDOUBLE t) const = 0; // presumably the probability of i changing to j over t -- confirm against implementations
	13	virtual const MDOUBLE freq(const int i) const = 0; // P(i)
	14	virtual const MDOUBLE dPij_dt(const int i, const int j, const MDOUBLE t) const =0; // presumably the first derivative of Pij_t with respect to t -- confirm
	15	virtual const MDOUBLE d2Pij_dt2(const int i, const int j, const MDOUBLE t) const =0; // presumably the second derivative of Pij_t with respect to t -- confirm
	16	virtual replacementModel* getReplacementModel() const =0; // @@@@ this const is a lie !!!
	17	virtual const int alphabetSize() const =0; // presumably the number of states valid as i and j above -- confirm
	18	};
	19
	20
	21
	22
	23
	24	#endif
	25

+420

-0

libs/phylogeny/posteriorDistance.cpp less more

	0	// $Id: posteriorDistance.cpp 5883 2009-02-06 10:42:11Z privmane $
	1
	2	#include "posteriorDistance.h"
	3	#include "numRec.h"
	4	#include "countTableComponent.h"
	5	#include "likeDist.h"
	6	#include "uniDistribution.h"
	7	#include "someUtil.h"
	8	#include "jcDistance.h"
	9	#include <cmath>
	10
	11
	12	class C_eval_gammaMLDistancesPosterior_d{ // functor over a candidate distance between sequences s1 and s2
	13	private:
	14	const stochasticProcess& _sp; // held by reference: must outlive this functor (same for _s1, _s2, _posteriorProb)
	15	const sequence& _s1;
	16	const sequence& _s2;
	17	const Vdouble* _weights; // optional per-position weights; a null pointer means weight 1.0 for every position
	18	const VVdoubleRep& _posteriorProb; // pos, rate
	19	public:
	20	C_eval_gammaMLDistancesPosterior_d(const stochasticProcess& sp,
	21	const sequence& s1,
	22	const sequence& s2,
	23	const VVdoubleRep& posteriorProb,
	24	const Vdouble * weights)
	25	: _sp(sp),
	26	_s1(s1),
	27	_s2(s2),
	28	_weights(weights),
	29	_posteriorProb(posteriorProb)
	30	{};
	31	// operator(): per position, accumulates posLikelihood and posLikelihood_d (built from dPij_dt, presumably
	32	// the derivative with respect to dist); returns minus the weighted sum of posLikelihood_d/posLikelihood.
	33	MDOUBLE operator() (MDOUBLE dist) {
	34	MDOUBLE sumL=0.0;
	35	doubleRep posLikelihood = 0.0;
	36	MDOUBLE posLikelihood_d = 0.0;
	37	for (int pos=0; pos < _s1.seqLen(); ++pos){
	38	if (_s1.isUnknown(pos) && _s2.isUnknown(pos)) continue; // the case of two unknowns
	39	posLikelihood = 0.0;
	40	posLikelihood_d = 0.0;
	41	if (_s1.isUnknown(pos) && _s2.isSpecific(pos)) {
	42	// this is the more complicated case, where s1 = ?, s2 = specific
	43	posLikelihood = _sp.freq(_s2[pos]);
	44	posLikelihood_d =0.0; // the likelihood here does not involve dist, so this position adds nothing to the sum
	45	}
	46	else if (_s2.isUnknown(pos) && _s1.isSpecific(pos)) {
	47	posLikelihood = _sp.freq(_s1[pos]);
	48	posLikelihood_d =0.0;
	49	} else {
	50	for (int rateCategor = 0; rateCategor<_sp.categories(); ++rateCategor) {
	51	MDOUBLE rate = _sp.rates(rateCategor);
	52	MDOUBLE pij= 0.0;
	53	MDOUBLE dpij=0.0;
	54	if (_s1.isSpecific(pos) && _s2.isSpecific(pos)) {//simple case, where AA i is changing to AA j
	55	pij= _sp.Pij_t(_s1[pos],_s2[pos],dist*rate);
	56	dpij= _sp.dPij_dt(_s1[pos],_s2[pos],dist*rate)*rate; // times rate: presumably the chain-rule factor for the argument dist*rate
	57	doubleRep tmp = _sp.freq(_s1[pos])*_posteriorProb[pos][rateCategor];
	58	posLikelihood += pij *tmp;
	59	posLikelihood_d += dpij*convert(tmp);
	60	}
	61	else {// this is the most complicated case, when you have combinations of letters,
	62	// for example B in one sequence and ? in the other.
	63	for (int iS1 =0; iS1< _sp.alphabetSize(); ++iS1) {
	64	for (int iS2 =0; iS2< _sp.alphabetSize(); ++iS2) {
	65	if ((_s1.getAlphabet()->relations(_s1[pos],iS1)) &&
	66	(_s2.getAlphabet()->relations(_s2[pos],iS2))) {
	67	doubleRep exp = _sp.freq(iS1)*_posteriorProb[pos][rateCategor];
	68	posLikelihood += exp* _sp.Pij_t(iS1,iS2,dist*rate);
	69	posLikelihood_d += convert(exp) * _sp.dPij_dt(iS1,iS2,dist*rate)*rate;
	70	}
	71	}
	72	}
	73	}
	74	}// end of for rate categories
	75	}
	76	assert(posLikelihood!=0.0);
	77	sumL += posLikelihood_d/convert(posLikelihood)*(_weights ? (*_weights)[pos]:1.0); // _weights is a pointer: dereference before indexing
	78	}
	79	return -sumL;
	80	};
	81	};
	82
	83	class C_eval_gammaMLDistancesPosterior{
	84	private:
	85	const stochasticProcess& _sp;
	86	const sequence& _s1;
	87	const sequence& _s2;
	88	const Vdouble* _weights;
	89	const VVdoubleRep& _posteriorProb; // pos, rate
	90	public:
	91	C_eval_gammaMLDistancesPosterior(const stochasticProcess& sp,
	92	const sequence& s1,
	93	const sequence& s2,
	94	const VVdoubleRep& posteriorProb,
	95	const Vdouble * weights): _sp(sp),
	96	_s1(s1),
	97	_s2(s2),
	98	_weights(weights),
	99	_posteriorProb(posteriorProb)
	100	{};
	101
	102
	103	MDOUBLE operator() (MDOUBLE dist) {
	104	/DEBUG LOG(9,<<"C_eval_gammaMLDistancesPosterior::operator():"); LOGDO(9,printTime(myLog::LogFile())); LOG(9,<<": dist = "<<dist<<endl); DEBUG/
	105	MDOUBLE sumL=0.0;
	106	doubleRep posLikelihood = 0.0;
	107
	108	for (int pos=0; pos < _s1.seqLen(); ++pos){
	109	/DEBUG LOG(9,<<"C_eval_gammaMLDistancesPosterior::operator():"); LOGDO(9,printTime(myLog::LogFile())); LOG(9,<<": pos = "<<pos<<endl); DEBUG/
	110	if (_s1.isUnknown(pos) && _s2.isUnknown(pos)) continue; // the case of two unknowns
	111	/DEBUG LOG(9,<<"_posteriorProb ="<<_posteriorProb[pos]<<endl); DEBUG/
	112	posLikelihood = 0.0;
	113	/DEBUG LOG(9,<<"posLikelihood = "<<posLikelihood<<endl); DEBUG/
	114	if (_s1.isUnknown(pos) && _s2.isSpecific(pos)) {
	115	// this is the more complicated case, where s1 = ?, s2 = specific
	116	posLikelihood = _sp.freq(_s2[pos]);
	117	}
	118	else if (_s2.isUnknown(pos) && _s1.isSpecific(pos)) {
	119	posLikelihood = _sp.freq(_s1[pos]);
	120	} else {
	121	for (int rateCategor = 0; rateCategor<_sp.categories(); ++rateCategor) {
	122	MDOUBLE rate = _sp.rates(rateCategor);
	123	/DEBUG LOG(9,<<"rate = "<<rate<<endl); DEBUG/
	124	MDOUBLE pij= 0.0;
	125	if (_s1.isSpecific(pos) && _s2.isSpecific(pos)) {//simple case, where AA i is changing to AA j
	126	/DEBUG LOG(9,<<"Both are specific"<<endl); DEBUG/
	127	pij= _sp.Pij_t(_s1[pos],_s2[pos],dist*rate);
	128	doubleRep exp = _sp.freq(_s1[pos])*_posteriorProb[pos][rateCategor];
	129	/DEBUG LOG(9,<<"exp = "<<exp<<endl); DEBUG/
	130	posLikelihood += pij *exp;
	131	/DEBUG LOG(9,<<"posLikelihood = "<<posLikelihood<<endl); DEBUG/
	132	}
	133	else {// this is the most complicated case, when you have combinations of letters,
	134	// for example B in one sequence and ? in the other.
	135	/DEBUG LOG(9,<<"One or both are non-specific"<<endl); DEBUG/
	136	for (int iS1 =0; iS1< _sp.alphabetSize(); ++iS1) {
	137	for (int iS2 =0; iS2< _sp.alphabetSize(); ++iS2) {
	138	if ((_s1.getAlphabet()->relations(_s1[pos],iS1)) &&
	139	(_s2.getAlphabet()->relations(_s2[pos],iS2))) {
	140	doubleRep exp = _sp.freq(iS1)*_posteriorProb[pos][rateCategor];
	141	posLikelihood += exp* _sp.Pij_t(iS1,iS2,dist*rate);
	142	}
	143	}
	144	}
	145	/DEBUG LOG(9,<<"posLikelihood = "<<posLikelihood<<endl); DEBUG/
	146	}
	147	}// end of for rate categories
	148	}
	149	assert(posLikelihood!=0.0);
	150	sumL += log(posLikelihood)(_weights ? (_weights)[pos]:1);
	151	}
	152	/DEBUG LOG(9,<<"C_eval_gammaMLDistancesPosterior::operator():"); LOGDO(9,printTime(myLog::LogFile())); LOG(9,<<": returning "<<(-sumL)<<endl); DEBUG/
	153	return -sumL;
	154	};
	155	};
	156
	157	posteriorDistance::posteriorDistance(const stochasticProcess & sp,
	158	const VVdoubleRep & posteriorProb,
	159	const MDOUBLE toll,
	160	const MDOUBLE maxPairwiseDistance)
	161	:
	162	likeDist(sp,toll,maxPairwiseDistance),_posteriorProb(posteriorProb)
	163	{}
	164
	165	posteriorDistance::posteriorDistance(stochasticProcess & sp,
	166	const VVdoubleRep & posteriorProb,
	167	const MDOUBLE toll,
	168	const MDOUBLE maxPairwiseDistance)
	169	:
	170	likeDist(sp,toll,maxPairwiseDistance),_posteriorProb(posteriorProb)
	171	{}
	172
	173	posteriorDistance::posteriorDistance(const stochasticProcess & sp,
	174	const MDOUBLE toll,
	175	const MDOUBLE maxPairwiseDistance)
	176	:
	177	likeDist(sp,toll,maxPairwiseDistance),_posteriorProb(0)
	178	{}
	179
	180
	181	posteriorDistance::posteriorDistance(stochasticProcess & sp,
	182	const MDOUBLE toll,
	183	const MDOUBLE maxPairwiseDistance)
	184	:
	185	likeDist(sp,toll,maxPairwiseDistance),_posteriorProb(0)
	186	{}
	187
	188	posteriorDistance::posteriorDistance(const posteriorDistance& other):
	189	likeDist(static_cast<likeDist>(other)), _posteriorProb(other._posteriorProb)
	190	{}
	191
	192
	193
	194	// distance is computed based on the posterior probability
	195	const MDOUBLE posteriorDistance::giveDistance(const sequence& s1,
	196	const sequence& s2,
	197	const Vdouble * weights,
	198	MDOUBLE* score) const
	199	{
	200	/DEBUG LOG(9,<<"posteriorDistance::giveDistance - start"<<endl); LOGDO(9,printTime(myLog::LogFile())); DEBUG/
	201	const MDOUBLE ax=0, cx=_maxPairwiseDistance;
	202	MDOUBLE bx=_jcDist.giveDistance(s1,s2,weights,score)/=1.0/;
	203	if (!(bx==bx)) bx = 1.0;
	204	if (!(bx>0.0)) bx = 0.000001;
	205	MDOUBLE dist=-1.0;
	206	MDOUBLE resL = -dbrent(ax,bx,cx,
	207	C_eval_gammaMLDistancesPosterior(_sp,s1,s2,_posteriorProb,weights),
	208	C_eval_gammaMLDistancesPosterior_d(_sp,s1,s2,_posteriorProb,weights),
	209	_toll,
	210	&dist);
	211	if (score) *score = resL;
	212	return dist;
	213	}
	214
	215	// =============================
	216	// OBSOLETE: this function was moved to pairwiseGammaDistance.cpp
	217	class C_evalAlphaForPairOfSeq{
	218	private:
	219	const countTableComponentGam& _ctc;
	220	stochasticProcess& _sp;
	221	const MDOUBLE _branchL;
	222	public:
	223	C_evalAlphaForPairOfSeq(const countTableComponentGam& ctc,
	224	const MDOUBLE branchL,
	225	stochasticProcess& sp):_ctc(ctc), _sp(sp), _branchL(branchL) {};
	226
	227	MDOUBLE operator() (MDOUBLE alpha) {
	228	(static_cast<gammaDistribution*>(_sp.distr()))->setAlpha(alpha);
	229	C_evalLikeDist cev(_ctc,_sp);
	230	MDOUBLE L=cev(_branchL);
	231	LOG(10,<<"check alpha="<<alpha<<", bl="<<_branchL<<" gives "<<L<<endl);
	232	return L;
	233	};
	234	};
	235
	236	// OBSOLETE: this function was moved to pairwiseGammaDistance.cpp
	237	// returns the best alpha.
	238	MDOUBLE optimizeAlphaFixedDist(stochasticProcess & sp,
	239	const countTableComponentGam & ctc,
	240	const MDOUBLE branchL,
	241	const vector<MDOUBLE> * weights,
	242	MDOUBLE* score=NULL){ // changes sp.
	243	MDOUBLE bestA=0.0;
	244	MDOUBLE bestQ=0.0;
	245	const MDOUBLE upperBoundOnAlpha = 15.0;
	246	const MDOUBLE epsilonAlphaOptimization = 0.01;
	247	const MDOUBLE cx=upperBoundOnAlpha;// left, midle, right limit on alpha
	248	const MDOUBLE bx=cx*0.3;
	249	const MDOUBLE ax=0.0;
	250
	251
	252	bestQ = -brent(ax,bx,cx,
	253	C_evalAlphaForPairOfSeq(ctc,branchL,sp),
	254	epsilonAlphaOptimization,
	255	&bestA);
	256	(static_cast<gammaDistribution*>(sp.distr()))->setAlpha(bestA);
	257	if (score) *score = bestQ;
	258	return bestA;
	259	}
	260
	261
	262
	263	// OBSOLETE: this function was moved to pairwiseGammaDistance.cpp
	264	class C_eval_gammaMLAlpha{
	265	private:
	266	const stochasticProcess& _sp;
	267	const sequence& _s1;
	268	const sequence& _s2;
	269	const MDOUBLE _distance;
	270	const Vdouble* _weights;
	271	// const VVdoubleRep& _posteriorProb; // pos, rate
	272	public:
	273	C_eval_gammaMLAlpha(const stochasticProcess& sp,
	274	const sequence& s1,
	275	const sequence& s2,
	276	const MDOUBLE distance,
	277	// const VVdoubleRep& posteriorProb,
	278	const Vdouble * weights): _sp(sp),
	279	_s1(s1),
	280	_s2(s2),
	281	_distance(distance),
	282	_weights(weights)
	283	// _posteriorProb(posteriorProb)
	284	{};
	285
	286	// this cast is required as the distribution within the
	287	// stochasticProcess is kept as the parent "distribution" class that
	288	// knows nothing of Alpha
	289	void setAlpha(MDOUBLE alpha) {
	290	(static_cast<gammaDistribution*>(_sp.distr()))->setAlpha(alpha);
	291	}
	292
	293
	294	MDOUBLE operator() (MDOUBLE alpha) {
	295	setAlpha(alpha);
	296	MDOUBLE likelihood = likeDist::evalLikelihoodForDistance(_sp,_s1,_s2,_distance,_weights);
	297	LOG(11,<<"check alpha="<<alpha<<", bl="<<_distance<<" gives "<<likelihood<<endl);
	298	return -likelihood;
	299	};
	300	} ;
	301
	302
	303	// OBSOLETE: this function was moved to pairwiseGammaDistance.cpp
	304	// returns the best alpha.
	305	MDOUBLE optimizeAlphaFixedDist( const sequence& s1,
	306	const sequence& s2,
	307	stochasticProcess & sp,
	308	const MDOUBLE branchL,
	309	const vector<MDOUBLE> * weights,
	310	MDOUBLE* score=NULL){ // changes sp.
	311	MDOUBLE bestA=0.0;
	312	MDOUBLE bestQ=0.0;
	313	const MDOUBLE upperBoundOnAlpha = 15.0;
	314	const MDOUBLE epsilonAlphaOptimization = 0.01;
	315	const MDOUBLE cx=upperBoundOnAlpha;// left, midle, right limit on alpha
	316	const MDOUBLE bx=cx*0.3;
	317	const MDOUBLE ax=0.0;
	318
	319
	320	bestQ = -brent(ax,bx,cx,
	321	C_eval_gammaMLAlpha(sp,s1,s2,branchL,weights),
	322	epsilonAlphaOptimization,
	323	&bestA);
	324	(static_cast<gammaDistribution*>(sp.distr()))->setAlpha(bestA);
	325	if (score) *score = bestQ;
	326	return bestA;
	327	}
	328
	329
	330
	331	MDOUBLE posteriorDistance::giveInitialGuessOfDistance(
	332	const sequence& s1,
	333	const sequence& s2,
	334	const vector<MDOUBLE> * weights,
	335	MDOUBLE* score) const {
	336	uniDistribution ud;
	337	stochasticProcess uniSp(&ud,_sp.getPijAccelerator());
	338	likeDist ld(uniSp);
	339	return (ld.giveDistance(s1,s2,weights,score));
	340	}
	341
	342	// OBSOLETE? What's the difference between this function and giveDistanceOptAlphaForPairOfSequences???
	343	MDOUBLE posteriorDistance::giveDistanceOptAlphaForEachPairOfSequences( const sequence& s1,
	344	const sequence& s2,
	345	const vector<MDOUBLE> * weights,
	346	MDOUBLE* score,
	347	MDOUBLE* alpha) const {
	348
	349	MDOUBLE toll = 0.0001;
	350
	351	MDOUBLE resL = 0.0;
	352	MDOUBLE resQ = 0.0;
	353	MDOUBLE currentDistance = giveInitialGuessOfDistance(s1,s2,weights,&resL);
	354
	355	countTableComponentGam ctc; // from technical reasons.
	356	ctc.countTableComponentAllocatePlace(_sp.alphabetSize(),_sp.categories());
	357
	358	stochasticProcess tmpSp(_sp);
	359	for (int z=0; z<s1.seqLen(); ++z) {
	360	for (int j=0; j < tmpSp.categories(); ++j) {
	361	ctc.addToCounts(s1[z],s2[z],j,weights?(*weights)[z]:tmpSp.ratesProb(j));
	362	}
	363	}
	364	const int maxIter = 30;
	365	MDOUBLE newDist = 0.0;
	366	MDOUBLE lastBestAlpha = 0.0;
	367	for (int i=0; i < maxIter; ++i) {
	368	lastBestAlpha = optimizeAlphaFixedDist(tmpSp,ctc,currentDistance,weights,&resL); // changes sp.
	369	(static_cast<gammaDistribution*>(tmpSp.distr()))->setAlpha(lastBestAlpha);
	370	LOG(8,<<"lastBestAlpha="<<lastBestAlpha<<"("<<(static_cast<gammaDistribution*>(tmpSp.distr()))->getAlpha()<<")"<<"\t L="<<resL<<"\t");
	371	likeDist tmpld(tmpSp); // we must create a new ld, that will include the stochastic process with the new alpha
	372	newDist = tmpld.giveDistance(ctc,resQ);
	373	LOG(8,<<"dist="<<newDist<<endl);
	374	if (fabs(newDist-currentDistance)<toll) break;
	375	currentDistance = newDist;
	376	}
	377	if (score) *score = resL;
	378	if (alpha) *alpha = lastBestAlpha;
	379	assert (newDist >=0);
	380	return newDist;
	381
	382	}
	383
	384
	385
	386	// OBSOLETE: this function was moved to pairwiseGammaDistance.cpp
	387	MDOUBLE posteriorDistance::giveDistanceOptAlphaForPairOfSequences( const sequence& s1,
	388	const sequence& s2,
	389	const vector<MDOUBLE> * weights,
	390	MDOUBLE* score,
	391	MDOUBLE* alpha) const {
	392
	393	MDOUBLE toll = 0.0001;
	394
	395	MDOUBLE resL = 0.0;
	396	MDOUBLE currentDistance = giveInitialGuessOfDistance(s1,s2,weights,&resL);
	397
	398	countTableComponentGam ctc; // from technical reasons.
	399
	400	stochasticProcess tmpSp(_sp);
	401
	402	const int maxIter = 30;
	403	MDOUBLE newDist = 0.0;
	404	MDOUBLE lastBestAlpha = 0.0;
	405	for (int i=0; i < maxIter; ++i) {
	406	lastBestAlpha = optimizeAlphaFixedDist(s1, s2, tmpSp, currentDistance, weights, &resL); // changes sp.
	407	LOG(8,<<"lastBestAlpha="<<lastBestAlpha<<"("<<"\t L="<<resL<<"\t");
	408	likeDist tmpld(tmpSp); // we must create a new ld, that will include the stochastic process with the new alpha
	409	newDist = tmpld.giveDistance(s1, s2, weights, &resL);
	410	LOG(8,<<"dist="<<newDist<<"(L="<<resL<<")"<<endl);
	411	if (fabs(newDist-currentDistance)<toll) break;
	412	currentDistance = newDist;
	413	}
	414	if (score) *score = resL;
	415	if (alpha) *alpha = lastBestAlpha;
	416	assert (newDist >=0);
	417	return newDist;
	418
	419	}

+72

-0

libs/phylogeny/posteriorDistance.h less more

	0	// $Id: posteriorDistance.h 1752 2007-02-26 14:01:09Z itaymay $
	1
	2
	3	#ifndef POSTERIOR_DISTANCE_H
	4	#define POSTERIOR_DISTANCE_H
	5
	6	#include "likeDist.h"
	7	#include "stochasticProcess.h"
	8	#include "definitions.h"
	9	#include "sequence.h"
	10	#include "gammaDistribution.h"
	11	#include "logFile.h"
	12
	13	#include <cmath>
	14	using namespace std;
	15
	16	class posteriorDistance : public likeDist {
	17	public:
	18	explicit posteriorDistance(const stochasticProcess & sp,
	19	const VVdoubleRep & posteriorProb, // pos * rate
	20	const MDOUBLE toll =0.0001,
	21	const MDOUBLE maxPairwiseDistance = 5.0);
	22
	23	explicit posteriorDistance(stochasticProcess & sp,
	24	const VVdoubleRep & posteriorProb, // pos * rate
	25	const MDOUBLE toll =0.0001,
	26	const MDOUBLE maxPairwiseDistance = 5.0);
	27
	28	explicit posteriorDistance(const stochasticProcess & sp,
	29	const MDOUBLE toll =0.0001,
	30	const MDOUBLE maxPairwiseDistance = 5.0);
	31
	32	explicit posteriorDistance(stochasticProcess & sp,
	33	const MDOUBLE toll =0.0001,
	34	const MDOUBLE maxPairwiseDistance = 5.0);
	35	posteriorDistance(const posteriorDistance& other);
	36	virtual posteriorDistance* clone() const {return new posteriorDistance(*this);}
	37
	38	// distance is computed based on the posterior probability
	39	const MDOUBLE giveDistance(const sequence& s1,
	40	const sequence& s2,
	41	const vector<MDOUBLE> * weights,
	42	MDOUBLE* score=NULL) const;
	43
	44	MDOUBLE giveDistanceOptAlphaForEachPairOfSequences(const sequence& s1,
	45	const sequence& s2,
	46	const vector<MDOUBLE> * weights,
	47	MDOUBLE* score=NULL,
	48	MDOUBLE* alpha=NULL) const;
	49
	50	MDOUBLE giveDistanceOptAlphaForPairOfSequences(const sequence& s1,
	51	const sequence& s2,
	52	const vector<MDOUBLE> * weights,
	53	MDOUBLE* score,
	54	MDOUBLE* alpha) const;
	55
	56	void setPosterior(VVdoubleRep posteriorProb) {_posteriorProb = posteriorProb;}
	57	void setAlpha(MDOUBLE alpha) {
	58	(static_cast<gammaDistribution*>(_sp.distr()))->setAlpha(alpha);
	59	}
	60
	61	private:
	62	VVdoubleRep _posteriorProb;
	63	MDOUBLE giveInitialGuessOfDistance(const sequence& s1,
	64	const sequence& s2,
	65	const vector<MDOUBLE> * weights,
	66	MDOUBLE* score) const;
	67	};
	68
	69
	70
	71	#endif

+284

-0

libs/phylogeny/readDatMatrix.cpp less more

	0	// $Id: readDatMatrix.cpp 5805 2009-01-20 09:19:26Z adido $
	1	//#ifndef unix
	2	//#define SSTREAM_KNOWN
	3	//#endif
	4
	5	//#ifdef SSTREAM_KNOWN
	6	#include <sstream>
	7	//#else
	8	//#include <strstream> //oldVersion
	9	//#endif
	10
	11
	12	#include <cassert>
	13	#include "readDatMatrix.h"
	14	#include "errorMsg.h"
	15	#include "logFile.h"
	16
	17	//#define VERBOS
	18
	19	void normalizeQ(VVdouble& q, const Vdouble& freq) {
	20	MDOUBLE sum =0;
	21	int i=0,j=0;
	22	for (i=0; i < q.size(); ++i) {
	23	sum += q[i][i]*freq[i];
	24	}
	25	assert(sum!=0);
	26	MDOUBLE oneDividedBySum = -1.0/sum; // to avoid many divisions.
	27
	28	for (i=0; i < q.size(); ++i) {
	29	for (j=0; j < q.size(); ++j) {
	30	q[i][j] = q[i][j]*oneDividedBySum;
	31	}
	32	}
	33	}
	34
	35	void readDatMatrixFromFile(const string & matrixFileName,
	36	VVdouble & subMatrix,
	37	Vdouble & freq) {
	38	cout<<"**readDatMatrixFromFile****"<<endl;
	39	int i=0,j=0; //indices
	40	ifstream in(matrixFileName.c_str());
	41	if (!in) {
	42	errorMsg::reportError("unable to open matrix data file");
	43	}
	44
	45	int alphaSize;
	46	if (matrixFileName == "adrianCodon.dat.q")
	47	alphaSize = 61;
	48	else
	49	alphaSize = 20;
	50	subMatrix.resize(alphaSize);
	51	for ( i=0; i < alphaSize; ++i) subMatrix[i].resize(alphaSize,0.0);
	52	freq.resize(alphaSize,0.0);
	53
	54	for (i=1; i < subMatrix.size(); ++i) {
	55	for (j=0; j <i;++j) {
	56	in>>subMatrix[i][j];
	57	subMatrix[j][i] = subMatrix[i][j];
	58	}
	59	}
	60	for (i=0; i < subMatrix.size(); ++i) {
	61	in>>freq[i];
	62	}
	63	in.close();
	64
	65	//check:
	66	//LOG(5,<<" priting the 5*5 top part of the sub matrix: "<<endl);
	67	//for (i=0; i < 5; ++i) {
	68	// for (j=0; j <5;++j) {
	69	// LOG(5,<<subMatrix[i][j]<<" ");
	70	// }
	71	// LOG(5,<<endl);
	72	//}
	73	//LOG(5,<<"the 5 last freqs: "<<endl);
	74	//for (i=15; i < 20; ++i) {
	75	// LOG(5,<<freq[i]<<" ");
	76	//}
	77	}
	78
	79	void readDatMatrixFromString(const string & matrixFileString,
	80	VVdouble & subMatrix,
	81	Vdouble & freq, int alphaSize) {
	82	int i=0,j=0; //indices
	83	//#ifdef SSTREAM_KNOWN
	84	stringstream in(matrixFileString.c_str());
	85	// #else
	86	// istrstream in(matrixFileString.c_str()); // OLD VERSION
	87	//#endif
	88	if (!in) {
	89	errorMsg::reportError("unable to open matrix data buffer");
	90	}
	91
	92
	93	subMatrix.resize(alphaSize);
	94	for ( i=0; i < alphaSize; ++i) subMatrix[i].resize(alphaSize,0.0);
	95	freq.resize(alphaSize,0.0);
	96
	97	for (i=1; i < alphaSize; ++i) {
	98	for (j=0; j <i;++j) {
	99	in>>subMatrix[i][j];
	100	subMatrix[j][i] = subMatrix[i][j];
	101	}
	102	}
	103	for (i=0; i < alphaSize; ++i) {
	104	in>>freq[i];
	105	}
	106	}
	107
	108
	109	#include "fromQtoPt.h"
	110	#include "definitions.h"
	111
	112	#include <iostream>
	113	using namespace std;
	114
	115	void pupAll::fillMatricesFromFile(const string & dataFileString) {
	116	VVdouble sMatrix;
	117	readDatMatrixFromFile(dataFileString,sMatrix,_freq);
	118	// readDatMatrixFromString(dataFileString,sMatrix,_freq);
	119	VVdouble qMatrix = fromWagSandFreqToQ(sMatrix,_freq);
	120
	121	q2pt q2pt1;
	122	q2pt1.fillFromRateMatrix(_freq,qMatrix);
	123	_leftEigen = q2pt1.getLeftEigen();
	124	_rightEigen = q2pt1.getRightEigen();
	125	_eigenVector = q2pt1.getEigenVec();
	126	}
	127	void pupAll::fillMatricesFromFile(const string & dataFileString, const Vdouble & freq) {
	128	#ifdef VERBOS
	129	LOG(5,<<"dataFileString = "<<dataFileString<<endl);
	130	#endif
	131
	132	VVdouble sMatrix;
	133	readDatMatrixFromFile(dataFileString,sMatrix,_freq);
	134	_freq=freq;
	135	VVdouble qMatrix = fromWagSandFreqToQ(sMatrix,_freq);
	136
	137	q2pt q2pt1;
	138	q2pt1.fillFromRateMatrix(_freq,qMatrix);
	139	_leftEigen = q2pt1.getLeftEigen();
	140	_rightEigen = q2pt1.getRightEigen();
	141	_eigenVector = q2pt1.getEigenVec();
	142	}
	143
	144	void pupAll::fillMatrices(const string & dataFileString,int alphaSize) {
	145	VVdouble sMatrix;
	146	readDatMatrixFromString(dataFileString,sMatrix,_freq,alphaSize);
	147	// readDatMatrixFromString(dataFileString,sMatrix,_freq);
	148	VVdouble qMatrix = fromWagSandFreqToQ(sMatrix,_freq);
	149
	150	q2pt q2pt1;
	151	q2pt1.fillFromRateMatrix(_freq,qMatrix);
	152	_leftEigen = q2pt1.getLeftEigen();
	153	_rightEigen = q2pt1.getRightEigen();
	154	_eigenVector = q2pt1.getEigenVec();
	155	}
	156	void pupAll::fillMatrices(const string & dataFileString, const Vdouble & freq) {
	157	VVdouble sMatrix;
	158	readDatMatrixFromString(dataFileString,sMatrix,_freq);
	159	_freq=freq;
	160	VVdouble qMatrix = fromWagSandFreqToQ(sMatrix,_freq);
	161
	162	q2pt q2pt1;
	163	q2pt1.fillFromRateMatrix(_freq,qMatrix);
	164	_leftEigen = q2pt1.getLeftEigen();
	165	_rightEigen = q2pt1.getRightEigen();
	166	_eigenVector = q2pt1.getEigenVec();
	167	}
	168
	169	const MDOUBLE pupAll::Pij_t(const int i, const int j, const MDOUBLE t) const {
	170	if (t<0) {
	171	LOG(5,<<"negative length in routine Pij_t "<<endl);
	172	LOG(5,<<" t = " <<t<<endl);
	173	errorMsg::reportError("negative length in routine Pij_t");
	174	}
	175	// if ((_freq[i] == 0.0) \|\| (_freq[j] == 0.0)) return 0.0;
	176	MDOUBLE sum=0;
	177	int alphaSize = _freq.size();
	178	for (int k=0 ; k<alphaSize ; ++k) {
	179	sum+=( _leftEigen[i][k]_rightEigen[k][j]exp(_eigenVector[k]*t) );
	180	}
	181	if (currectFloatingPointProblems(sum)) return sum;
	182	// LOG(1,<<"err Pij_t i="<<i<<" j= "<<j<<" dis= "<<t<<" res= "<<sum<<endl);//sum is not in [0,1]
	183	errorMsg::reportError("error in function pijt... ");return 0;
	184	}
	185
	186	const MDOUBLE pupAll::dPij_dt(const int i,const int j, const MDOUBLE t) const {
	187	// if ((_freq[i] == 0.0) \|\| (_freq[j] == 0.0)) return 0.0;
	188	MDOUBLE sum=0;
	189	int alphaSize = _freq.size();
	190	for (int k=0 ; k<alphaSize ; ++k) {
	191	sum+=( _leftEigen[i][k]_rightEigen[k][j]exp(_eigenVector[k]t)_eigenVector[k]);
	192	}
	193	return sum;
	194	}
	195
	196
	197	const MDOUBLE pupAll::d2Pij_dt2(const int i,const int j, const MDOUBLE t) const {
	198	// if ((_freq[i] == 0.0) \|\| (_freq[j] == 0.0)) return 0.0;
	199	MDOUBLE sum=0;;
	200	int alphaSize = _freq.size();
	201	for (int k=0 ; k<alphaSize ; ++k) {
	202	sum+=( _leftEigen[i][k]_rightEigen[k][j]exp(_eigenVector[k]t)_eigenVector[k]*_eigenVector[k]);
	203	}
	204	return sum;
	205	}
	206	// this gives the likelihood of j given i at distance t and gamma
	207	// parameter alpha. The result presented here is the integral over the
	208	// rates (according to the gamma distribution with parameter alpah). see Yang's (93) paper.
	209	const MDOUBLE pupAll::Pij_tAlpha(const int i, const int j, const MDOUBLE t, const MDOUBLE alpha) const {
	210	if (t<0) {
	211	LOG(5,<<"negative length in routine Pij_tAlpha "<<endl);
	212	LOG(5,<<" t = " <<t<<endl);
	213	errorMsg::reportError("negative length in routine Pij_tAlpha");
	214	}
	215	MDOUBLE sum=0;
	216	for (int k=0 ; k<20 ; ++k) {
	217	sum+=( _leftEigen[i][k]_rightEigen[k][j]pow(1-_eigenVector[k]*t/alpha,-alpha));
	218	}
	219	if (currectFloatingPointProblems(sum)) return sum;
	220	errorMsg::reportError("error in function pijtAlpha... ");return 0;
	221	}
	222
	223
	224	const MDOUBLE pupAll::Pij_tAlpha_dt(const int i, const int j, const MDOUBLE t, const MDOUBLE alpha) const {
	225	if (t<0) {
	226	LOG(5,<<"negative length in routine Pij_tAlpha_dt "<<endl);
	227	LOG(5,<<" t = " <<t<<endl);
	228	errorMsg::reportError("negative length in routine Pij_tAlpha_dt");
	229	}
	230	MDOUBLE sum=0;
	231	for (int k=0 ; k<20 ; ++k) {
	232	sum+=( _leftEigen[i][k]_rightEigen[k][j] _eigenVector[k]* pow(1-_eigenVector[k]*t/alpha,-alpha-1));
	233	}
	234	return sum;
	235	}
	236	const MDOUBLE pupAll::Pij_tAlpha_dt2(const int i, const int j, const MDOUBLE t, const MDOUBLE alpha) const {
	237	if (t<0) {
	238	LOG(5,<<"negative length in routine Pij_tAlpha_dt2 "<<endl);
	239	LOG(5,<<" t = " <<t<<endl);
	240	errorMsg::reportError("negative length in routine Pij_tAlpha_dt2");
	241	}
	242	MDOUBLE sum=0;
	243	for (int k=0 ; k<20 ; ++k) {
	244	sum+=( _leftEigen[i][k]_rightEigen[k][j] (1+1/alpha) _eigenVector[k]_eigenVector[k]* pow(1-_eigenVector[k]*t/alpha,-alpha-2));
	245	}
	246	return sum;
	247	}
	248
	249	bool pupAll::currectFloatingPointProblems(MDOUBLE& sum) const {
	250	if ((sum * (sum+err_allow_for_pijt_function))<0) sum=0;
	251	if (((sum-1) * (sum-1.0-err_allow_for_pijt_function))<0) sum=1;
	252	if ((sum>1) \|\| (sum<0)) return false;
	253	return true;
	254	}
	255
	256	VVdouble fromWagSandFreqToQ(const VVdouble & s,const Vdouble& freq){
	257	VVdouble q(s.size());
	258	for (int z=0; z < q.size(); ++z) q[z].resize(s.size(),0.0);
	259	int i,j;
	260	MDOUBLE sum;
	261	for ( i=0; i < s.size(); ++i) {
	262	sum =0;
	263	for (j=0; j < s.size(); ++j) {
	264	if (i!=j) q[i][j] = s[i][j]* freq[j];
	265	sum += q[i][j];
	266	}
	267	q[i][i] = -sum;
	268	}
	269
	270	// normalizing q:
	271	normalizeQ(q,freq);
	272
	273
	274	// check:
	275	//sum =0;
	276	//for (i=0; i < s.size(); ++i){
	277	// sum += q[i][i]*freq[i];
	278	//}
	279	//LOG(5,<<" SUM OF DIAGOPNAL Q IS (should be -1) "<<sum<<endl);
	280	return q;
	281
	282	}
	283

+68

-0

libs/phylogeny/readDatMatrix.h less more

	0	// $Id: readDatMatrix.h 5805 2009-01-20 09:19:26Z adido $
	1
	2	#ifndef ___READ_DAT_MATRIX
	3	#define ___READ_DAT_MATRIX
	4
	5	#include "definitions.h"
	6	#include <string>
	7	#include <iostream>
	8	#include <fstream>
	9	#include "datMatrixHolder.h"
	10
	11	using namespace std;
	12
	13	void normalizeQ(VVdouble& q, const Vdouble& freq);
	14
	15	void readDatMatrixFromFile(const string & matrixFileName,
	16	VVdouble & subMatrix,
	17	Vdouble & freq);
	18	void readDatMatrixFromString(const string & matrixFileString,
	19	VVdouble & subMatrix,
	20	Vdouble & freq, int alphaSize = 20);
	21
	22	VVdouble fromWagSandFreqToQ(const VVdouble & s,const Vdouble& freq);
	23
	24	#include "replacementModel.h"
	25	#include "definitions.h"
	26	#include "errorMsg.h"
	27
	28	class pupAll : public replacementModel {
	29	public:
	30	// get matrix from file:
	31	explicit pupAll(const string& matrixFileString) : err_allow_for_pijt_function(1e-4) {fillMatricesFromFile(matrixFileString);}
	32	explicit pupAll(const string& matrixFileString, const vector<MDOUBLE>& freq) : err_allow_for_pijt_function(1e-4) {fillMatricesFromFile(matrixFileString,freq);}
	33
	34	// get matrix from within the .exe
	35	explicit pupAll(const datMatrixString& matrixFileString,int alphaSize = 20) : err_allow_for_pijt_function(1e-4) {fillMatrices(matrixFileString.Val,alphaSize); }
	36	explicit pupAll(const datMatrixString& matrixFileString, const vector<MDOUBLE>& freq) : err_allow_for_pijt_function(1e-4) {fillMatrices(matrixFileString.Val,freq);}
	37
	38
	39	const int alphabetSize() const {return _freq.size();}//20 or 61
	40	const MDOUBLE err_allow_for_pijt_function; //1e-4
	41	virtual replacementModel* clone() const { return new pupAll(*this); }
	42
	43	const MDOUBLE Pij_t(const int i,const int j, const MDOUBLE t) const;
	44	const MDOUBLE dPij_dt(const int i,const int j, const MDOUBLE t) const;
	45	const MDOUBLE d2Pij_dt2(const int i,const int j, const MDOUBLE t) const;
	46	const MDOUBLE freq(const int i) const {return _freq[i];}
	47
	48	const MDOUBLE Pij_tAlpha (const int i,const int j, const MDOUBLE t, const MDOUBLE alpha) const;
	49	const MDOUBLE Pij_tAlpha_dt (const int i,const int j, const MDOUBLE t, const MDOUBLE alpha) const;
	50	const MDOUBLE Pij_tAlpha_dt2(const int i,const int j, const MDOUBLE t, const MDOUBLE alpha) const;
	51
	52	private:
	53	void fillMatrices(const string & matrixName,const vector<MDOUBLE>& freq);
	54	void fillMatrices(const string & matrixName,int alphaSize);
	55	void fillMatricesFromFile(const string & dataFileString,const vector<MDOUBLE>& freq);
	56	void fillMatricesFromFile(const string & dataFileString);
	57
	58
	59	bool currectFloatingPointProblems(MDOUBLE& sum) const;
	60
	61	VVdouble _leftEigen;
	62	VVdouble _rightEigen;
	63	Vdouble _eigenVector;
	64	Vdouble _freq;
	65	};
	66
	67	#endif

+178

-0

libs/phylogeny/readTree.cpp less more

	0	// $Id: readTree.cpp 5525 2008-12-19 20:17:05Z itaymay $
	1
	2	#include "definitions.h"
	3	#include "errorMsg.h"
	4	#include "someUtil.h"
	5	#include "readTree.h"
	6	#include <iostream>
	7	using namespace std;
	8
	9
	10
	11
	12
	13	// forward declarations
	14
	15	//----------------------------------------------------------------------------------------------
	// About reading tree topology from files:
	// usually a tree topology is represented by a line like this
	// (((Langur:0.8,Baboon:0.55):0.3,Human:0.44):0.5,Rat:0.02,(Cow:0.2,Horse:0.04):0.03);
	// the syntax of such a line is (part, part, part, part)
	// where part is either (part, part, part, ...):distance or name:distance,
	// or the same without the distance.
	// Note that the tree is unrooted.
	// In the above format, the number of commas (",") is
	// always one less than the number of leaves (synonyms for leaves are OTUs and external nodes).
	// The function GetNumberOfLeaves counts the number of commas and returns the number of leaves.
	// In the example above there are 6 leaves.
	27
	28	//*******************************************************************************
	29	// constructors
	30	//*******************************************************************************
	31
	32
	33
	34
	35
	36	vector<char> PutTreeFileIntoVector(istream &in) {
	37	vector<char> tree_contents;
	38	bool endWithDotComa = false;
	39	char chTemp;
	40	while (( !in.eof()) && (tree_contents.size() < MAX_FILE_SIZE))
	41	{
	42	in.get(chTemp);
	43	#ifdef WIN32
	44	if (chTemp == -52) return tree_contents; //tal addition.
	45	#endif
	46	if ( !isspace( chTemp ) )
	47	tree_contents.push_back(chTemp);
	48	if (chTemp == ';') {
	49	endWithDotComa = true;
	50	break;
	51	}
	52	}
	53
	54	if (tree_contents.size() >= MAX_FILE_SIZE) {
	55	vector<string> err;
	56	err.push_back("Error reading tree file. The tree file is too large");
	57	errorMsg::reportError(err,1); // also quit the program
	58	}
	59	if (endWithDotComa == false) tree_contents.clear(); // remove junk from the last ; till the end of the file.
	60	return tree_contents;
	61	}
	62
	63
	64
	65
	66	int GetNumberOfLeaves(const vector<char> &tree_contents) {
	67	int iCommasCounter = 0;
	68	vector<char>::const_iterator itCurrent = tree_contents.begin();
	69	for ( ; itCurrent != tree_contents.end(); ++itCurrent ) {
	70	if (*itCurrent==COMMA)
	71	++iCommasCounter;
	72	}
	73	return ++iCommasCounter; //#leaves is always one more than number of comas
	74	}
	75
	76	int GetNumberOfInternalNodes(const vector<char> &tree_contents) {
	77	int iCloseCounter = 0;
	78	vector<char>::const_iterator itCurrent = tree_contents.begin();
	79	for ( ; itCurrent != tree_contents.end(); ++itCurrent ) {
	80	if (*itCurrent==CLOSING_BRACE) ++iCloseCounter;
	81	if (*itCurrent==CLOSING_BRACE2) ++iCloseCounter;
	82	}
	83	return iCloseCounter; //number of HTUs is always the number of ")"
	84	}
	85
	86
	// True when the character under the iterator equals p_cCharToFind.
	// The iterator is not moved.
	bool verifyChar(vector<char>::const_iterator &p_itCurrent, const char p_cCharToFind) {
		return (*p_itCurrent)==p_cCharToFind;
	}
	91
	92
	93
	94
	95	// IsAtomicPart decides whether we will now read a taxa name (return true),
	96	// or read an OPENING_BRACE which will say us, that we will read a complicated strucure.
	97	bool IsAtomicPart(const vector<char>::const_iterator p_itCurrent) {
	98	if ( (*p_itCurrent)==OPENING_BRACE ) return false;
	99	else if ( (*p_itCurrent)==OPENING_BRACE2 ) return false;
	100	return true;
	101	}
	102
	103	//-----------------------------------------------------------------------------
	104	// there are 2 options for the tree format.
	105	// either (name1:0.43, name2: 0.45 , (name3 : 2 , name 4: 5) : 3.332)
	106	// or without the distances (name1, name2 , (name3 , name4) )
	107	// here we return true if the tree file is with the distance, or false, if the tree file
	108	// has not distances.
	109	// if distances exist: after the name there will always be a colon
	110	// if distance exist, also move the iterator, to the beggining of the number
	111	//-----------------------------------------------------------------------------
	112	bool DistanceExists(vector<char>::const_iterator& p_itCurrent) {
	113
	114	if ((*p_itCurrent)==COLON ) {
	115	++p_itCurrent;
	116	return true;
	117	}
	118	return false;
	119	}
	120
	121	void clearPosibleComment(vector<char>::const_iterator& p_itCurrent) {
	122	if ((*p_itCurrent)=='[' ) {
	123	while (*(++p_itCurrent) != ']');
	124	++p_itCurrent; // move over "]"
	125	}
	126	}
	127
	128	string readPosibleComment(vector<char>::const_iterator& p_itCurrent) {
	129	string comment = "";
	130
	131	if ((*p_itCurrent)=='[' )
	132	{
	133	vector<char>::const_iterator tmp= (p_itCurrent+1);
	134	if ((*tmp++)=='&' &&
	135	(*tmp++)=='&' &&
	136	(*tmp++)=='N' &&
	137	(*tmp++)=='H' &&
	138	(*tmp++)=='X') // see http://www.genetics.wustl.edu/eddy/forester/NHX.pdf
	139	// [&&NHX...]
	140	{
	141	p_itCurrent += 5;
	142	while (*(++p_itCurrent) != ']')
	143	{
	144	comment += *(p_itCurrent);
	145	}
	146	++p_itCurrent; // move over "]"
	147	}
	148	else // [...]
	149	{
	150	// Skip over the text in []
	151	++p_itCurrent;
	152	while (*(p_itCurrent) != ']')
	153	++p_itCurrent;
	154	++p_itCurrent; // move over "]"
	155
	156	}
	157	}
	158	if (comment.size())
	159	LOG(10,<<"comment ="<<comment<<endl);
	160
	161	return comment;
	162	}
	163
	164
	165
	166	MDOUBLE getDistance(vector<char>::const_iterator &p_itCurrent) {
	167	string sTempNumber;
	168	for ( ; isdigit(p_itCurrent) \|\| (p_itCurrent)==PERIOD \|\| (p_itCurrent)=='E'\|\| (p_itCurrent)=='e'\|\| (p_itCurrent)=='-' \|\| (p_itCurrent)=='+'; ++p_itCurrent)
	169	sTempNumber += (*p_itCurrent);
	170	MDOUBLE dDistance = string2double(sTempNumber);
	171	return dDistance;
	172	}
	173
	174
	175
	176
	177

+40

-0

libs/phylogeny/readTree.h less more

	0	// $Id: readTree.h 962 2006-11-07 15:13:34Z privmane $
	1
	2	#ifndef ___READ_TREE
	3	#define ___READ_TREE
	4	#include "definitions.h"
	5	#include <iostream>
	6	using namespace std;
	7
	8	#define REMARK ';'
	9	#define MAX_LENGTH_OF_NAME 20
	10	#define MAX_FILE_SIZE 1000000
	11	#define FATHER 0
	12	#define LEFT 1
	13	#define RIGHT 2
	14
	15	#define OPENING_BRACE '('
	16	#define CLOSING_BRACE ')'
	17	#define OPENING_BRACE2 '{'
	18	#define CLOSING_BRACE2 '}'
	19	#define COMMA ','
	20	#define COLON ':'
	21	#define SEMI_COLLON ';'
	22	#define PERIOD '.'
	23
	24
	25
	26	bool DistanceExists(vector<char>::const_iterator& p_itCurrent);
	27	bool verifyChar(vector<char>::const_iterator &p_itCurrent, const char p_cCharToFind);
	28	int GetNumberOfLeaves(const vector<char>& tree_contents);
	29	int GetNumberOfInternalNodes(const vector<char>& tree_contents);
	30	bool IsAtomicPart(const vector<char>::const_iterator p_itCurrent);
	31	vector<char> PutTreeFileIntoVector(istream &in);
	32
	33	MDOUBLE getDistance(vector<char>::const_iterator &p_itCurrent);
	34	bool DistanceExists(vector<char>::const_iterator& p_itCurrent);
	35
	36	void clearPosibleComment(vector<char>::const_iterator& p_itCurrent);
	37	string readPosibleComment(vector<char>::const_iterator& p_itCurrent);
	38	#endif
	39

+87

-0

libs/phylogeny/recognizeFormat.cpp less more

	0	// $Id: recognizeFormat.cpp 6780 2009-09-15 00:55:05Z itaymay $
	1
	2	#include "recognizeFormat.h"
	3	#include "maseFormat.h"
	4	#include "sequenceContainer.h"
	5	#include "molphyFormat.h"
	6	#include "phylipFormat.h"
	7	#include "nexusFormat.h"
	8	#include "fastaFormat.h"
	9	#include "clustalFormat.h"
	10	#include "nexusFormat.h"
	11	#include "phylipSequentialFormat.h"
	12
	13
	14	sequenceContainer recognizeFormat::read(istream &infile, const alphabet* alph) {
	15	sequenceContainer mySeqData = readUnAligned(infile, alph);
	16	mySeqData.makeSureAllSeqAreSameLengthAndGetLen();
	17	return mySeqData;
	18	}
	19
	20	sequenceContainer recognizeFormat::readUnAligned(istream &infile, const alphabet* alph) {
	21	// recognize a format and returns the sequence container of it.
	22	sequenceContainer sc;
	23	if (!infile){
	24	string tmp = "error unable to open sequence input file ";
	25	errorMsg::reportError(tmp);
	26	}
	27
	28	// this part eats spaces, tabs and such.
	29	char check = infile.peek();
	30	while ((check==' ') \|\| (check == '\n') \|\| (check == '\t')) {
	31	infile.get();
	32	check = infile.peek();
	33	}
	34
	35	switch (check){
	36	case '#':
	37	sc=nexusFormat::readUnAligned(infile,alph);
	38	break;
	39	case '>':
	40	sc=fastaFormat::readUnAligned(infile,alph);
	41	break;
	42	case 'C':
	43	sc=clustalFormat::readUnAligned(infile,alph);
	44	break;
	45	case ';':
	46	sc=maseFormat::readUnAligned(infile,alph);
	47	break;
	48
	49	default:
	50	if (isdigit(check)){
	51	// here it can be either MOLPHY format or one of the PHYLIP type formats (interleaved, sequential)
	52	// in PHYLIP format there are lines that are not empty, but the first 10 characters
	53	// are space.
	54	string s;
	55	getline(infile,s, '\n' ); // read the first line which are numbers in both formats
	56	getline(infile,s, '\n' ); // read the second line
	57	bool phylipFormat = false;
	58	int r = s.find_first_of(' '); // if there is a space somewhere - this is phylip format
	59	if ((r==(s.size()-1)) \|\| (r==-1)) phylipFormat = false;
	60	else phylipFormat = true;
	61
	62
	63	if (phylipFormat == false) {
	64	infile.seekg(0, ios::beg); // file return to the beginning
	65	sc=molphyFormat::readUnAligned(infile,alph);
	66	} else {
	67	getline(infile,s, '\n' ); // read the third line: interleaved will begin with a space, sequential not
	68	infile.seekg(0, ios::beg); // file return to the beginning
	69	if (s[0] == ' ')
	70	sc = phylipSequentialFormat::readUnAligned(infile, alph);
	71	else
	72	sc = phylipFormat::readUnAligned(infile,alph);
	73	}
	74	}
	75	else{
	76	string line;
	77	getline(infile, line, '\n');
	78	string tmp2 = "The program can't recognise your format!";
	79	tmp2+="\nThis is the first line in your format:\n";
	80	tmp2+=line;
	81	errorMsg::reportError(tmp2);
	82	}
	83	break;
	84	}
	85	return sc;
	86	}

+19

-0

libs/phylogeny/recognizeFormat.h less more

	0	// $Id: recognizeFormat.h 962 2006-11-07 15:13:34Z privmane $
	1
	2	#ifndef ___RECOGNIZE_FORMAT
	3	#define ___RECOGNIZE_FORMAT
	4
	5	#include "sequenceContainer.h"
	6
	7	class recognizeFormat{
	8	public:
	9	static sequenceContainer read(istream &infile, const alphabet* alph);
	10	static void write(ostream &out, const sequenceContainer& sd);
	11	//readUnAligned: the input sequences do not need to be aligned (not all sequences are the same length).
	12	static sequenceContainer readUnAligned(istream &infile, const alphabet* alph);
	13	};
	14
	15	#endif
	16
	17
	18

+46

-0

libs/phylogeny/replacementMatrixSource/HIVBetween.dat less more

	0	ACDEFGHIKLMNPQRSTVWY
	1
	2	{{ 0, 0.065662251, 0.77200021, 0.7859595, 0.0074953058, 1.1329574, 0.044971782, 0.0026528488, 0.0026528488, 0.11420832, 0.009902713, 0.0026528488, 1.1259592, 0.029241185, 0.16315391, 1.3085601, 8.4457685, 4.0399067, 0.0026528488, 0.0026528488}
	3	{ 0.065662251, 0, 0.0026528488, 0.0026528488, 4.9333171, 0.47638319, 0.12737547, 0.0026528488, 0.0026528488, 0.068855751, 0.0026528488, 0.045663061, 0.0026528488, 0.0026528488, 0.18661252, 2.4900381, 0.39260517, 0.22285362, 1.3968681, 4.0213579}
	4	{ 0.77200021, 0.0026528488, 0, 5.6172481, 0.0026528488, 1.5057888, 1.0170492, 0.0093800488, 0.0026528488, 0.0046480457, 0.0026528488, 9.3704985, 0.018180397, 0.0026528488, 0.0026528488, 0.28026286, 0.14576024, 0.55599996, 0.0026528488, 0.35795048}
	5	{ 0.7859595, 0.0026528488, 5.6172481, 0, 0.0026528488, 2.0839453, 0.063530422, 0.0032315889, 2.4484839, 0.0026528488, 0.093268326, 0.042054709, 0.0063788279, 1.3583647, 0.039751241, 0.0026528488, 0.15374532, 0.54567507, 0.0026528488, 0.042054709}
	6	{ 0.0074953058, 4.9333171, 0.0026528488, 0.0026528488, 0, 0.15469345, 0.077228672, 1.803067, 0.018180397, 4.5230222, 0.099760378, 0.0026528488, 0.0026528488, 0.0026528488, 0.0026528488, 0.50747511, 0.0074953058, 0.38374731, 0.44002431, 8.13894}
	7	{ 1.1329574, 0.47638319, 1.5057888, 2.0839453, 0.15469345, 0, 0.0026528488, 0.0026528488, 0.27680089, 0.0026528488, 0.0026528488, 0.17158679, 0.0026528488, 0.032849536, 1.9384101, 2.324113, 0.19610654, 0.50571521, 0.64556544, 0.0026528488}
	8	{ 0.044971782, 0.12737547, 1.0170492, 0.063530422, 0.077228672, 0.0026528488, 0, 0.054707578, 0.0026528488, 0.92409864, 0.0026528488, 4.0566567, 1.3015831, 3.7434084, 4.796584, 0.20307398, 0.37755025, 0.0026528488, 0.036884095, 9.9186301}
	9	{ 0.0026528488, 0.0026528488, 0.0093800488, 0.0032315889, 1.803067, 0.0026528488, 0.054707578, 0, 0.17101271, 3.1615537, 5.9458299, 0.3610872, 0.021784823, 0.0026528488, 0.35934906, 0.64624988, 4.5693569, 9.4117238, 0.0026528488, 0.078613459}
	10	{ 0.0026528488, 0.0026528488, 0.0026528488, 2.4484839, 0.018180397, 0.27680089, 0.0026528488, 0.17101271, 0, 0.04324117, 0.68043448, 4.1938515, 0.016652568, 3.4738365, 10.850151, 0.26746605, 2.4785142, 0.14104083, 0.0026528488, 0.0026528488}
	11	{ 0.11420832, 0.068855751, 0.0046480457, 0.0026528488, 4.5230222, 0.0026528488, 0.92409864, 3.1615537, 0.04324117, 0, 2.8224242, 0.0026528488, 1.1022958, 0.79296833, 0.37215595, 0.49218621, 0.023221606, 0.74829436, 0.39731344, 0.059416384}
	12	{ 0.009902713, 0.0026528488, 0.0026528488, 0.093268326, 0.099760378, 0.0026528488, 0.0026528488, 5.9458299, 0.68043448, 2.8224242, 0, 0.0026528488, 0.0026528488, 0.1611213, 1.3338205, 0.0026528488, 2.6211525, 3.6361006, 0.047262092, 0.0026528488}
	13	{ 0.0026528488, 0.045663061, 9.3704985, 0.042054709, 0.0026528488, 0.17158679, 4.0566567, 0.3610872, 4.1938515, 0.0026528488, 0.0026528488, 0, 0.0039239772, 0.35657046, 0.15680618, 6.9741802, 3.6538588, 0.014142867, 0.0026528488, 0.93601524}
	14	{ 1.1259592, 0.0026528488, 0.018180397, 0.0063788279, 0.0026528488, 0.0026528488, 1.3015831, 0.021784823, 0.016652568, 1.1022958, 0.0026528488, 0.0039239772, 0, 2.3727663, 0.68101281, 2.8532025, 1.0686577, 0.0026528488, 0.023584144, 0.016149535}
	15	{ 0.029241185, 0.0026528488, 0.0026528488, 1.3583647, 0.0026528488, 0.032849536, 3.7434084, 0.0026528488, 3.4738365, 0.79296833, 0.1611213, 0.35657046, 2.3727663, 0, 1.8153444, 0.061711098, 0.12924096, 0.011097026, 0.014142867, 0.059971891}
	16	{ 0.16315391, 0.18661252, 0.0026528488, 0.039751241, 0.0026528488, 1.9384101, 4.796584, 0.35934906, 10.850151, 0.37215595, 1.3338205, 0.15680618, 0.68101281, 1.8153444, 0, 1.8459052, 1.5220348, 0.043106352, 0.52597396, 0.0052623288}
	17	{ 1.3085601, 2.4900381, 0.28026286, 0.0026528488, 0.50747511, 2.324113, 0.20307398, 0.64624988, 0.26746605, 0.49218621, 0.0026528488, 6.9741802, 2.8532025, 0.061711098, 1.8459052, 0, 4.7385556, 0.039751241, 0.013196755, 0.34382193}
	18	{ 8.4457685, 0.39260517, 0.14576024, 0.15374532, 0.0074953058, 0.19610654, 0.37755025, 4.5693569, 2.4785142, 0.023221606, 2.6211525, 3.6538588, 1.0686577, 0.12924096, 1.5220348, 4.7385556, 0, 0.37629386, 0.0026528488, 0.056055755}
	19	{ 4.0399067, 0.22285362, 0.55599996, 0.54567507, 0.38374731, 0.50571521, 0.0026528488, 9.4117238, 0.14104083, 0.74829436, 3.6361006, 0.014142867, 0.0026528488, 0.011097026, 0.043106352, 0.039751241, 0.37629386, 0, 0.0026528488, 0.021784823}
	20	{ 0.0026528488, 1.3968681, 0.0026528488, 0.0026528488, 0.44002431, 0.64556544, 0.036884095, 0.0026528488, 0.0026528488, 0.39731344, 0.047262092, 0.0026528488, 0.023584144, 0.014142867, 0.52597396, 0.013196755, 0.0026528488, 0.0026528488, 0, 0.67924601}
	21	{ 0.0026528488, 4.0213579, 0.35795048, 0.042054709, 8.13894, 0.0026528488, 9.9186301, 0.078613459, 0.0026528488, 0.059416384, 0.0026528488, 0.93601524, 0.016149535, 0.059971891, 0.0052623288, 0.34382193, 0.056055755, 0.021784823, 0.67924601, 0}
	22	}
	23
	24
	25	{{ 0.060490222}
	26	{ 0.020075899}
	27	{ 0.042109048}
	28	{ 0.071567447}
	29	{ 0.028809447}
	30	{ 0.072308239}
	31	{ 0.022293943}
	32	{ 0.069730629}
	33	{ 0.056968211}
	34	{ 0.098851122}
	35	{ 0.019768318}
	36	{ 0.044127815}
	37	{ 0.046025282}
	38	{ 0.053606488}
	39	{ 0.066039665}
	40	{ 0.05060433}
	41	{ 0.053636813}
	42	{ 0.061625237}
	43	{ 0.033011601}
	44	{ 0.028350243}
	45	}

+46

-0

libs/phylogeny/replacementMatrixSource/HIVWithin.dat less more

	0	ACDEFGHIKLMNPQRSTVWY
	1
	2	{{ 0, 0.049094712, 1.2987859, 1.6291158, 0.17509295, 0.54716271, 0.0014641764, 0.0014641764, 0.17358807, 0.046923924, 0.0014641764, 0.18082842, 0.29570799, 0.0014641764, 0.021810606, 2.5166849, 7.0696878, 7.2650675, 0.0014641764, 0.0014641764}
	3	{ 0.049094712, 0, 0.0014641764, 0.0014641764, 0.1062872, 0.014343013, 0.0014641764, 0.0014641764, 0.0014641764, 0.0014641764, 0.0014641764, 0.017714543, 0.0014641764, 0.0014641764, 0.83857481, 0.32854654, 0.0014641764, 0.0014641764, 1.6102836, 2.4446914}
	4	{ 1.2987859, 0.0014641764, 0, 3.5501299, 0.0014641764, 3.0445791, 0.67873067, 0.042497426, 0.26188639, 0.0014641764, 0.0014641764, 8.6119047, 0.0014641764, 0.0014641764, 0.019752881, 0.12529865, 0.18460189, 0.85445233, 0.0014641764, 0.66811539}
	5	{ 1.6291158, 0.0014641764, 3.5501299, 0, 0.0014641764, 4.3281346, 0.0014641764, 0.011435569, 7.0170946, 0.038021439, 0.0014641764, 0.059013922, 0.0014641764, 0.93899388, 0.0073686726, 0.0014641764, 0.13433613, 0.64409704, 0.0014641764, 0.0014641764}
	6	{ 0.17509295, 0.1062872, 0.0014641764, 0.0014641764, 0, 0.0014641764, 0.0014641764, 0.43423957, 0.0014641764, 2.1926949, 0.0014641764, 0.0014641764, 0.010022346, 0.0014641764, 0.0014641764, 1.2531563, 0.033533153, 0.66766443, 0.0014641764, 1.2086132}
	7	{ 0.54716271, 0.014343013, 3.0445791, 4.3281346, 0.0014641764, 0, 0.0014641764, 0.0014641764, 0.081825497, 0.014343013, 0.014343013, 0.017714543, 0.0014641764, 0.017714543, 3.9350911, 1.838906, 0.014343013, 0.81883185, 0.82749392, 0.0014641764}
	8	{ 0.0014641764, 0.0014641764, 0.67873067, 0.0014641764, 0.0014641764, 0.0014641764, 0, 0.0014641764, 0.065612672, 0.51650871, 0.0014641764, 2.5180202, 4.0834122, 5.4310694, 2.0041793, 0.21235155, 0.28099302, 0.24231504, 0.0014641764, 13.906425}
	9	{ 0.0014641764, 0.0014641764, 0.042497426, 0.011435569, 0.43423957, 0.0014641764, 0.0014641764, 0, 0.23938727, 2.6655214, 5.0679244, 0.28903662, 0.0014641764, 0.010022346, 0.39260132, 0.21672475, 2.7419485, 7.2690793, 0.0014641764, 0.033533153}
	10	{ 0.17358807, 0.0014641764, 0.26188639, 7.0170946, 0.0014641764, 0.081825497, 0.065612672, 0.23938727, 0, 0.0014641764, 1.1993479, 3.1232346, 0.032776467, 3.8275035, 11.681111, 0.0014641764, 1.185403, 0.037501949, 0.0014641764, 0.0014641764}
	11	{ 0.046923924, 0.0014641764, 0.0014641764, 0.038021439, 2.1926949, 0.014343013, 0.51650871, 2.6655214, 0.0014641764, 0, 3.3336075, 0.0014641764, 2.8788489, 0.8464345, 0.17182315, 1.7991682, 0.0014641764, 0.86487141, 0.40127511, 0.0014641764}
	12	{ 0.0014641764, 0.0014641764, 0.0014641764, 0.0014641764, 0.0014641764, 0.014343013, 0.0014641764, 5.0679244, 1.1993479, 3.3336075, 0, 0.059013922, 0.0014641764, 0.0014641764, 0.96240899, 0.11495981, 2.170826, 4.3246792, 0.0014641764, 0.16960961}
	13	{ 0.18082842, 0.017714543, 8.6119047, 0.059013922, 0.0014641764, 0.017714543, 2.5180202, 0.28903662, 3.1232346, 0.0014641764, 0.059013922, 0, 0.10098366, 0.10016958, 0.046923924, 4.2665807, 1.3300754, 0.021810606, 0.0014641764, 1.4831375}
	14	{ 0.29570799, 0.0014641764, 0.0014641764, 0.0014641764, 0.010022346, 0.0014641764, 4.0834122, 0.0014641764, 0.032776467, 2.8788489, 0.0014641764, 0.10098366, 0, 0.89168927, 0.11851717, 4.1726098, 1.2700295, 0.0014641764, 0.0014641764, 0.0014641764}
	15	{ 0.0014641764, 0.0014641764, 0.0014641764, 0.93899388, 0.0014641764, 0.017714543, 5.4310694, 0.010022346, 3.8275035, 0.8464345, 0.0014641764, 0.10016958, 0.89168927, 0, 3.1258994, 0.046923924, 0.059472209, 0.0014641764, 0.012981329, 0.0014641764}
	16	{ 0.021810606, 0.83857481, 0.019752881, 0.0073686726, 0.0014641764, 3.9350911, 2.0041793, 0.39260132, 11.681111, 0.17182315, 0.96240899, 0.046923924, 0.11851717, 3.1258994, 0, 2.4452448, 0.27181058, 0.081825497, 1.7469498, 0.0014641764}
	17	{ 2.5166849, 0.32854654, 0.12529865, 0.0014641764, 1.2531563, 1.838906, 0.21235155, 0.21672475, 0.0014641764, 1.7991682, 0.11495981, 4.2665807, 4.1726098, 0.046923924, 2.4452448, 0, 1.856807, 0.25261054, 0.32257563, 0.27325689}
	18	{ 7.0696878, 0.0014641764, 0.18460189, 0.13433613, 0.033533153, 0.014343013, 0.28099302, 2.7419485, 1.185403, 0.0014641764, 2.170826, 1.3300754, 1.2700295, 0.059472209, 0.27181058, 1.856807, 0, 0.0014641764, 0.0014641764, 0.14366733}
	19	{ 7.2650675, 0.0014641764, 0.85445233, 0.64409704, 0.66766443, 0.81883185, 0.24231504, 7.2690793, 0.037501949, 0.86487141, 4.3246792, 0.021810606, 0.0014641764, 0.0014641764, 0.081825497, 0.25261054, 0.0014641764, 0, 0.0014641764, 0.39673909}
	20	{ 0.0014641764, 1.6102836, 0.0014641764, 0.0014641764, 0.0014641764, 0.82749392, 0.0014641764, 0.0014641764, 0.0014641764, 0.40127511, 0.0014641764, 0.0014641764, 0.0014641764, 0.012981329, 1.7469498, 0.32257563, 0.0014641764, 0.0014641764, 0, 0.0014641764}
	21	{ 0.0014641764, 2.4446914, 0.66811539, 0.0014641764, 1.2086132, 0.0014641764, 13.906425, 0.033533153, 0.0014641764, 0.0014641764, 0.16960961, 1.4831375, 0.0014641764, 0.0014641764, 0.0014641764, 0.27325689, 0.14366733, 0.39673909, 0.0014641764, 0}
	22	}
	23
	24
	25	{{ 0.0377494}
	26	{ 0.0240105}
	27	{ 0.0342034}
	28	{ 0.0618606}
	29	{ 0.0422741}
	30	{ 0.0838496}
	31	{ 0.0156076}
	32	{ 0.0983641}
	33	{ 0.0641682}
	34	{ 0.0577867}
	35	{ 0.0158419}
	36	{ 0.0891129}
	37	{ 0.0458601}
	38	{ 0.0437824}
	39	{ 0.057321}
	40	{ 0.0550846}
	41	{ 0.0813774}
	42	{ 0.0515639}
	43	{ 0.019597}
	44	{ 0.0205847}
	45	}

+24

-0

libs/phylogeny/replacementMatrixSource/cpREV45.dat less more

	0	105
	1	227 357
	2	175 43 4435
	3	669 823 538 10
	4	157 1745 768 400 10
	5	499 152 1055 3691 10 3122
	6	665 243 653 431 303 133 379
	7	66 715 1405 331 441 1269 162 19
	8	145 136 168 10 280 92 148 40 29
	9	197 203 113 10 396 286 82 20 66 1745
	10	236 4482 2430 412 48 3313 2629 263 305 345 218
	11	185 125 61 47 159 202 113 21 10 1772 1351 193
	12	68 53 97 22 726 10 145 25 127 454 1268 72 327
	13	490 87 173 170 285 323 185 28 152 117 219 302 100 43
	14	2440 385 2085 590 2331 396 568 691 303 216 516 868 93 487 1202
	15	1340 314 1393 266 576 241 369 92 32 1040 156 918 645 148 260 2151
	16	14 230 40 18 435 53 63 82 69 42 159 10 86 468 49 73 29
	17	56 323 754 281 1466 391 142 10 1971 89 189 247 215 2370 97 522 71 346
	18	968 92 83 75 592 54 200 91 25 4797 865 249 475 317 122 167 760 10 119
	19
	20	0.076 0.062 0.041 0.037 0.009 0.038 0.049 0.084 0.025 0.081
	21	0.101 0.050 0.022 0.051 0.043 0.062 0.054 0.018 0.031 0.066
	22
	23	cpREV45 model

+93

-0

libs/phylogeny/replacementMatrixSource/dayhoff.dat less more

	0	27
	1	98 32
	2	120 0 905
	3	36 23 0 0
	4	89 246 103 134 0
	5	198 1 148 1153 0 716
	6	240 9 139 125 11 28 81
	7	23 240 535 86 28 606 43 10
	8	65 64 77 24 44 18 61 0 7
	9	41 15 34 0 0 73 11 7 44 257
	10	26 464 318 71 0 153 83 27 26 46 18
	11	72 90 1 0 0 114 30 17 0 336 527 243
	12	18 14 14 0 0 0 0 15 48 196 157 0 92
	13	250 103 42 13 19 153 51 34 94 12 32 33 17 11
	14	409 154 495 95 161 56 79 234 35 24 17 96 62 46 245
	15	371 26 229 66 16 53 34 30 22 192 33 136 104 13 78 550
	16	0 201 23 0 0 0 0 0 27 0 46 0 0 76 0 75 0
	17	24 8 95 0 96 0 22 0 127 37 28 13 0 698 0 34 42 61
	18	208 24 15 18 49 35 37 54 44 889 175 10 258 12 48 30 157 0 28
	19
	20	0.087127 0.040904 0.040432 0.046872 0.033474 0.038255 0.049530
	21	0.088612 0.033618 0.036886 0.085357 0.080482 0.014753 0.039772
	22	0.050680 0.069577 0.058542 0.010494 0.029916 0.064718
	23
	24	Ala Arg Asn Asp Cys Gln Glu Gly His Ile Leu Lys Met Phe Pro Ser Thr Trp Tyr Val
	25
	26	S_ij = S_ji and PI_i for the Dayhoff model, with the rate Q_ij=S_ij*PI_j
	27	The rest of the file is not used.
	28	Prepared by Z. Yang, March 1995.
	29
	30
	31	See the following reference for notation used here:
	32
	33	Yang, Z., R. Nielsen and M. Hasegawa. 1998. Models of amino acid substitution and
	34	applications to mitochondrial protein evolution. Mol. Biol. Evol. 15:1600-1611.
	35
	36
	37	-----------------------------------------------------------------------
	38
	39
	40	30
	41	109 17
	42	154 0 532
	43	33 10 0 0
	44	93 120 50 76 0
	45	266 0 94 831 0 422
	46	579 10 156 162 10 30 112
	47	21 103 226 43 10 243 23 10
	48	66 30 36 13 17 8 35 0 3
	49	95 17 37 0 0 75 15 17 40 253
	50	57 477 322 85 0 147 104 60 23 43 39
	51	29 17 0 0 0 20 7 7 0 57 207 90
	52	20 7 7 0 0 0 0 17 20 90 167 0 17
	53	345 67 27 10 10 93 40 49 50 7 43 43 4 7
	54	772 137 432 98 117 47 86 450 26 20 32 168 20 40 269
	55	590 20 169 57 10 37 31 50 14 129 52 200 28 10 73 696
	56	0 27 3 0 0 0 0 0 3 0 13 0 0 10 0 17 0
	57	20 3 36 0 30 0 10 0 40 13 23 10 0 260 0 22 23 6
	58	365 20 13 17 33 27 37 97 30 661 303 17 77 10 50 43 186 0 17
	59	A R N D C Q E G H I L K M F P S T W Y V
	60	Ala Arg Asn Asp Cys Gln Glu Gly His Ile Leu Lys Met Phe Pro Ser Thr Trp Tyr Val
	61
	62	Accepted point mutations (x10) Figure 80 (Dayhoff 1978)
	63	-------------------------------------------------------
	64
	65	A 100 /* Ala / A 0.087 / Ala */
	66	R 65 /* Arg / R 0.041 / Arg */
	67	N 134 /* Asn / N 0.040 / Asn */
	68	D 106 /* Asp / D 0.047 / Asp */
	69	C 20 /* Cys / C 0.033 / Cys */
	70	Q 93 /* Gln / Q 0.038 / Gln */
	71	E 102 /* Glu / E 0.050 / Glu */
	72	G 49 /* Gly / G 0.089 / Gly */
	73	H 66 /* His / H 0.034 / His */
	74	I 96 /* Ile / I 0.037 / Ile */
	75	L 40 /* Leu / L 0.085 / Leu */
	76	K 56 /* Lys / K 0.081 / Lys */
	77	M 94 /* Met / M 0.015 / Met */
	78	F 41 /* Phe / F 0.040 / Phe */
	79	P 56 /* Pro / P 0.051 / Pro */
	80	S 120 /* Ser / S 0.070 / Ser */
	81	T 97 /* Thr / T 0.058 / Thr */
	82	W 18 /* Trp / W 0.010 / Trp */
	83	Y 41 /* Tyr / Y 0.030 / Tyr */
	84	V 74 /* Val / V 0.065 / Val */
	85
	86	scale factor = SUM_OF_PRODUCT = 75.246
	87
	88
	89	Relative Mutability The equilibrium freqs.
	90	(Table 21) Table 22
	91	(Dayhoff 1978) Dayhoff (1978)
	92	----------------------------------------------------------------

+150

-0

libs/phylogeny/replacementMatrixSource/jones.dat less more

	0	58
	1	54 45
	2	81 16 528
	3	56 113 34 10
	4	57 310 86 49 9
	5	105 29 58 767 5 323
	6	179 137 81 130 59 26 119
	7	27 328 391 112 69 597 26 23
	8	36 22 47 11 17 9 12 6 16
	9	30 38 12 7 23 72 9 6 56 229
	10	35 646 263 26 7 292 181 27 45 21 14
	11	54 44 30 15 31 43 18 14 33 479 388 65
	12	15 5 10 4 78 4 5 5 40 89 248 4 43
	13	194 74 15 15 14 164 18 24 115 10 102 21 16 17
	14	378 101 503 59 223 53 30 201 73 40 59 47 29 92 285
	15	475 64 232 38 42 51 32 33 46 245 25 103 226 12 118 477
	16	9 126 8 4 115 18 10 55 8 9 52 10 24 53 6 35 12
	17	11 20 70 46 209 24 7 8 573 32 24 8 18 536 10 63 21 71
	18	298 17 16 31 62 20 45 47 11 961 180 14 323 62 23 38 112 25 16
	19
	20	0.076748 0.051691 0.042645 0.051544 0.019803 0.040752 0.061830
	21	0.073152 0.022944 0.053761 0.091904 0.058676 0.023826 0.040126
	22	0.050901 0.068765 0.058565 0.014261 0.032102 0.066005
	23
	24	Ala Arg Asn Asp Cys Gln Glu Gly His Ile Leu Lys Met Phe Pro Ser Thr Trp Tyr Val
	25
	26	S_ij = S_ji and PI_i for the Jones model based on the SWISSPROT
	27	Version 22 data.
	28	Rate Q_ij=S_ij*PI_j.
	29	The rest of the file is not used.
	30	Prepared by Z. Yang, March 1995.
	31
	32	See the following reference for notation:
	33
	34	Yang, Z., R. Nielsen and M. Hasegawa. 1998. Models of amino acid substitution and
	35	applications to mitochondrial protein evolution. Mol. Biol. Evol. 15:1600-1611.
	36
	37	-----------------------------------------------------------------------
	38
	39	426
	40	333 185
	41	596 80 2134
	42	159 214 54 20
	43	332 1203 277 192 14
	44	920 176 286 4497 11 1497
	45	1853 954 470 907 158 144 999
	46	88 716 704 244 58 1027 69 71
	47	286 114 198 59 34 37 72 44 37
	48	394 332 88 62 79 497 101 80 217 2086
	49	294 3606 1209 148 15 1289 1210 215 115 121 140
	50	185 100 56 34 27 78 50 47 33 1129 1567 167
	51	84 21 33 16 115 14 23 28 69 354 1690 17 76
	52	1395 360 64 74 27 629 106 171 249 54 882 117 36 66
	53	3664 661 2706 390 559 278 236 1861 214 274 691 351 89 468 1839
	54	3920 360 1069 216 91 227 217 266 116 1420 256 653 579 54 653 3527
	55	19 171 9 5 60 20 17 106 5 13 127 16 15 56 8 64 18
	56	49 62 178 142 246 59 26 34 777 102 131 30 25 1276 32 259 73 60
	57	2771 111 86 195 150 100 336 420 32 6260 2020 99 937 307 142 320 805 44 63
	58
	59	A R N D C Q E G H I L K M F P S T W Y V
	60	Ala Arg Asn Asp Cys Gln Glu Gly His Ile Leu Lys Met Phe Pro Ser Thr Trp Tyr Val
	61
	62	Accepted point mutations (x10), similar to Figure 80 of Dayhoff et
	63	al. (1978). SwissProt version 22 data.
	64	------------------------------------------------------------------------------
	65
	66	256458 426 333 596 159 332 920 1853 88 286 394 294 185 84 1395 3664 3920 19 49 2771
	67	426 182302 185 80 214 1203 176 954 716 114 332 3606 100 21 360 661 360 171 62 111
	68	333 185 150772 2134 54 277 286 470 704 198 88 1209 56 33 64 2706 1069 9 178 86
	69	596 80 2134 178390 20 192 4497 907 244 59 62 148 34 16 74 390 216 5 142 195
	70	159 214 54 20 68120 14 11 158 58 34 79 15 27 115 27 559 91 60 246 150
	71	332 1203 277 192 14 139546 1497 144 1027 37 497 1289 78 14 629 278 227 20 59 100
	72	920 176 286 4497 11 1497 218432 999 69 72 101 1210 50 23 106 236 217 17 26 336
	73	1853 954 470 907 158 144 999 255274 71 44 80 215 47 28 171 1861 266 106 34 420
	74	88 716 704 244 58 1027 69 71 77124 37 217 115 33 69 249 214 116 5 777 32
	75	286 114 198 59 34 37 72 44 37 191018 2086 121 1129 354 54 274 1420 13 102 6260
	76	394 332 88 62 79 497 101 80 217 2086 319504 140 1567 1690 882 691 256 127 131 2020
	77	294 3606 1209 148 15 1289 1210 215 115 121 140 206568 167 17 117 351 653 16 30 99
	78	185 100 56 34 27 78 50 47 33 1129 1567 167 84670 76 36 89 579 15 25 937
	79	84 21 33 16 115 14 23 28 69 354 1690 17 76 143088 66 468 54 56 1276 307
	80	1395 360 64 74 27 629 106 171 249 54 882 117 36 66 175488 1839 653 8 32 142
	81	3664 661 2706 390 559 278 236 1861 214 274 691 351 89 468 1839 234536 3527 64 259 320
	82	3920 360 1069 216 91 227 217 266 116 1420 256 653 579 54 653 3527 203636 18 73 805
	83	19 171 9 5 60 20 17 106 5 13 127 16 15 56 8 64 18 50486 60 44
	84	49 62 178 142 246 59 26 34 777 102 131 30 25 1276 32 259 73 60 114728 63
	85	2771 111 86 195 150 100 336 420 32 6260 2020 99 937 307 142 320 805 44 63 223724
	86
	87	Observed difference counts from pairwise comparisons, with ancestral sequences
	88	constructed by parsimony. F(t) = PI*P(t).
	89	Based on the SwissProt 22 data, kindly provided by D. Jones (Jones et al. 1992)
	90	-------------------------------------------------------------------------------
	91
	92
	93	Ala 0.98754 0.00030 0.00023 0.00042 0.00011 0.00023 0.00065 0.00130 0.00006 0.00020 0.00028 0.00021 0.00013 0.00006 0.00098 0.00257 0.00275 0.00001 0.00003 0.00194
	94	Arg 0.00044 0.98974 0.00019 0.00008 0.00022 0.00125 0.00018 0.00099 0.00075 0.00012 0.00035 0.00376 0.00010 0.00002 0.00037 0.00069 0.00037 0.00018 0.00006 0.00012
	95	Asn 0.00042 0.00023 0.98720 0.00269 0.00007 0.00035 0.00036 0.00059 0.00089 0.00025 0.00011 0.00153 0.00007 0.00004 0.00008 0.00342 0.00135 0.00001 0.00022 0.00011
	96	Asp 0.00062 0.00008 0.00223 0.98954 0.00002 0.00020 0.00470 0.00095 0.00025 0.00006 0.00006 0.00015 0.00004 0.00002 0.00008 0.00041 0.00023 0.00001 0.00015 0.00020
	97	Cys 0.00043 0.00058 0.00015 0.00005 0.99432 0.00004 0.00003 0.00043 0.00016 0.00009 0.00021 0.00004 0.00007 0.00031 0.00007 0.00152 0.00025 0.00016 0.00067 0.00041
	98	Gln 0.00044 0.00159 0.00037 0.00025 0.00002 0.98955 0.00198 0.00019 0.00136 0.00005 0.00066 0.00170 0.00010 0.00002 0.00083 0.00037 0.00030 0.00003 0.00008 0.00013
	99	Glu 0.00080 0.00015 0.00025 0.00392 0.00001 0.00130 0.99055 0.00087 0.00006 0.00006 0.00009 0.00105 0.00004 0.00002 0.00009 0.00021 0.00019 0.00001 0.00002 0.00029
	100	Gly 0.00136 0.00070 0.00035 0.00067 0.00012 0.00011 0.00074 0.99350 0.00005 0.00003 0.00006 0.00016 0.00003 0.00002 0.00013 0.00137 0.00020 0.00008 0.00003 0.00031
	101	His 0.00021 0.00168 0.00165 0.00057 0.00014 0.00241 0.00016 0.00017 0.98864 0.00009 0.00051 0.00027 0.00008 0.00016 0.00058 0.00050 0.00027 0.00001 0.00182 0.00008
	102	Ile 0.00029 0.00011 0.00020 0.00006 0.00003 0.00004 0.00007 0.00004 0.00004 0.98729 0.00209 0.00012 0.00113 0.00035 0.00005 0.00027 0.00142 0.00001 0.00010 0.00627
	103	Leu 0.00023 0.00019 0.00005 0.00004 0.00005 0.00029 0.00006 0.00005 0.00013 0.00122 0.99330 0.00008 0.00092 0.00099 0.00052 0.00040 0.00015 0.00007 0.00008 0.00118
	104	Lys 0.00027 0.00331 0.00111 0.00014 0.00001 0.00118 0.00111 0.00020 0.00011 0.00011 0.00013 0.99100 0.00015 0.00002 0.00011 0.00032 0.00060 0.00001 0.00003 0.00009
	105	Met 0.00042 0.00023 0.00013 0.00008 0.00006 0.00018 0.00011 0.00011 0.00007 0.00255 0.00354 0.00038 0.98818 0.00017 0.00008 0.00020 0.00131 0.00003 0.00006 0.00212
	106	Phe 0.00011 0.00003 0.00004 0.00002 0.00015 0.00002 0.00003 0.00004 0.00009 0.00047 0.00227 0.00002 0.00010 0.99360 0.00009 0.00063 0.00007 0.00008 0.00171 0.00041
	107	Pro 0.00148 0.00038 0.00007 0.00008 0.00003 0.00067 0.00011 0.00018 0.00026 0.00006 0.00093 0.00012 0.00004 0.00007 0.99270 0.00194 0.00069 0.00001 0.00003 0.00015
	108	Ser 0.00287 0.00052 0.00212 0.00031 0.00044 0.00022 0.00018 0.00146 0.00017 0.00021 0.00054 0.00027 0.00007 0.00037 0.00144 0.98556 0.00276 0.00005 0.00020 0.00025
	109	Thr 0.00360 0.00033 0.00098 0.00020 0.00008 0.00021 0.00020 0.00024 0.00011 0.00131 0.00024 0.00060 0.00053 0.00005 0.00060 0.00324 0.98665 0.00002 0.00007 0.00074
	110	Trp 0.00007 0.00065 0.00003 0.00002 0.00023 0.00008 0.00006 0.00040 0.00002 0.00005 0.00048 0.00006 0.00006 0.00021 0.00003 0.00024 0.00007 0.99686 0.00023 0.00017
	111	Tyr 0.00008 0.00010 0.00030 0.00024 0.00041 0.00010 0.00004 0.00006 0.00130 0.00017 0.00022 0.00005 0.00004 0.00214 0.00005 0.00043 0.00012 0.00010 0.99392 0.00011
	112	Val 0.00226 0.00009 0.00007 0.00016 0.00012 0.00008 0.00027 0.00034 0.00003 0.00511 0.00165 0.00008 0.00076 0.00025 0.00012 0.00026 0.00066 0.00004 0.00005 0.98761
	113
	114	P(0.01), amino acid exchange data generated from SWISSPROT Release 22.0
	115	Ref. Jones D.T., Taylor W.R. and Thornton J.M. (1992) CABIOS 8:275-282
	116
	117
	118	Usable sequences: 23824
	119	Final alignments: 5437
	120	Accepted point mutations: 92883
	121
	122	A R N D C Q E G H I L K M F P S T W Y V
	123
	124
	125	0.0767477 100
	126	0.0516907 82.3263
	127	0.0426448 102.697
	128	0.0515445 83.8924
	129	0.0198027 45.6097
	130	0.0407523 83.8825
	131	0.0618296 75.7914
	132	0.0731516 52.1273
	133	0.0229438 91.1374
	134	0.0537609 101.99
	135	0.0919042 53.7672
	136	0.0586762 72.2308
	137	0.0238262 94.8144
	138	0.0401265 51.3146
	139	0.0509007 58.5874
	140	0.0687652 115.899
	141	0.0585647 107.092
	142	0.0142613 25.2297
	143	0.0321015 48.7629
	144	0.0660051 99.4571
	145
	146	Normalized Relative
	147	frequency mutabilities
	148	(SUM m*f) = 80.240436
	149	-------------------------------------------

+24

-0

libs/phylogeny/replacementMatrixSource/mitochondriaAscidian.code less more

	0	"A GCT GCC GCA GCG "
	1	"R CGT CGC CGA CGG "
	2	"N AAT AAC "
	3	"D GAT GAC "
	4	"C TGT TGC "
	5	"Q CAA CAG "
	6	"E GAA GAG "
	7	"G GGT GGC GGA GGG AGA AGG "
	8	"H CAT CAC "
	9	"I ATT ATC "
	10	"L CTT CTC CTA CTG TTA TTG "
	11	"K AAA AAG "
	12	"M ATG ATA "
	13	"F TTT TTC "
	14	"P CCT CCC CCA CCG "
	15	"S TCT TCC TCA TCG AGT AGC "
	16	"T ACT ACC ACA ACG "
	17	"W TGG TGA "
	18	"Y TAT TAC "
	19	"V GTT GTC GTA GTG "
	20	"* TAA TAG "
	21	"i ATG "
	22	"### taken from http://bioinformatics.org/JaMBW/2/3/TranslationTables.html "
	23	"### NOTE: initiation codons must appear after all codon lines "

+24

-0

libs/phylogeny/replacementMatrixSource/mitochondriaEchinoderm.code less more

	0	"A GCT GCC GCA GCG "
	1	"R CGT CGC CGA CGG "
	2	"N AAT AAC AAA "
	3	"D GAT GAC "
	4	"C TGT TGC "
	5	"Q CAA CAG "
	6	"E GAA GAG "
	7	"G GGT GGC GGA GGG "
	8	"H CAT CAC "
	9	"I ATT ATC ATA "
	10	"L CTT CTC CTA CTG TTA TTG "
	11	"K AAG "
	12	"M ATG "
	13	"F TTT TTC "
	14	"P CCT CCC CCA CCG "
	15	"S TCT TCC TCA TCG AGT AGC AGA AGG "
	16	"T ACT ACC ACA ACG "
	17	"W TGG TGA "
	18	"Y TAT TAC "
	19	"V GTT GTC GTA GTG "
	20	"* TAA TAG "
	21	"i ATG "
	22	"### taken from http://bioinformatics.org/JaMBW/2/3/TranslationTables.html "
	23	"### NOTE: initiation codons must appear after all codon lines "

+24

-0

libs/phylogeny/replacementMatrixSource/mitochondriaFlatworm.code less more

	0	"A GCT GCC GCA GCG "
	1	"R CGT CGC CGA CGG "
	2	"N AAT AAC AAA "
	3	"D GAT GAC "
	4	"C TGT TGC "
	5	"Q CAA CAG "
	6	"E GAA GAG "
	7	"G GGT GGC GGA GGG "
	8	"H CAT CAC "
	9	"I ATT ATC ATA "
	10	"L CTT CTC CTA CTG TTA TTG "
	11	"K AAG "
	12	"M ATG "
	13	"F TTT TTC "
	14	"P CCT CCC CCA CCG "
	15	"S TCT TCC TCA TCG AGT AGC AGA AGG "
	16	"T ACT ACC ACA ACG "
	17	"W TGG TGA "
	18	"Y TAT TAC TAA "
	19	"V GTT GTC GTA GTG "
	20	"* TAG "
	21	"i ATG "
	22	"### taken from http://bioinformatics.org/JaMBW/2/3/TranslationTables.html "
	23	"### NOTE: initiation codons must appear after all codon lines "

+24

-0

libs/phylogeny/replacementMatrixSource/mitochondriaInvertebrate.code less more

	0	"A GCT GCC GCA GCG "
	1	"R CGT CGC CGA CGG "
	2	"N AAT AAC "
	3	"D GAT GAC "
	4	"C TGT TGC "
	5	"Q CAA CAG "
	6	"E GAA GAG "
	7	"G GGT GGC GGA GGG "
	8	"H CAT CAC "
	9	"I ATT ATC "
	10	"L CTT CTC CTA CTG TTA TTG "
	11	"K AAA AAG "
	12	"M ATG ATA "
	13	"F TTT TTC "
	14	"P CCT CCC CCA CCG "
	15	"S TCT TCC TCA TCG AGT AGC AGA AGG "
	16	"T ACT ACC ACA ACG "
	17	"W TGG TGA "
	18	"Y TAT TAC "
	19	"V GTT GTC GTA GTG "
	20	"* TAA TAG "
	21	"i ATG GTG ATA ATC ATT TTG "
	22	"### taken from http://bioinformatics.org/JaMBW/2/3/TranslationTables.html "
	23	"### NOTE: initiation codons must appear after all codon lines "⏎

+24

-0

libs/phylogeny/replacementMatrixSource/mitochondriaProtozoan.code less more

	0	"A GCT GCC GCA GCG "
	1	"R CGT CGC CGA CGG AGA AGG "
	2	"N AAT AAC "
	3	"D GAT GAC "
	4	"C TGT TGC "
	5	"Q CAA CAG "
	6	"E GAA GAG "
	7	"G GGT GGC GGA GGG "
	8	"H CAT CAC "
	9	"I ATT ATC ATA "
	10	"L CTT CTC CTA CTG TTA TTG "
	11	"K AAA AAG "
	12	"M ATG "
	13	"F TTT TTC "
	14	"P CCT CCC CCA CCG "
	15	"S TCT TCC TCA TCG AGT AGC "
	16	"T ACT ACC ACA ACG "
	17	"W TGG TGA "
	18	"Y TAT TAC "
	19	"V GTT GTC GTA GTG "
	20	"* TAA TAG "
	21	"i ATG GTG ATA ATC ATT CTG TTG TTA "
	22	"### taken from http://bioinformatics.org/JaMBW/2/3/TranslationTables.html "
	23	"### NOTE: initiation codons must appear after all codon lines "⏎

+24

-0

libs/phylogeny/replacementMatrixSource/mitochondriaVertebrate.code less more

	0	"A GCT GCC GCA GCG "
	1	"R CGT CGC CGA CGG "
	2	"N AAT AAC "
	3	"D GAT GAC "
	4	"C TGT TGC "
	5	"Q CAA CAG "
	6	"E GAA GAG "
	7	"G GGT GGC GGA GGG "
	8	"H CAT CAC "
	9	"I ATT ATC "
	10	"L CTT CTC CTA CTG TTA TTG "
	11	"K AAA AAG "
	12	"M ATG ATA "
	13	"F TTT TTC "
	14	"P CCT CCC CCA CCG "
	15	"S TCT TCC TCA TCG AGT AGC "
	16	"T ACT ACC ACA ACG "
	17	"W TGG TGA "
	18	"Y TAT TAC "
	19	"V GTT GTC GTA GTG "
	20	"* TAA TAG AGG AGA "
	21	"i ATG GTG ATA ATC ATT "
	22	"### taken from http://bioinformatics.org/JaMBW/2/3/TranslationTables.html "
	23	"### NOTE: initiation codons must appear after all codon lines "⏎

+24

-0

libs/phylogeny/replacementMatrixSource/mitochondriaYeast.code less more

	0	"A GCT GCC GCA GCG "
	1	"R CGT CGC CGA CGG AGG AGA "
	2	"N AAT AAC "
	3	"D GAT GAC "
	4	"C TGT TGC "
	5	"Q CAA CAG "
	6	"E GAA GAG "
	7	"G GGT GGC GGA GGG "
	8	"H CAT CAC "
	9	"I ATT ATC "
	10	"L TTA TTG "
	11	"K AAA AAG "
	12	"M ATG ATA "
	13	"F TTT TTC "
	14	"P CCT CCC CCA CCG "
	15	"S TCT TCC TCA TCG AGT AGC "
	16	"T ACT ACC ACA ACG CTT CTC CTA CTG "
	17	"W TGG TGA "
	18	"Y TAT TAC "
	19	"V GTT GTC GTA GTG "
	20	"* TAA TAG "
	21	"i ATG "
	22	"### taken from http://bioinformatics.org/JaMBW/2/3/TranslationTables.html "
	23	"### NOTE: initiation codons must appear after all codon lines "⏎

+41

-0

libs/phylogeny/replacementMatrixSource/mtREV24.dat less more

	0
	1	23.18
	2	26.95 13.24
	3	17.67 1.90 794.38
	4	59.93 103.33 58.94 1.90
	5	1.90 220.99 173.56 55.28 75.24
	6	9.77 1.90 63.05 583.55 1.90 313.56
	7	120.71 23.03 53.30 56.77 30.71 6.75 28.28
	8	13.90 165.23 496.13 113.99 141.49 582.40 49.12 1.90
	9	96.49 1.90 27.10 4.34 62.73 8.34 3.31 5.98 12.26
	10	25.46 15.58 15.16 1.90 25.65 39.70 1.90 2.41 11.49 329.09
	11	8.36 141.40 608.70 2.31 1.90 465.58 313.86 22.73 127.67 19.57 14.88
	12	141.88 1.90 65.41 1.90 6.18 47.37 1.90 1.90 11.97 517.98 537.53 91.37
	13	6.37 4.69 15.20 4.98 70.80 19.11 2.67 1.90 48.16 84.67 216.06 6.44 90.82
	14	54.31 23.64 73.31 13.43 31.26 137.29 12.83 1.90 60.97 20.63 40.10 50.10 18.84 17.31
	15	387.86 6.04 494.39 69.02 277.05 54.11 54.71 125.93 77.46 47.70 73.61 105.79 111.16 64.29 169.90
	16	480.72 2.08 238.46 28.01 179.97 94.93 14.82 11.17 44.78 368.43 126.40 136.33 528.17 33.85 128.22 597.21
	17	1.90 21.95 10.68 19.86 33.60 1.90 1.90 10.92 7.08 1.90 32.44 24.00 21.71 7.84 4.21 38.58 9.99
	18	6.48 1.90 191.36 21.21 254.77 38.82 13.12 3.21 670.14 25.01 44.15 51.17 39.96 465.58 16.21 64.92 38.73 26.25
	19	195.06 7.64 1.90 1.90 1.90 19.00 21.14 2.53 1.90 1222.94 91.67 1.90 387.54 6.35 8.23 1.90 204.54 5.37 1.90
	20
	21
	22	0.072 0.019 0.039 0.019 0.006 0.025 0.024 0.056 0.028 0.088 0.169
	23	0.023 0.054 0.061 0.054 0.072 0.086 0.029 0.033 0.043
	24
	25	Ala Arg Asn Asp Cys Gln Glu Gly His Ile Leu Lys Met Phe Pro Ser Thr Trp Tyr Val
	26
	27
	28	S_ij = S_ji and PI_i for the mtREV24 model (Adachi and Hasegawa 1996).
	29	The PI's used to sum to 0.999 and I changed one of the freq from 0.168
	30	into 0.169 so that the sum is 1. Prepared by Z. Yang according to
	31	data sent by Dr M. Hasegawa. This matrix was obtained from the 12
	32	mitochondrial proteins encoded by the same strand of the DNA from a
	33	diverse range of species including bird, fish, frog, lamprey, as well
	34	as mammals (see Adachi and Hasegawa 1996 for details). The other
	35	matrix (mtmam.dat) included in the package is based on the same
	36	proteins from mammals only.
	37
	38	Adachi, J. and Hasegawa, M. (1996) MOLPHY version 2.3: programs for
	39	molecular phylogenetics based on maximum likelihood. Computer Science
	40	Monographs of Institute of Statistical Mathematics 28:1-150.

+24

-0

libs/phylogeny/replacementMatrixSource/nuclearBlepharisma.code less more

	0	"A GCT GCC GCA GCG "
	1	"R CGT CGC CGA CGG AGA AGG "
	2	"N AAT AAC "
	3	"D GAT GAC "
	4	"C TGT TGC "
	5	"Q CAA CAG TAG "
	6	"E GAA GAG "
	7	"G GGT GGC GGA GGG "
	8	"H CAT CAC "
	9	"I ATT ATC ATA "
	10	"L CTT CTC CTA CTG TTA TTG "
	11	"K AAA AAG "
	12	"M ATG "
	13	"F TTT TTC "
	14	"P CCT CCC CCA CCG "
	15	"S TCT TCC TCA TCG AGT AGC "
	16	"T ACT ACC ACA ACG "
	17	"W TGG "
	18	"Y TAT TAC "
	19	"V GTT GTC GTA GTG "
	20	"* TAA TGA "
	21	"i ATG "
	22	"### taken from http://bioinformatics.org/JaMBW/2/3/TranslationTables.html "
	23	"### NOTE: initiation codons must appear after all codon lines "⏎

+24

-0

libs/phylogeny/replacementMatrixSource/nuclearCiliate.code less more

	0	"A GCT GCC GCA GCG "
	1	"R CGT CGC CGA CGG AGA AGG "
	2	"N AAT AAC "
	3	"D GAT GAC "
	4	"C TGT TGC "
	5	"Q CAA CAG TAA TAG "
	6	"E GAA GAG "
	7	"G GGT GGC GGA GGG "
	8	"H CAT CAC "
	9	"I ATT ATC ATA "
	10	"L CTT CTC CTA CTG TTA TTG "
	11	"K AAA AAG "
	12	"M ATG "
	13	"F TTT TTC "
	14	"P CCT CCC CCA CCG "
	15	"S TCT TCC TCA TCG AGT AGC "
	16	"T ACT ACC ACA ACG "
	17	"W TGG "
	18	"Y TAT TAC "
	19	"V GTT GTC GTA GTG "
	20	"* TGA "
	21	"i ATG "
	22	"### taken from http://bioinformatics.org/JaMBW/2/3/TranslationTables.html "
	23	"### NOTE: initiation codons must appear after all codon lines "⏎

+24

-0

libs/phylogeny/replacementMatrixSource/nuclearEuplotid.code less more

	0	"A GCT GCC GCA GCG "
	1	"R CGT CGC CGA CGG AGA AGG "
	2	"N AAT AAC "
	3	"D GAT GAC "
	4	"C TGT TGC TGA "
	5	"Q CAA CAG "
	6	"E GAA GAG "
	7	"G GGT GGC GGA GGG "
	8	"H CAT CAC "
	9	"I ATT ATC ATA "
	10	"L CTT CTC CTA CTG TTA TTG "
	11	"K AAA AAG "
	12	"M ATG "
	13	"F TTT TTC "
	14	"P CCT CCC CCA CCG "
	15	"S TCT TCC TCA TCG AGT AGC "
	16	"T ACT ACC ACA ACG "
	17	"W TGG "
	18	"Y TAT TAC "
	19	"V GTT GTC GTA GTG "
	20	"* TAA TAG "
	21	"i ATG "
	22	"### taken from http://bioinformatics.org/JaMBW/2/3/TranslationTables.html "
	23	"### NOTE: initiation codons must appear after all codon lines "⏎

+24

-0

libs/phylogeny/replacementMatrixSource/nuclearStandard.code less more

	0	"A GCT GCC GCA GCG "
	1	"R CGT CGC CGA CGG AGA AGG "
	2	"N AAT AAC "
	3	"D GAT GAC "
	4	"C TGT TGC "
	5	"Q CAA CAG "
	6	"E GAA GAG "
	7	"G GGT GGC GGA GGG "
	8	"H CAT CAC "
	9	"I ATT ATC ATA "
	10	"L CTT CTC CTA CTG TTA TTG "
	11	"K AAA AAG "
	12	"M ATG "
	13	"F TTT TTC "
	14	"P CCT CCC CCA CCG "
	15	"S TCT TCC TCA TCG AGT AGC "
	16	"T ACT ACC ACA ACG "
	17	"W TGG "
	18	"Y TAT TAC "
	19	"V GTT GTC GTA GTG "
	20	"* TAA TAG TGA "
	21	"i ATG CTG TTG "
	22	"### taken from http://bioinformatics.org/JaMBW/2/3/TranslationTables.html "
	23	"### NOTE: initiation codons must appear after all codon lines "

+47

-0

libs/phylogeny/replacementMatrixSource/wag.dat less more

	0
	1	0.551571
	2	0.509848 0.635346
	3	0.738998 0.147304 5.429420
	4	1.027040 0.528191 0.265256 0.0302949
	5	0.908598 3.035500 1.543640 0.616783 0.0988179
	6	1.582850 0.439157 0.947198 6.174160 0.021352 5.469470
	7	1.416720 0.584665 1.125560 0.865584 0.306674 0.330052 0.567717
	8	0.316954 2.137150 3.956290 0.930676 0.248972 4.294110 0.570025 0.249410
	9	0.193335 0.186979 0.554236 0.039437 0.170135 0.113917 0.127395 0.0304501 0.138190
	10	0.397915 0.497671 0.131528 0.0848047 0.384287 0.869489 0.154263 0.0613037 0.499462 3.170970
	11	0.906265 5.351420 3.012010 0.479855 0.0740339 3.894900 2.584430 0.373558 0.890432 0.323832 0.257555
	12	0.893496 0.683162 0.198221 0.103754 0.390482 1.545260 0.315124 0.174100 0.404141 4.257460 4.854020 0.934276
	13	0.210494 0.102711 0.0961621 0.0467304 0.398020 0.0999208 0.0811339 0.049931 0.679371 1.059470 2.115170 0.088836 1.190630
	14	1.438550 0.679489 0.195081 0.423984 0.109404 0.933372 0.682355 0.243570 0.696198 0.0999288 0.415844 0.556896 0.171329 0.161444
	15	3.370790 1.224190 3.974230 1.071760 1.407660 1.028870 0.704939 1.341820 0.740169 0.319440 0.344739 0.967130 0.493905 0.545931 1.613280
	16	2.121110 0.554413 2.030060 0.374866 0.512984 0.857928 0.822765 0.225833 0.473307 1.458160 0.326622 1.386980 1.516120 0.171903 0.795384 4.378020
	17	0.113133 1.163920 0.0719167 0.129767 0.717070 0.215737 0.156557 0.336983 0.262569 0.212483 0.665309 0.137505 0.515706 1.529640 0.139405 0.523742 0.110864
	18	0.240735 0.381533 1.086000 0.325711 0.543833 0.227710 0.196303 0.103604 3.873440 0.420170 0.398618 0.133264 0.428437 6.454280 0.216046 0.786993 0.291148 2.485390
	19	2.006010 0.251849 0.196246 0.152335 1.002140 0.301281 0.588731 0.187247 0.118358 7.821300 1.800340 0.305434 2.058450 0.649892 0.314887 0.232739 1.388230 0.365369 0.314730
	20
	21	0.0866279 0.043972 0.0390894 0.0570451 0.0193078 0.0367281 0.0580589 0.0832518 0.0244313 0.048466 0.086209 0.0620286 0.0195027 0.0384319 0.0457631 0.0695179 0.0610127 0.0143859 0.0352742 0.0708956
	22
	23	A R N D C Q E G H I L K M F P S T W Y V
	24	Ala Arg Asn Asp Cys Gln Glu Gly His Ile Leu Lys Met Phe Pro Ser Thr Trp Tyr Val
	25
	26	Symmetrical part of the rate matrix and aa frequencies,
	27	estimated from 3905 globular protein amino acid sequences forming 182
	28	protein families.
	29	The first part above indicates the symmetric 'exchangeability'
	30	parameters, where s_ij = s_ji. The s_ij above are not scaled, but the
	31	PAML package will perform this scaling.
	32	The second part gives the amino acid frequencies (pi_i)
	33	estimated from the 3905 sequences. The net replacement rate from i to
	34	j is Q_ij = s_ij*pi_j.
	35	Prepared by Simon Whelan and Nick Goldman, September 2000.
	36
	37	Citation:
	38	Whelan, S. and N. Goldman. In press. A general empirical model of
	39	protein evolution derived from multiple protein families using
	40	a maximum likelihood approach. Molecular Biology and
	41	Evolution.
	42
	43	See the following reference for notation used here:
	44
	45	Yang, Z., R. Nielsen and M. Hasegawa. 1998. Models of amino acid substitution and
	46	applications to mitochondrial protein evolution. Mol. Biol. Evol. 15:1600-1611.

+9

-0

libs/phylogeny/replacementModel.cpp less more

	0	// $Id: replacementModel.cpp 962 2006-11-07 15:13:34Z privmane $
	1
	2	#include "replacementModel.h"
	3
	4	replacementModel::~replacementModel(){}
	5	// this must be here. see Effective c++ page 63 (item 14, constructors, destructors,
	6	// assignment
	7
	8

+26

-0

libs/phylogeny/replacementModel.h less more

	0	// $Id: replacementModel.h 962 2006-11-07 15:13:34Z privmane $
	1
	2	#ifndef ___REPLACEMENT_MODEL
	3	#define ___REPLACEMENT_MODEL
	4
	5	#include "definitions.h"
	6
	7	class replacementModel{
	8	public:
	9	virtual const MDOUBLE Pij_t(const int i, const int j, const MDOUBLE t) const = 0;
	10	virtual const MDOUBLE freq(const int i) const = 0;
	11	virtual const MDOUBLE dPij_dt(const int i, const int j, const MDOUBLE t) const =0;
	12	virtual const MDOUBLE d2Pij_dt2(const int i, const int j, const MDOUBLE t) const =0;
	13	virtual replacementModel* clone() const = 0;
	14	virtual ~replacementModel()=0;
	15	virtual const int alphabetSize() const =0;
	16
	17	//virtual const MDOUBLE Q(const int i, const int j, const MDOUBLE r = 1.0) const = 0;
	18	//note that we ask that sigma over i sigma over j!=i of p(i)Qij = 1;
	19	//this is beacuse we ask the [sigma over i sigma over j!=i p(i)*pij(d)]/d approaches
	20	//1 as d -> 0. (and l'hopital from here).
	21	};
	22
	23
	24	#endif
	25

+198

-0

libs/phylogeny/replacementModelSSRV.cpp less more

	0	// $Id: replacementModelSSRV.cpp 4165 2008-06-04 09:19:48Z osnatz $
	1
	2	#include "replacementModelSSRV.h"
	3	#include "logFile.h"
	4	#include <iomanip>
	5	#include <iostream>
	6
	7
	8	replacementModelSSRV::replacementModelSSRV(const distribution* dist, const replacementModel* baseRM, MDOUBLE rateOfRate /= 1 /) :
	9	_dist(dist->clone()),
	10	_baseRM(baseRM->clone()),
	11	_rateOfRate(rateOfRate)
	12	{
	13	if (_dist->categories() == 0)
	14	errorMsg::reportError("replacementModelSSRV::replacementModelSSRV : number of categories == 0");
	15
	16	updateFreq();
	17	updateQ();
	18
	19
	20	}
	21
	22	//// similar to goldmanYangModel.cpp
	23	//replacementModelSSRV::replacementModelSSRV(const replacementModelSSRV& other) :
	24	//_dist(other._dist->clone()),
	25	//_baseRM(other._baseRM->clone()),
	26	//_rateOfRate(other._rateOfRate)
	27	//{
	28	// int size = alphabetSize();
	29	// _Q.resize(size);
	30	// for (int z=0; z < _Q.size();++z)
	31	// _Q[z].resize(size,0);
	32	// updateFreq();
	33	// updateQ();
	34	//}
	35
	36	// Instead of calling updateQ here, like in goldmanYangModel.cpp,
	37	// this method uses the copy constructor of q2pt and also copies _freq and _Q
	38	replacementModelSSRV::replacementModelSSRV(const replacementModelSSRV& other) :
	39	_dist(other._dist->clone()),
	40	_baseRM(other._baseRM->clone()),
	41	_rateOfRate(other._rateOfRate),
	42	_q2pt(other._q2pt),
	43	_freq(other._freq),
	44	_Q(other._Q)
	45	{
	46	}
	47
	48	replacementModelSSRV::~replacementModelSSRV()
	49	{
	50	if (_dist) delete (_dist);
	51	if (_baseRM) delete (_baseRM);
	52	}
	53
	54
	55	replacementModelSSRV& replacementModelSSRV::operator=(const replacementModelSSRV &other)
	56	{
	57	if (_dist) delete (_dist);
	58	if (_baseRM) delete (_baseRM);
	59
	60	_dist = other._dist->clone();
	61	_baseRM = other._baseRM->clone();
	62	_rateOfRate = other._rateOfRate;
	63	_q2pt = other._q2pt; //@@@@ why doesn't this work ? explicit ?
	64	// _q2pt.fillFromRateMatrix(other._freq,other._Q);
	65	_freq = other._freq;
	66	_Q = other._Q;
	67
	68	return (*this);
	69	}
	70
	71	const int replacementModelSSRV::alphabetSize() const
	72	{
	73	return (_baseRM->alphabetSize() * _dist->categories());
	74	}
	75
	76
	77
	78	// The freq of each mulCharacter is its freq in the _baseRM * the freq of the rate-category
	79	void replacementModelSSRV::updateFreq()
	80	{
	81	_freq.clear();
	82	int size = alphabetSize();
	83	int numCategories = _dist->categories();
	84	_freq.resize(size);
	85	int idInCategory;
	86
	87	for(idInCategory=0; idInCategory < _baseRM->alphabetSize() ; ++idInCategory)
	88	{
	89	for (int categoryNumber=0; categoryNumber < numCategories; ++categoryNumber)
	90	_freq[categoryNumber*_baseRM->alphabetSize() + idInCategory] =
	91	_baseRM->freq(idInCategory) * _dist->ratesProb(categoryNumber);
	92	}
	93	}
	94
	95
	96	void replacementModelSSRV::updateQ()
	97	{
	98	if (_rateOfRate < EPSILON) _rateOfRate = EPSILON; // Temporary - to overcome a bug in QL algorithm, when _rateOfRate == 0
	99
	100	_Q.clear();
	101	int size = alphabetSize();
	102	_Q.resize(size);
	103	for (int z=0; z < _Q.size();++z)
	104	_Q[z].resize(size,0.0);
	105
	106	// fill Q
	107	int _BaseRM_alphabetSize = _baseRM->alphabetSize();
	108	int numCategories = _dist->categories();
	109	// i,j : go over all the base-alphabet.
	110	// z,w : go over all the categories.
	111	for (int i=0; i < _BaseRM_alphabetSize; ++i)
	112	{
	113	for (int j=0; j < _BaseRM_alphabetSize; ++j)
	114	{
	115	for (int z=0; z < numCategories; ++z)
	116	{
	117	for (int w=0; w < numCategories; ++w)
	118	{
	119	if (i!=j)
	120	{
	121	// different alphabet, same rate category
	122	if (z==w)
	123	_Q[z_BaseRM_alphabetSize + i][z_BaseRM_alphabetSize+j]
	124	= _dist->rates(z) * _baseRM->dPij_dt(i,j,0);
	125	}
	126	else
	127	{
	128	// same alphabet, different rate category
	129	if (z!=w)
	130	{
	131	_Q[z_BaseRM_alphabetSize+i][w_BaseRM_alphabetSize+i] = _rateOfRate * _dist->ratesProb(w);
	132	}
	133	// same alphabet, same rate category
	134	else
	135	_Q[z_BaseRM_alphabetSize+i][z_BaseRM_alphabetSize+i] =
	136	_dist->rates(z) * _baseRM->dPij_dt(i,j,0)
	137	- ( _rateOfRate * (1.0 - _dist->ratesProb(z)));
	138	}
	139
	140	}
	141	}
	142	}
	143	}
	144
	145	// // check OZ
	146	// LOG(4, <<"THE Q MATRIX IS: "<<endl ) ;
	147	// VVdouble::iterator itr1 = _Q.begin();
	148	// Vdouble::iterator itr2;
	149	// for (; itr1 != _Q.end(); ++itr1)
	150	// {
	151	// for (itr2 = itr1->begin(); itr2 != itr1->end(); ++itr2)
	152	// LOG(4,<< setprecision(3) << setw(5) << *itr2 <<'\t');
	153	// LOG(4,<<endl);
	154	// }
	155	// LOG (4,<<endl);
	156	//// end of check
	157
	158	_q2pt.fillFromRateMatrix(_freq,_Q);
	159
	160	}
	161
	162	void replacementModelSSRV::setDistribution(const distribution* dist)
	163	{
	164	if (dist->categories() == 0)
	165	errorMsg::reportError("replacementModelSSRV::setDistribution : number of categories == 0");
	166	if (_dist) delete (_dist);
	167	_dist=dist->clone();
	168	updateQ();
	169	}
	170
	171	MDOUBLE replacementModelSSRV::sumPijQij() const{
	172	MDOUBLE sum=0.0;
	173	for (int i=0; i < _Q.size(); ++i) {
	174	sum -= _Q[i][i]*_freq[i];
	175	}
	176	return sum;
	177	}
	178
	179
	180	//void replacementModelSSRV::norm(MDOUBLE scale){
	181	//
	182	// for (int i=0; i < _Q.size(); ++i) {
	183	// for (int j=0; j < _Q.size(); ++j) {
	184	// _Q[i][j]*=scale;
	185	// }
	186	// }
	187	//
	188	// _q2pt.fillFromRateMatrix(_freq,_Q);
	189	//}
	190
	191
	192
	193
	194
	195
	196
	197

+73

-0

libs/phylogeny/replacementModelSSRV.h less more

	0	// $Id: replacementModelSSRV.h 1914 2007-04-04 08:40:35Z osnatz $
	1	#ifndef ___REPLACEMENT_MODEL_SSRV
	2	#define ___REPLACEMENT_MODEL_SSRV
	3
	4	#include <cmath>
	5	#include "replacementModel.h"
	6	#include "distribution.h"
	7	#include "fromQtoPt.h"
	8	#include "errorMsg.h"
	9	#include "definitions.h"
	10
	11	class replacementModelSSRV : public replacementModel
	12	{
	13	public:
	14	explicit replacementModelSSRV(const distribution* dist, const replacementModel* baseRM, MDOUBLE rateOfRate = 1);
	15	explicit replacementModelSSRV(const replacementModelSSRV& other);
	16	~replacementModelSSRV();
	17	replacementModelSSRV& operator=(const replacementModelSSRV &other);
	18	const int alphabetSize() const;
	19	virtual replacementModel* clone() const {return new replacementModelSSRV(*this);}
	20	const MDOUBLE Pij_t(const int i,const int j, const MDOUBLE d) const {
	21	return _q2pt.Pij_t(i,j,d);
	22	}
	23	const MDOUBLE dPij_dt(const int i,const int j, const MDOUBLE d) const{
	24	return _q2pt.dPij_dt(i,j,d);
	25	}
	26	const MDOUBLE d2Pij_dt2(const int i,const int j, const MDOUBLE d) const{
	27	return _q2pt.d2Pij_dt2(i,j,d);
	28	}
	29
	30	const MDOUBLE freq(const int i) const {return _freq[i];}
	31
	32	distribution* getDistribution() const { return _dist;} // @@@@ this const is a lie !!!
	33	void setDistribution(const distribution* dist); // it's important to call updateQ after changing the distribution parameters
	34
	35	replacementModel* getBaseRM() const { return _baseRM;} // @@@@ this const is a lie (for the same reason as getDistribution()
	36
	37	MDOUBLE getRateOfRate() const { return _rateOfRate;}
	38	void setRateOfRate(MDOUBLE rateOfRate) { _rateOfRate=rateOfRate; updateQ();}
	39
	40	VVdouble getQ() const { return _Q;}
	41	Vdouble getFreqs() const {return _freq;}
	42
	43	MDOUBLE sumPijQij() const;
	44
	45	void updateQ();
	46	void updateFreq();
	47	q2pt getQ2pt() const {return _q2pt;} // used for debug only
	48	//void norm(MDOUBLE scale);
	49
	50	private:
	51	distribution* _dist;
	52	replacementModel* _baseRM;
	53	MDOUBLE _rateOfRate;
	54	q2pt _q2pt;
	55	Vdouble _freq;
	56	VVdouble _Q;
	57
	58	};
	59
	60	#endif
	61
	62	/* @@@@ When we want to optimize alpha, we usually get the distribution from the stochastic process and then
	63	convert it using static_cast, for example to gammaDistribution, and use its method setAlpha.
	64	For this reason, the method distr() in replacementModel and the method getDistribution here are both const, although
	65	they actually allow changing the distribution.
	66	A good solution for this is to add a setDistribution in the stochasticProcess.
	67	This will check if the distributions are of the same type and, if so, will just update the alpha.
	68	*/
	69
	70	// @@@@ Idea - maybe there is no need of replacementModelSSRV. This can be stochasticProcessSSRV - not good. the SP also has an accelerator.
	71
	72

+193

-0

libs/phylogeny/samplingSequences.cpp less more

	0	#include "samplingSequences.h"
	1	#include "logFile.h"
	2	#include "talRandom.h"
	3
	4
	5	sampleSequences::sampleSequences(sequenceContainer &sc){
	6	_sc = sc;
	7	}
	8
	9	sequenceContainer sampleSequences::removeSequences(sequenceContainer &sc){
	10	int noOfSeq = sc.numberOfSeqs();
	11	int gap = sc.getAlphabet()->gap();
	12	int unknown = sc.getAlphabet()->unknown();
	13	bool seqToAdd;
	14	int n =0;
	15	sequenceContainer newSc;
	16	for (int i=0;i<noOfSeq;i++){
	17	seqToAdd = true;
	18	for (int j=0;j<sc[i].seqLen();j++){
	19	if ((sc[i][j]== gap) \|\| (sc[i][j]== unknown ) \|\| (sc[i].seqLen() != 297)){
	20	seqToAdd = false;
	21	}
	22	}
	23	if (seqToAdd == true) {
	24	sequence add = sc[i];
	25	sequence sc(add);
	26	sc.setID(n);
	27	n++;
	28	newSc.add(sc);
	29	}
	30	}
	31	return newSc;
	32	}
	33
	34
	35	void sampleSequences::printDistances(){
	36	for (int i=0;i< _distances.size();i++){
	37	for (int j=0;j<_distances[i].size();j++){
	38	cout<<_distances[i][j]<<" ";
	39	}
	40	cout<<endl;
	41	}
	42	}
	43
	44	void sampleSequences::setDistance(int i,int j,MDOUBLE dist){
	45	(i<j ? _distances[i][j-i] :_distances[j][i-j]) = dist;
	46	}
	47
	48	MDOUBLE sampleSequences::getDistance(int i,int j){
	49	return (i<j ? _distances[i][j-i] :_distances[j][i-j]);
	50	}
	51
	52
	53	sequenceContainer sampleSequences::sampleFarthestSequences(int n, distanceMethod *dm){
	54	_sc.removeIdenticalSequences();
	55	if (n >= _sc.numberOfSeqs()){
	56	cerr<<"Number of sequences to sample is bigger than the origin number of sequences so the all sequences were chosen in sampleSequences::sampleFarthestSequences"<<endl;
	57	return _sc;
	58	}
	59
	60	int numberOfSeq = _sc.numberOfSeqs();
	61	_distances.resize(numberOfSeq);
	62	int i;
	63	for (i=0;i<numberOfSeq;i++)
	64	_distances[i].resize(numberOfSeq-i);
	65
	66	for (i=0;i<numberOfSeq;i++){
	67	for(int j=i;j<numberOfSeq;j++){
	68	int id1 = _sc.placeToId(i);
	69	int id2 = _sc.placeToId(j);
	70
	71	setDistance(i,j,dm->giveDistance(_sc[id1],_sc[id2],NULL));
	72	}
	73	}
	74
	75	sequenceContainer newSc;
	76	vector<int> sampled;
	77	sampled.push_back(0);//to change
	78	int id = 0;
	79	int p = _sc.placeToId(0);
	80	sequence sc(_sc[p]);
	81	sc.setID(id++);
	82	newSc.add(sc);
	83	while (newSc.numberOfSeqs()<n){
	84	int i = findNextSeq(sampled);
	85	p = _sc.placeToId(i);
	86	sequence sc(_sc[p]);
	87	sc.setID(id);
	88	newSc.add(sc);
	89	id++;
	90	sampled.push_back(i);
	91	}
	92	return newSc;
	93	}
	94
	95	int sampleSequences::findNextSeq(vector<int> &sampled){
	96	MDOUBLE max = 0,min;
	97	int seqi = -1;
	98	for(int i=0;i< _sc.numberOfSeqs();i++){
	99	min=10000;//to update
	100	for (int j=0;j<sampled.size();j++){
	101	if (getDistance(i,sampled[j])<min)
	102	min = getDistance(i,sampled[j]);
	103	}
	104	if (max<min){
	105	max=min;
	106	seqi = i;
	107	}
	108	}
	109
	110	if (seqi>_sc.numberOfSeqs() \|\|seqi<0){
	111	errorMsg::reportError("Error in sampleSequences::findNextSeq");
	112	}
	113	return seqi;
	114	}
	115
	116	//sequenceContainer sampleSequences::sampleRandomSequences(int seqNum)
	117	//{
	118	// if (seqNum > _sc.numberOfSeqs())
	119	// errorMsg::reportError("sampleSequences::sampleRandomSequences(): the number of requested seqeuences is larger than the number of sequences in the MSA");
	120	// sequenceContainer newSc(_sc);
	121	// while (newSc.numberOfSeqs() > seqNum)
	122	// {
	123	// int seqPlaceToRemove = talRandom::giveIntRandomNumberBetweenZeroAndEntry(newSc.numberOfSeqs());
	124	// newSc.remove(newSc.placeToId(seqPlaceToRemove));
	125	// }
	126	// return newSc;
	127	//}
	128
	129
	130	sequenceContainer sampleSequences::sampleRandomSequences(int seqNum)
	131	{
	132	if (seqNum > _sc.numberOfSeqs())
	133	errorMsg::reportError("sampleSequences::sampleRandomSequences(): the number of requested seqeuences is larger than the number of sequences in the MSA");
	134	sequenceContainer newSc;
	135	Vint vec2Add(_sc.numberOfSeqs(),0);
	136	int n = 0;
	137	while (n < seqNum)
	138	{
	139	int seqPlaceToAdd = talRandom::giveIntRandomNumberBetweenZeroAndEntry(_sc.numberOfSeqs());
	140	if (vec2Add[seqPlaceToAdd] == 0){
	141	vec2Add[seqPlaceToAdd] = 1;
	142	n++;
	143	}
	144
	145	}
	146	for (int i = 0; i<vec2Add.size();i++){
	147	if (vec2Add[i] == 1)
	148	newSc.add(_sc[i]);
	149	}
	150	return newSc;
	151	}
	152	//sequenceContainer sampleSequences::sampleRandomCharacters(int seqLen)
	153	//{
	154	// if (seqLen > _sc.seqLen())
	155	// errorMsg::reportError("sampleSequences::sampleRandomCharacters(): the requested sequence length is larger than the number of characters in the MSA");
	156	// Vint posToRemove(_sc.seqLen(),1);
	157	// //first create a vector with seqLen positions to be sampled in the begining of the vector
	158	// for (int i = 0; i < seqLen; ++i)
	159	// posToRemove[i] = 0;
	160	// //then randomly swap the positions in posToRemove.
	161	// //The end result is a random vector with the positions to remove marked with '1'
	162	// int swapNum = _sc.seqLen() * 10;
	163	// for (int x = 0; x < swapNum; ++x)
	164	// {
	165	// int pos1 = talRandom::giveIntRandomNumberBetweenZeroAndEntry(_sc.seqLen());
	166	// int pos2 = talRandom::giveIntRandomNumberBetweenZeroAndEntry(_sc.seqLen());
	167	// int tmp = posToRemove[pos1];
	168	// posToRemove[pos1] = posToRemove[pos2];
	169	// posToRemove[pos2] = tmp;
	170	// }
	171	//
	172	// sequenceContainer newSc(_sc);
	173	// newSc.removePositions(posToRemove);
	174	// return newSc;
	175	//}
	176
	177
	178	sequenceContainer sampleSequences::sampleRandomCharacters(int seqLen)
	179	{
	180	if (seqLen > _sc.seqLen())
	181	errorMsg::reportError("sampleSequences::sampleRandomCharacters(): the requested sequence length is larger than the number of characters in the MSA");
	182	sequenceContainer newSc(_sc);
	183
	184	while (newSc.seqLen() > seqLen)
	185	{
	186	Vint posToRemove(newSc.seqLen(),0);
	187	int seqPlaceToRemove = talRandom::giveIntRandomNumberBetweenZeroAndEntry(newSc.seqLen());
	188	posToRemove[seqPlaceToRemove] = 1;
	189	newSc.removePositions(posToRemove);
	190	}
	191	return newSc;
	192	}

+33

-0

libs/phylogeny/samplingSequences.h less more

	0	#ifndef SAMPLE_SEQUENCES_H
	1	#define SAMPLE_SEQUENCES_H
	2
	3	#include "definitions.h"
	4	#include "distanceMethod.h"
	5	#include "sequenceContainer.h"
	6	#include "pDistance.h"
	7
	8
	9	class sampleSequences{
	10	public:
	11	explicit sampleSequences(sequenceContainer &sc);
	12	virtual ~sampleSequences() {};
	13
	14	sequenceContainer sampleFarthestSequences(int n, distanceMethod *dm);
	15	//sampleRandomSequences: samples seqNum sequences from the sequence container
	16	sequenceContainer sampleRandomSequences(int seqNum);
	17	//sampleRandomCharacters: samples seqLen characters from the sequenceContainer
	18	sequenceContainer sampleRandomCharacters(int seqLen);
	19
	20
	21	private:
	22	int findNextSeq(vector<int> &sampled);
	23	void setDistance(int i,int j,MDOUBLE dist);
	24	MDOUBLE getDistance(int i,int j);
	25	void removeSequenceWithGap();
	26	sequenceContainer removeSequences(sequenceContainer &sc);
	27	void printDistances();
	28	private:
	29	VVdouble _distances;
	30	sequenceContainer _sc;
	31	};
	32	#endif

+9

-0

libs/phylogeny/searchStatus.cpp less more

	0	// $Id: searchStatus.cpp 962 2006-11-07 15:13:34Z privmane $
	1
	2	#include "searchStatus.h"
	3
	4	searchStatus::searchStatus(const MDOUBLE startingTmp,const MDOUBLE factor ):
	5	_currentTmp(startingTmp),
	6	_factor(factor) {}
	7
	8

+30

-0

libs/phylogeny/searchStatus.h less more

	0	// $Id: searchStatus.h 962 2006-11-07 15:13:34Z privmane $
	1
	2	#ifndef ___SEARCH_STATUS
	3	#define ___SEARCH_STATUS
	4
	5	#include "definitions.h"
	6
	7	class searchStatus {
	8	public:
	9	explicit searchStatus(const MDOUBLE startingTmp,const MDOUBLE factor);
	10	explicit searchStatus(){};
	11	void setParameters(const MDOUBLE tmp, const MDOUBLE factor) {
	12	_currentTmp=tmp;
	13	_factor=factor;
	14	}
	15
	16	void tmpUp1(){_currentTmp *= _factor;}
	17	void tmpDown1(){_currentTmp /= _factor;}
	18	const MDOUBLE getTmp() const {return _currentTmp;}
	19	void setTmp(const MDOUBLE newTmp) {_currentTmp=newTmp;}
	20	virtual ~searchStatus(){}
	21
	22	private:
	23	MDOUBLE _currentTmp;
	24	MDOUBLE _factor;
	25	};
	26
	27	#endif
	28
	29

+185

-0

libs/phylogeny/seqContainerTreeMap.cpp less more

	0	// $Id: seqContainerTreeMap.cpp 11896 2013-12-19 17:50:51Z haim $
	1
	2	#include "seqContainerTreeMap.h"
	3	#include "logFile.h"
	4	#include "treeUtil.h"
	5	#include <stdlib.h>
	6
	7	/********************************************************************************************
	8	*********************************************************************************************/
	9	void intersectNamesInTreeAndSequenceContainer(tree& et, sequenceContainer & sc, bool bLeavesOnly){
	10	LOGnOUT(4,<<"\n intersectNames Tree vs Sequence. Before intersect numOfSeq= "<<sc.numberOfSeqs()<<" nunOfTaxa= "<<et.getLeavesNum()<<" Remove "<<abs(et.getLeavesNum() -sc.numberOfSeqs())<<" taxa"<<endl);
	11	treeIterDownTopConst tIt(et);
	12	vector<tree::nodeP> nodes2remove;
	13	vector<int> seqIDs2remove;
	14
	15	//cout<<"tree names:"<<endl;
	16
	17	for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
	18	bool bFound = false;
	19	bool bFound_more = false;
	20
	21	if (bLeavesOnly) {
	22	if (mynode->isInternal())
	23	continue;
	24	}
	25	sequenceContainer::constTaxaIterator it=sc.constTaxaBegin();
	26	for (;it != sc.constTaxaEnd(); ++it)
	27	{
	28	string scName = it->name();
	29	string treeNodeName = mynode->name();
	30
	31	if (it->name() == mynode->name())
	32	{
	33	if(bFound)
	34	bFound_more = true;
	35	bFound = true;
	36	//break;
	37	}
	38	if (bFound_more == true)
	39	{
	40	string errMsg = "The taxID:\t";
	41	errMsg += mynode->name();
	42	errMsg += "\twas found again in the sequence file. Removed from sequence.";
	43	LOGnOUT(4,<<errMsg<<endl);
	44	seqIDs2remove.push_back(it->id());
	45	bFound_more = false;
	46	}
	47	}
	48	if (bFound == false)
	49	{
	50	string errMsg = "The taxID:\t";
	51	errMsg += mynode->name();
	52	errMsg += "\twas found in the tree file but not found in the sequence file. Removed from tree.";
	53	LOGnOUT(4,<<errMsg<<endl);
	54	nodes2remove.push_back(mynode);
	55	}
	56
	57	}
	58	for(int i=0; i<nodes2remove.size(); ++i){
	59	et.removeLeaf(nodes2remove[i]);
	60	}
	61	sequenceContainer::constTaxaIterator myseq=sc.constTaxaBegin();
	62	for (;myseq != sc.constTaxaEnd(); ++myseq){
	63	bool bFound = false;
	64	bool bFound_more = false;
	65	for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
	66	if (bLeavesOnly)
	67	{
	68	if (mynode->isInternal())
	69	continue;
	70	}
	71	if (myseq->name() == mynode->name())
	72	{
	73	if(bFound)
	74	bFound_more = true;
	75	bFound = true;
	76	//break;
	77	}
	78	if (bFound_more == true)
	79	{
	80	string errMsg = "The taxID name:\t";
	81	errMsg += myseq->name();
	82	errMsg += "\twas found again in the tree file. Removed.";
	83	LOGnOUT(4,<<errMsg<<endl);
	84	nodes2remove.push_back(mynode);
	85	bFound_more = false;
	86	}
	87	}
	88	if (bFound == false)
	89	{
	90	string errMsg = "The taxID name:\t";
	91	errMsg += myseq->name();
	92	errMsg += "\twas found in the sequence file but not found in the tree file. Removed.";
	93	LOGnOUT(4,<<errMsg<<endl);
	94	seqIDs2remove.push_back(myseq->id());
	95	}
	96	}
	97	for(int i=0; i<seqIDs2remove.size(); ++i){
	98	sc.remove(seqIDs2remove[i]);
	99	}
	100	}
	101
	102	/********************************************************************************************
	103	*********************************************************************************************/
	104	//if bLeavesOnly == true then checks only leaves, otherwise the sequence container includes also internal nodes (as may be the result of simlations
	105	void checkThatNamesInTreeAreSameAsNamesInSequenceContainer(const tree& et,const sequenceContainer & sc, bool bLeavesOnly){
	106	treeIterDownTopConst tIt(et);
	107	//cout<<"tree names:"<<endl;
	108
	109	for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
	110	bool bFound = false;
	111	if (bLeavesOnly) {
	112	if (mynode->isInternal())
	113	continue;
	114	}
	115	sequenceContainer::constTaxaIterator it=sc.constTaxaBegin();
	116	for (;it != sc.constTaxaEnd(); ++it)
	117	{
	118	string scName = it->name();
	119	string treeNodeName = mynode->name();
	120
	121	if (it->name() == mynode->name())
	122	{
	123	bFound = true;
	124	break;
	125	}
	126	}
	127	if (bFound == false)
	128	{
	129	string errMsg = "The sequence name: ";
	130	errMsg += mynode->name();
	131	errMsg += " was found in the tree file but not found in the sequence file.\n";
	132	errMsg += " Please, Re-run program with _intersectTreeAndSeq to produce new MSA and Tree.\n";
	133	LOG(4,<<errMsg<<endl);
	134	errorMsg::reportError(errMsg);
	135	}
	136	}
	137	sequenceContainer::constTaxaIterator it=sc.constTaxaBegin();
	138	for (;it != sc.constTaxaEnd(); ++it){
	139	bool bFound = false;
	140	for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
	141	if (bLeavesOnly)
	142	{
	143	if (mynode->isInternal())
	144	continue;
	145	}
	146	if (it->name() == mynode->name())
	147	{
	148	bFound = true;
	149	break;
	150	}
	151	}
	152	if (bFound == false)
	153	{
	154	string errMsg = "The sequence name: ";
	155	errMsg += it->name();
	156	errMsg += " was found in the sequence file but not found in the tree file.\n";
	157	errMsg += " Please, Re-run program with _intersectTreeAndSeq to produce new MSA and Tree.\n";
	158	errorMsg::reportError(errMsg);
	159	}
	160	}
	161	}
	162
	163	/********************************************************************************************
	164	// input: a tree and a sequence-container containing all of the leaves sequences.
	165	// output: fills sc_leaves with the sequences of the leaves only.
	166	*********************************************************************************************/
	167	void getLeavesSequences(const sequenceContainer& sc,
	168	const tree& tr, sequenceContainer& sc_leaves) {
	169	vector<string> leavesNames = getSequencesNames(tr);
	170	vector<string>::iterator itr_leaves;
	171	for (itr_leaves=leavesNames.begin();itr_leaves!=leavesNames.end();++itr_leaves) {
	172	sequenceContainer::constTaxaIterator it_sc=sc.constTaxaBegin();
	173	for (;it_sc != sc.constTaxaEnd(); ++it_sc) {
	174	if (it_sc->name() == *(itr_leaves)) {
	175	sc_leaves.add(*it_sc);
	176	break;
	177	}
	178	}
	179	}
	180	if (tr.getLeavesNum() != sc_leaves.numberOfSeqs()) {
	181	string errMsg = "getLeavesSequencese: the number of leaves is not equal to the number of leaves' sequences";
	182	errorMsg::reportError(errMsg);
	183	}
	184	}

+38

-0

libs/phylogeny/seqContainerTreeMap.h less more

	0	// $Id: seqContainerTreeMap.h 8985 2010-11-16 19:56:20Z cohenofi $
	1
	2	#ifndef ___SEQUENCE_CONTAINER_TREE_MAP
	3	#define ___SEQUENCE_CONTAINER_TREE_MAP
	4	#include "definitions.h"
	5	#include "tree.h"
	6	#include "treeIt.h"
	7	#include "sequenceContainer.h"
	8
	9	void checkThatNamesInTreeAreSameAsNamesInSequenceContainer(const tree& et,const sequenceContainer & sc, bool bLeavesOnly = true);
	10	void intersectNamesInTreeAndSequenceContainer(tree& et,sequenceContainer & sc, bool bLeavesOnly= true);
	11
	12	void getLeavesSequences(const sequenceContainer& sc, const tree& tr, sequenceContainer& sc_leaves);
	13
	14	class seqContainerTreeMap {
	15	public:
	16	explicit seqContainerTreeMap(const sequenceContainer& sc,
	17	const tree& et) {
	18	checkThatNamesInTreeAreSameAsNamesInSequenceContainer(et,sc);
	19	_V.resize(et.getNodesNum());
	20	treeIterTopDownConst tit(et);
	21	for (tree::nodeP myN = tit.first();myN!=tit.end(); myN = tit.next()) {
	22	if (myN->isInternal()) {
	23	_V[myN->id()] = -1;
	24	} else {
	25	_V[myN->id()] = sc.getId(myN->name(),false);
	26	}
	27	}
	28	}
	29	int seqIdOfNodeI(const int nodeID) {
	30	return _V[nodeID];
	31	}
	32
	33	private:
	34	vector<int> _V;// _V[i] is the sequenceId of node I.
	35	};
	36
	37	#endif

+233

-0

libs/phylogeny/seqeuncesFilter.cpp less more

	0	#include "seqeuncesFilter.h"
	1	#include "nucleotide.h"
	2
	3	seqeuncesFilter::~seqeuncesFilter()
	4	{}
	5
	6	void seqeuncesFilter::removeSequencesWithStop(sequenceContainer & sc, codon & alpha)
	7	{
	8
	9	//going over al seqeunces
	10	for (int i = 0; i < sc.numberOfSeqs();++i) {
	11	int id = sc.placeToId(i);
	12	//going over all sequence len
	13	for (int j = 0; j < sc.seqLen();++j) {
	14	//remove seqeunces with stop data not in the middle
	15	if ((j != sc.seqLen()-1) && (alpha.isStopCodon(sc[id][j])))
	16	{
	17	LOG(4, <<"removing sequence = "<<sc.name(id)<<" : STOP codon in the middle of the reading frame!"<<endl);
	18	sc.remove(id);
	19	i--;
	20	break;
	21	}
	22	}
	23	}
	24	}
	25
	26	void seqeuncesFilter::removeSequencesWithMissingData(sequenceContainer & sc)
	27	{
	28
	29	//going over al seqeunces
	30	for (int i = 0; i < sc.numberOfSeqs(); ++i)
	31	{
	32	//going over all sequence len
	33	for (int j = 0; j < sc.seqLen(); ++j)
	34	{
	35	int id = sc.placeToId(i);
	36	//remove seqeunces with unkonwn data
	37	if (sc[id][j] == sc.getAlphabet()->unknown())
	38	{
	39	sc.remove(id);
	40	i--;
	41	break;
	42	}
	43	}
	44	}
	45	}
	46
	47	void seqeuncesFilter::removeSequencesWithMissingDataAndStop(sequenceContainer & sc, codon & alpha)
	48	{
	49
	50	//going over al seqeunces
	51	for (int i = 0; i < sc.numberOfSeqs(); ++i) {
	52	int id = sc.placeToId(i);
	53	//going over all sequence len
	54	for (int j = 0; j < sc.seqLen();++j) {
	55	//remove seqeunces with stop data not in the middle or missing data
	56	if ((j != sc.seqLen()-1) && (sc[id][j] == sc.getAlphabet()->unknown() \|\| alpha.isStopCodon(sc[id][j])))
	57	{
	58
	59	sc.remove(id);
	60	i--;
	61	break;
	62	}
	63	}
	64	}
	65
	66	}
	67
	68
	69	void seqeuncesFilter::removeSequencesNotStartWithATG(sequenceContainer & sc, codon & alpha)
	70	{
	71	amino aa;
	72	//going over al seqeunces
	73	for (int i = 0; i < sc.numberOfSeqs();++i) {
	74	int id = sc.placeToId(i);
	75	int in_first = codonUtility::aaOf(sc[id][0], alpha);
	76	if (in_first != aa.fromChar('M'))
	77	{
	78	LOG(4, <<"removing sequence = "<<sc.name(id)<<" : not starting with ATG!"<<endl);
	79	sc.remove(id);
	80	i--;
	81	}
	82	}
	83	}
	84
	85	void seqeuncesFilter::removeSequencesNotStartWithInitiationCodons(sequenceContainer & sc,codon & alpha)
	86	{
	87	for (int i = 0; i < sc.numberOfSeqs();++i) {
	88	int id = sc.placeToId(i);
	89	int in_first = sc[id][0];
	90	if(!alpha.isInitiationCodon(in_first)){
	91	LOG(4, <<"removing sequence = "<<sc.name(id)<<" : not starting with initiation codon!"<<endl);
	92	sc.remove(id);
	93	i--;
	94	}
	95	}
	96	}
	97
	98
	99	void seqeuncesFilter::removeSequencesWithGapsAccordingRef(sequenceContainer & sc,int precent,string refName)
	100	{
	101	int refID = sc.getId(refName);
	102	Vint seqToRemove;
	103	//going over all position in reference seqeunce
	104	for (int pos = 0; pos < sc[refID].seqLen(); pos++)
	105	{
	106
	107	//check if the pos is gap
	108	if (sc[refID][pos] == sc.getAlphabet()->gap())
	109	//going over all other seqeunces to compute the precents of gaps
	110	{
	111	cout<<pos<<" ";
	112	seqToRemove.clear();
	113	MDOUBLE numOfSeqWithOutGap = 0;
	114	cout<<sc.numberOfSeqs()<<" ";
	115	for (int i = 0; i < sc.numberOfSeqs(); i++)
	116	{
	117
	118	int id = sc.placeToId(i);
	119	if (sc[id][pos] != sc.getAlphabet()->gap())
	120	{
	121	numOfSeqWithOutGap++;
	122	seqToRemove.push_back(id);
	123	}
	124	}
	125	cout<<seqToRemove.size()<<endl;
	126	if ((100 * ((sc.numberOfSeqs() - numOfSeqWithOutGap)/sc.numberOfSeqs())) > precent)
	127	{
	128	for (int j = 0; j < seqToRemove.size(); j++){
	129	sc.remove(seqToRemove[j]);
	130	}
	131
	132	}
	133	}
	134	}
	135	}
	136
	137	//removes all sequences that are shorter than lowerBound and longer than upperBound
	138	void seqeuncesFilter::removeShortAndLongSequences(sequenceContainer & sc, int lowerBound, int upperBound)
	139	{
	140	const alphabet* pAlph = sc.getAlphabet();
	141	//going over al seqeunces
	142	for (int seq = 0; seq < sc.numberOfSeqs(); ++seq)
	143	{
	144	int id = sc.placeToId(seq);
	145	//checking sequence length
	146	int seqLen = sc[id].seqLenSpecific();
	147	if ((seqLen < lowerBound) \|\| (seqLen > upperBound))
	148	{
	149	cerr<<"removing sequence: "<<sc.name(id)<<" sequence Length = "<<seqLen<<endl;
	150	sc.remove(id);
	151	--seq;
	152	}
	153	}
	154	}
	155
	156	//removes all sequences that have inserts in which most other sequences (> percent) have gaps.
	157	//in case refName is given: check only positions in which the reference sequence has gaps.
	158	//The remained sequences are stored in newSc.
	159	void seqeuncesFilter::removeSequencesWithInserts(sequenceContainer & newSc,const sequenceContainer & sc,int percent, const string& refName, string outFileName)
	160	{
	161	if (outFileName.empty())
	162	outFileName = "removedSequences" + double2string(percent) + ".txt";
	163	ofstream outF(outFileName.c_str());
	164	int refID;
	165	if (!refName.empty())
	166	refID = sc.getId(refName);
	167	Vint seqToAdd(sc.numberOfSeqs(), 1);//1== add the sequence to newSc. 0 = don't add.
	168	//going over all position (in reference seqeunce if given)
	169	for (int pos = 0; pos < sc.seqLen(); ++pos)
	170	{
	171
	172	if (!refName.empty())
	173	{ //don't remove this position if it isn't gap in the refSeqeunce
	174	if (sc[refID][pos] != sc.getAlphabet()->gap())
	175	continue;
	176	}
	177	Vint seqToRemove; //holds the ids of sequences without gaps in the current positions
	178	//going over all seqeunces to compute the percent of gaps
	179	MDOUBLE numOfSeqWithGap = 0;
	180	for (int i = 0; i < sc.numberOfSeqs(); i++)
	181	{
	182	int id = sc.placeToId(i);
	183	if (sc[id][pos] != sc.getAlphabet()->gap())
	184	{
	185	seqToRemove.push_back(id);
	186	}
	187	else
	188	numOfSeqWithGap++;
	189	}
	190	//outF<<"POS "<<pos<<" seqWithGaps = "<<numOfSeqWithGap<<" seqWithoutGaps = "<<sc.numberOfSeqs() - numOfSeqWithGap<<endl;
	191	//in case most sequences have gaps in that position: remove the sequences that have inserts at that position
	192	MDOUBLE percentGapsinPos = 100.0 * (numOfSeqWithGap / sc.numberOfSeqs());
	193	if (percentGapsinPos > percent)
	194	{
	195	//outF<<"removing sequences: ";
	196	for (int j = 0; j < seqToRemove.size(); j++)
	197	{
	198	int x = seqToRemove[j];
	199	seqToAdd[seqToRemove[j]] = 0;
	200	outF<<sc.name(sc.placeToId(x))<<endl;
	201	}
	202	outF<<endl;
	203	}
	204	}
	205
	206
	207	for (int i=0; i<seqToAdd.size(); i++)
	208	{
	209	if (seqToAdd[i] == 1)
	210	{
	211	int id = sc.placeToId(i);
	212	newSc.add(sc[id]);
	213	}
	214	}
	215	outF.close();
	216	}
	217
	218	void seqeuncesFilter::removeSequencesNotDivisableBy3(sequenceContainer & sc)
	219	{
	220	nucleotide nucAlph;
	221	for (int i = 0; i < sc.numberOfSeqs();++i)
	222	{
	223	int id = sc.placeToId(i);
	224	int seqL = sc[id].seqLen();
	225	if ((seqL % 3) != 0)
	226	{
	227	LOG(4, <<"removing sequence = "<<sc.name(id)<<" : nucleotide sequence length is not divisable by 3!"<<endl);
	228	sc.remove(id);
	229	--i;
	230	}
	231	}
	232	}

+35

-0

libs/phylogeny/seqeuncesFilter.h less more

	0	#ifndef __SEQEUNCES_FILTER
	1	#define __SEQEUNCES_FILTER
	2
	3
	4	#include "definitions.h"
	5	#include "sequenceContainer.h"
	6	#include "codon.h"
	7	#include "amino.h"
	8	#include <string>
	9	#include <fstream>
	10	#include "fastaFormat.h"
	11
	12
	13	using namespace std;
	14
	15	class seqeuncesFilter{
	16
	17	public:
	18	static void removeSequencesWithStop(sequenceContainer & sc,codon & alpha);
	19	static void removeSequencesWithMissingData(sequenceContainer & sc);
	20	//applied only to coding nucleotide seqeunces: remove sequence that are not divisable by 3.
	21	static void removeSequencesNotDivisableBy3(sequenceContainer & sc);
	22	static void removeSequencesWithMissingDataAndStop(sequenceContainer & sc,codon & alpha);
	23	static void removeSequencesNotStartWithATG(sequenceContainer & sc,codon & alpha);
	24	static void removeSequencesNotStartWithInitiationCodons(sequenceContainer & sc,codon & alpha);
	25	static void removeSequencesWithGapsAccordingRef(sequenceContainer & sc,int precent, string refName);
	26	static void removeSequencesWithInserts(sequenceContainer & newSc, const sequenceContainer & sc, int percent, const string& refName = "", string outFileName = "");
	27
	28
	29	//removes all sequences that are shorter than lowerBound and longer than upperBound
	30	static void removeShortAndLongSequences(sequenceContainer & sc, int lowerBound, int upperBound);
	31	virtual ~seqeuncesFilter();
	32
	33	};
	34	#endif

+192

-0

libs/phylogeny/sequence.cpp less more

	0	// $Id: sequence.cpp 7627 2010-03-06 21:56:30Z cohenofi $
	1
	2	#include "sequence.h"
	3
	4	#include <algorithm>
	5	using namespace std;
	6
	7
	8	sequence::sequence(const string& str,
	9	const string& name,
	10	const string& remark,
	11	const int id,
	12	const alphabet* inAlph)
	13	: _alphabet(inAlph->clone()), _remark(remark), _name(name),_id(id)
	14	{
	15	for (int k=0; k < str.size() ;k += _alphabet->stringSize()) {
	16	int charId = inAlph->fromChar(str, k);
	17	if (charId == -99) {
	18	string textToPrint = "unable to read sequence: " + name;
	19	errorMsg::reportError(textToPrint);
	20	}
	21
	22	_vec.push_back(charId);
	23	}
	24	}
	25
	26
	27	sequence::sequence(const sequence& other)
	28	: _vec(other._vec), _alphabet(other._alphabet->clone()),
	29	_remark(other._remark), _name(other._name),_id(other._id)
	30	{
	31
	32	}
	33	// convert the other sequence to the alphabet inAlph.
	34	sequence::sequence(const sequence& other,const alphabet* inAlph)
	35	: _alphabet(inAlph->clone()), _remark(other._remark), _name(other._name), _id(other._id)
	36	{
	37	const mulAlphabet* pMulAlphabet;
	38	// if the other.alphabet is amino or nucleotide and the inAlph is indel
	39
	40	if ( (other._alphabet->size() == 20 && inAlph->size() == 2)
	41	\|\| (other._alphabet->size() == 4 && inAlph->size() == 2) )
	42	{
	43	for (int k=0; k < other.seqLen() ;k += other._alphabet->stringSize())
	44	{
	45	int charId = other._vec[k];
	46
	47	if (charId == other._alphabet->gap())
	48	_vec.push_back(inAlph->fromChar("-",0));
	49	else
	50	_vec.push_back(inAlph->fromChar("X",0)); //also converts "." (charId==-3) to "X"
	51	// unknown amino/nucleotide is converted to "X" and not to "?"
	52	}
	53	}
	54
	55	// if the other.alphabet is amino or nucleotide and the inAlph is mulAlphabet
	56	else if ( (other._alphabet->size() == 20 && inAlph->size()%20 == 0)
	57	\|\| (other._alphabet->size() == 4 && inAlph->size()%4 == 0) )
	58	{
	59	for (int k=0; k < other.seqLen() ;++k)
	60	{
	61	int charId = other._vec[k];
	62	string ch = other._alphabet->fromInt(charId);
	63	int mulCharId = _alphabet->fromChar(ch,0);
	64	_vec.push_back(mulCharId);
	65	}
	66	// debug OZ
	67	//cout << "other sequence: " << other << endl;
	68	//cout << "mul sequence " << (*this) << endl;
	69	// end of debug
	70	}
	71	// if the other.alphabet is mulAlphabet and the inAlph is it's baseAlphabet
	72	// (for example, if other.alphabet is a multiplied-amino and inAlph is amino, then the converted sequence
	73	// will have alphabet amino)
	74	else if ( ((inAlph->size() == 20) && (other._alphabet->size()%20 == 0))
	75	\|\| (inAlph->size() == 4) && (other._alphabet->size()%4 == 0))
	76	{
	77	pMulAlphabet=(mulAlphabet*)(other._alphabet);
	78	for (int k=0; k < other.seqLen() ;++k)
	79	{
	80	int mulCharId = other._vec[k];
	81	int baseId = pMulAlphabet->convertToBasedAlphaInt(mulCharId);
	82	_vec.push_back(baseId);
	83	}
	84	}
	85
	86	// for gainLoss project - {0,1} in both, hence no conversion needed.
	87	// it should be the same for all cases with same alphabet
	88	else if ( inAlph->size() == other._alphabet->size() )
	89	{
	90	pMulAlphabet=(mulAlphabet*)(other._alphabet);
	91	for (int k=0; k < other.seqLen() ;++k)
	92	{
	93	int mulCharId = other._vec[k];
	94	//int baseId = pMulAlphabet->convertToBasedAlphaInt(mulCharId);
	95	_vec.push_back(mulCharId);
	96	}
	97	}
	98	// I tried to implement it using dynamic_cast but it doesn't work...
	99	/*else if
	100	(
	101	(pMulAlphabet = dynamic_cast<const mulAlphabet*>(other._alphabet)) != NULL
	102	)
	103	{
	104	if (pMulAlphabet->getBaseAlphabet()->size() == inAlph->size())
	105	{
	106	for (int k=0; k < other.seqLen() ;++k)
	107	{
	108	int mulCharId = other._vec[k];
	109	int baseId = pMulAlphabet->convertToBasedAlphaInt(mulCharId);
	110	_vec.push_back(baseId);
	111	}
	112	}
	113	}*/
	114
	115	// (currently, there is no implimentions for other converts)
	116	else
	117	{
	118	string error = "unable to convert this kind of alphabet";
	119	errorMsg::reportError(error);
	120	}
	121	}
	122
	123	sequence::~sequence()
	124	{
	125	if (_alphabet)
	126	delete _alphabet;
	127	}
	128
	129	void sequence::resize(const int k, const int* val) {
	130	if (val == NULL) {
	131	_vec.resize(k,_alphabet->unknown());
	132	}
	133	else {
	134	_vec.resize(k,*val);
	135	}
	136	}
	137
	138	string sequence::toString() const{
	139	string tmp;
	140	for (int k=0; k < _vec.size() ; ++k ){
	141	tmp+= _alphabet->fromInt(_vec[k]);
	142	}
	143	return tmp;
	144	}
	145
	146	string sequence::toString(const int pos) const{
	147	return _alphabet->fromInt(_vec[pos]);
	148	}
	149
	150	void sequence::addFromString(const string& str) {
	151	for (int k=0; k < str.size() ; k+=_alphabet->stringSize()) {
	152	_vec.push_back(_alphabet->fromChar(str,k));
	153	}
	154	}
	155
	156	class particip {
	157	public:
	158	explicit particip() {}
	159	bool operator()(int i) {
	160	return (i==-1000);
	161	}
	162	};
	163
	164	//removePositions: the poitions to be removed are marked as '1' in posToRemoveVec
	165	//all othehr positions are '0'
	166	void sequence::removePositions(const vector<int> & posToRemoveVec)
	167	{
	168	if(posToRemoveVec.size() != seqLen())
	169	errorMsg::reportError("the input vector must be same size as sequence length. in sequence::removePositions");
	170	for (int k=0; k < posToRemoveVec.size(); ++k) {
	171	if (posToRemoveVec[k] == 1)
	172	_vec[k] = -1000;
	173	}
	174	vector<int>::iterator vec_iter;
	175	vec_iter = remove_if(_vec.begin(),_vec.end(),particip());
	176	_vec.erase(vec_iter,_vec.end()); // pg 1170, primer.
	177	}
	178
	179	//return the number of sites that are specific = not unknown, nor ambiguity, nor gap (for example, for nucleotides it will true for A,C,G, or T).
	180	int sequence::seqLenSpecific() const
	181	{
	182	int res = 0;
	183	for (int pos = 0; pos < seqLen(); ++pos)
	184	{
	185	if (isSpecific(pos))
	186	++res;
	187	}
	188	return res;
	189	}
	190
	191

+142

-0

libs/phylogeny/sequence.h less more

	0	// $Id: sequence.h 7627 2010-03-06 21:56:30Z cohenofi $
	1
	2	#ifndef ___SEQUENCE
	3	#define ___SEQUENCE
	4	#include "definitions.h"
	5	#include "errorMsg.h"
	6	#include "alphabet.h"
	7	#include "mulAlphabet.h"
	8	#include <iostream>
	9	using namespace std;
	10
	11	class sequence {
	12
	13
	14	public:
	15	class Iterator;
	16	friend class Iterator;
	17	class constIterator;
	18	friend class constIterator;
	19
	20	// constructors
	21	explicit sequence(const string& str,
	22	const string& name,
	23	const string& remark,
	24	const int id,
	25	const alphabet* inAlph);
	26
	27	sequence(const sequence& other);
	28	sequence(const sequence& other,const alphabet* inAlph); // convert the other sequence to the alphabet inAlph.
	29	explicit sequence(const alphabet* inAlph) {
	30	if (inAlph == NULL) {
	31	errorMsg::reportError("must give a non Null alphabet when constructing sequences");
	32	}
	33	_alphabet = inAlph->clone();
	34	}
	35	virtual ~sequence();
	36
	37	int seqLen() const {return _vec.size();}
	38	int seqLenSpecific() const; //return the number of sites that are isSpecific()
	39	const string& name() const {return _name;}
	40	void setName(const string & inName) { _name =inName ;}
	41	const int id() const {return _id;}
	42	void setID(const int inID) { _id =inID ;}
	43	const string& remark() const {return _remark;}
	44	void setRemarks(const string & inRemarks) { _remark =inRemarks ;}
	45	string toString() const;
	46	string toString(const int pos) const;
	47
	48	void addFromString(const string& str);
	49	//push_back: add a single characer to the sequence
	50	void push_back(int p) {_vec.push_back(p);}
	51	void resize(const int k, const int* val = NULL);
	52	void removePositions(const vector<int> & parCol);
	53
	54	void setAlphabet(const alphabet* inA) {if (_alphabet) delete _alphabet;
	55	_alphabet=inA->clone();
	56	}
	57	const alphabet* getAlphabet() const {return _alphabet;}
	58
	59	inline sequence& operator=(const sequence& other);
	60	inline sequence& operator+=(const sequence& other);
	61	int& operator[](const int i) {return _vec[i];}
	62	const int& operator[](const int pos) const {return _vec[pos];}
	63
	64	bool isUnknown(const int pos) const {return _vec[pos] == _alphabet->unknown();}
	65
	66	// "specific" here is not unknown, nor ambiguity, nor gap (for example, for nucleotides it will true for A,C,G, or T).
	67	bool isSpecific(const int pos) const {return _alphabet->isSpecific(_vec[pos]);}
	68
	69	private:
	70	vector<int> _vec;
	71	const alphabet* _alphabet;
	72	string _remark;
	73	string _name;
	74	int _id;
	75
	76
	77	public:
	78	class Iterator {
	79	public:
	80	explicit Iterator(){};
	81	~Iterator(){};
	82	void begin(sequence& seq){_pointer = seq._vec.begin();}
	83	void end(sequence& seq){_pointer = seq._vec.end();}
	84	int& operator* (){return *_pointer;}
	85	int const &operator* () const {return *_pointer;}
	86	void operator ++() {++_pointer;}
	87	void operator --() { --_pointer; }
	88	bool operator != (const Iterator& rhs){return (_pointer != rhs._pointer);}
	89	bool operator == (const Iterator& rhs){return (_pointer == rhs._pointer);}
	90	private:
	91	vector<int>::iterator _pointer;
	92	};
	93
	94	class constIterator {
	95	public:
	96	explicit constIterator(){};
	97	~constIterator(){};
	98	void begin(const sequence& seq){_pointer = seq._vec.begin();}
	99	void end(const sequence& seq){_pointer = seq._vec.end();}
	100	int const &operator* () const {return *_pointer;}
	101	void operator ++(){++_pointer;}
	102	void operator --(){--_pointer;}
	103	bool operator != (const constIterator& rhs) {
	104	return (_pointer != rhs._pointer);
	105	}
	106	bool operator == (const constIterator& rhs) {
	107	return (_pointer == rhs._pointer);
	108	}
	109	private:
	110	vector<int>::const_iterator _pointer;
	111	};
	112
	113
	114	} ;
	115
	116	inline sequence& sequence::operator=(const sequence& other) {
	117	_vec = other._vec;
	118	_alphabet = other._alphabet->clone();
	119	_name=other.name();
	120	_id=other.id();
	121	_remark=other.remark();
	122
	123	return *this;
	124	}
	125
	126	inline sequence& sequence::operator+=(const sequence& other) {
	127	for (int i=0; i <other._vec.size();++i) {
	128	_vec.push_back(other._vec[i]);
	129	}
	130	return *this;
	131	}
	132
	133
	134	inline ostream & operator<<(ostream & out, const sequence &Seq){
	135	out<< Seq.toString();
	136	return out;
	137	}
	138
	139
	140	#endif
	141

+515

-0

libs/phylogeny/sequenceContainer.cpp less more

	0	// $Id: sequenceContainer.cpp 11751 2013-09-12 21:52:03Z cohenofi $
	1	#include "sequenceContainer.h"
	2	#include "logFile.h"
	3	#include "someUtil.h"
	4	#include "fastaFormat.h"
	5
	6	sequenceContainer::sequenceContainer(const sequenceContainer& other,const alphabet *inAlph) :
	7	_generalRemarks(other._generalRemarks),
	8	_id2place(other._id2place)
	9	{
	10	for (int i=0; i < other._seqDataVec.size(); ++i)
	11	_seqDataVec.push_back(sequence(other._seqDataVec[i],inAlph));
	12	}
	13
	14
	15	//if bAugumentShorterSeqs=true then add gap characters at the end of short seqeunces
	16	const int sequenceContainer::makeSureAllSeqAreSameLengthAndGetLen(bool bAugumentShorterSeqs) {
	17	if (_seqDataVec.size() == 0) return 0;
	18	const int len = _seqDataVec[0].seqLen();
	19	for (int i=1; i < _seqDataVec.size(); ++i) {
	20	if (_seqDataVec[i].seqLen()!=len) {
	21	if (bAugumentShorterSeqs) {
	22	for (int pos = _seqDataVec[i].seqLen(); pos < len; ++pos)
	23	_seqDataVec[i].push_back(getAlphabet()->gap());
	24	}
	25	else {
	26	cerr<<_seqDataVec[i].name()<<" length = "<<_seqDataVec[i].seqLen()<<" "<<_seqDataVec[0].name()<<" length = "" "<<len<<endl;
	27	errorMsg::reportError("not all sequences are of the same lengths");
	28	}
	29	}
	30	}
	31
	32	return len;
	33	}
	34
	35	//void sequenceContainer::addFromsequenceContainer(sequenceContainer& seqToAdd){
	36	// if (_seqDataVec.empty()) { // first sequence to add
	37	// sequenceContainer::taxaIterator tit;
	38	// sequenceContainer::taxaIterator titEND;
	39	// tit.begin(seqToAdd);
	40	// titEND.end(seqToAdd);
	41	// while (tit!=titEND) {
	42	// _seqDataVec.push_back(*tit);
	43	//
	44	// }
	45	// }
	46	// else {// now we are adding sequences to sequences that are already there.
	47	// sequenceContainer::taxaIterator tit;
	48	// sequenceContainer::taxaIterator titEND;
	49	// tit.begin(seqToAdd);
	50	// titEND.end(seqToAdd);
	51	// while (tit!=titEND) {
	52	// for (int i=0; i < _seqDataVec.size(); ++i) {
	53	// if (tit->name() == _seqDataVec[i].name()) {
	54	// _seqDataVec[i]+=(*tit);
	55	// break;
	56	// }
	57	// }
	58	// ++tit;
	59	// }
	60	// }
	61	//}
	62
	63	void sequenceContainer::changeGaps2MissingData() {
	64
	65	for (int i = 0; i < seqLen();++i) {//going over al positions
	66	for (int j = 0; j < _seqDataVec.size();++j) {
	67	if (_seqDataVec[j][i] == -1){
	68	_seqDataVec[j][i]=getAlphabet()->unknown(); // missing data
	69	}
	70	}
	71	}
	72	}
	73
	74	const int sequenceContainer::getId(const string &seqName, bool issueWarningIfNotFound) const {
	75	int k;
	76	for (k=0 ; k < _seqDataVec.size() ; ++k) {
	77	if (_seqDataVec[k].name() == seqName) return (_seqDataVec[k].id());
	78	}
	79	if (k == _seqDataVec.size() && issueWarningIfNotFound) {
	80	// debuggin
	81	LOG(5,<<"seqName = "<<seqName<<endl);
	82	for (k=0 ; k < _seqDataVec.size() ; ++k) {
	83	LOG(5,<<"_seqDataVec["<<k<<"].name() ="<<_seqDataVec[k].name()<<endl);
	84	}
	85	//end dubug
	86	LOG(0,<<seqName<<endl);
	87	vector<string> err;
	88	err.push_back("Could not find a sequence that matches the sequence name ");
	89	err.push_back(seqName);
	90	err.push_back("in function sequenceContainer::getSeqPtr ");
	91	err.push_back(" make sure that names in tree file match name in sequence file ");
	92	errorMsg::reportError(err); // also quit the program
	93	}
	94	return -1;
	95	}
	96
	97	const Vstring sequenceContainer::names() const {
	98	vector<string> res;
	99	for (int i=0; i < _seqDataVec.size(); ++i) {
	100	res.push_back(_seqDataVec[i].name());
	101	}
	102	return res;
	103	}
	104
	105	sequenceContainer::sequenceContainer() {
	106	_id2place.resize(100,-1);
	107	}
	108
	109	sequenceContainer::~sequenceContainer(){}
	110
	111	void sequenceContainer::add(const sequence& inSeq) {
	112	_seqDataVec.push_back(inSeq);
	113	if (_id2place.size() < inSeq.id()+1) {
	114	_id2place.resize(inSeq.id()+100,-1);
	115	}
	116	if (_id2place[inSeq.id()] != -1) {
	117	string err = "Two sequences with the same id - error in function sequenceContainer::add";
	118	err+= "\nThe id of the sequence you are trying to add = ";
	119	err += int2string(inSeq.id());
	120	errorMsg::reportError(err);
	121	}
	122	_id2place[inSeq.id()] = _seqDataVec.size()-1;
	123	}
	124
	125
	126	//given a sequence id the sequence is removed from the sequence container
	127	//and the vector _id2place is updated.
	128	void sequenceContainer::remove(const int idSeq) {
	129	if (idSeq > _id2place.size()-1 \|\| idSeq<0)
	130	errorMsg::reportError("the id of sequence is not mapped by id2place in function sequenceContainer::remove");
	131	int place = _id2place[idSeq];
	132
	133	if (place < 0)
	134	errorMsg::reportError("cannot find place of the id in the sequence container in function sequenceContainer::remove");
	135	_seqDataVec.erase(_seqDataVec.begin()+place);
	136
	137	_id2place[idSeq] = -1;
	138	for (int i=place;i<_seqDataVec.size();i++) {
	139	int id = _seqDataVec[i].id();
	140	_id2place[id]--;
	141	}
	142	}
	143	// remove all sequences from the sequence container
	144	void sequenceContainer::removeAll(){
	145	Vint ids2remove(numberOfSeqs());
	146	for(int i= 0; i<numberOfSeqs() ;i++){
	147	ids2remove[i] =placeToId(i);
	148	}
	149	for(int i= 0; i<ids2remove.size() ;i++){
	150	remove(ids2remove[i]);
	151	}
	152	}
	153
	154
	155
	156	//removes identical sequences in the sequence container.
	157	void sequenceContainer::removeIdenticalSequences(){
	158	bool exist;
	159	for (int i=1;i<_seqDataVec.size();i++){
	160	sequence sq1 = _seqDataVec[i];
	161	for (int j=0;j<i;j++){
	162	sequence sq2 = _seqDataVec[j];
	163	exist = true;
	164	if (sq1.seqLen() != sq2.seqLen()) continue;
	165	for (int pos=0;pos<sq1.seqLen();pos++){
	166	if (sq1[pos] != sq2[pos]){
	167	exist = false;
	168	break;
	169	}
	170	}
	171	if (exist) {
	172	remove(sq1.id());
	173	i--;
	174	break;
	175
	176	}
	177
	178	}
	179
	180	}
	181
	182	}
	183
	184	void sequenceContainer::removeGapPositions(){
	185	vector<int> posToRemove(seqLen(),0);
	186	bool gapCol;
	187	int i,j;
	188	for (i = 0; i < seqLen();++i) {//going over al positions
	189	gapCol = false;
	190	for (j = 0; j < _seqDataVec.size();++j) {
	191	if (_seqDataVec[j][i] == -1) posToRemove[i] = 1;
	192	}
	193	}
	194	removePositions(posToRemove);
	195	}
	196	void sequenceContainer::removeGapPositionsAllSeqs(){
	197	vector<int> posToRemove(seqLen(),1);
	198	bool gapCol;
	199	int i,j;
	200	for (i = 0; i < seqLen();++i) {//going over al positions
	201	gapCol = false;
	202	for (j = 0; j < _seqDataVec.size();++j) {
	203	if (_seqDataVec[j][i] != -1) posToRemove[i] = 0;
	204	}
	205	}
	206	removePositions(posToRemove);
	207	}
	208	void sequenceContainer::removeGapPositionsAccordingToAReferenceSeq(const string & seqName){
	209	int idOfRefSeq = getId(seqName,true);
	210	vector<int> posToRemove(seqLen(),0);
	211	int i;
	212	for (i = 0; i < seqLen();++i) {//going over al positions
	213	if (_seqDataVec[idOfRefSeq][i] == -1) posToRemove[i] = 1;
	214	}
	215	removePositions(posToRemove);
	216	}
	217
	218	void sequenceContainer::removeUnknownPositionsAccordingToAReferenceSeq(const string & seqName){
	219	int idOfRefSeq = getId(seqName,true);
	220	vector<int> posToRemove(seqLen(),0);
	221	int i;
	222	for (i = 0; i < seqLen();++i) {//going over al positions
	223	if (_seqDataVec[idOfRefSeq][i] == getAlphabet()->unknown()) posToRemove[i] = 1;
	224	}
	225	removePositions(posToRemove);
	226	}
	227
	228	//removePositions: the positions to be removed are marked as '1' in posToRemoveVec
	229	//all othehr positions are '0'
	230	void sequenceContainer::removePositions(const Vint & posToRemoveVec) {
	231	for (int z = 0; z < _seqDataVec.size();++z) {
	232	_seqDataVec[z].removePositions(posToRemoveVec);
	233	}
	234	}
	235
	236
	237	sequenceContainer sequenceContainer::getSubSeq(const int startPos, const int endPos) {
	238	sequenceContainer subSeq(*this);
	239
	240	vector<int> posToRemove(seqLen(),true);
	241	for (int i = startPos; i <= endPos;++i) {//going over al positions
	242	posToRemove[i] = false;
	243	}
	244	subSeq.removePositions(posToRemove);
	245
	246	return subSeq;
	247	}
	248
	249
	250	void sequenceContainer::changeDotsToGoodCharacters() {
	251	for (int i = 0; i < seqLen();++i) {//going over al positions
	252	int charInFirstSeq = _seqDataVec[0][i];
	253	if (charInFirstSeq == -3) {
	254	LOG(5,<<" position is "<<i<<endl);
	255	errorMsg::reportError(" the first line contains dots ");
	256	}
	257	for (int j = 1; j < _seqDataVec.size();++j) {
	258	if ((_seqDataVec[j][i] == -3)) {
	259	_seqDataVec[j][i] = charInFirstSeq; // missing data
	260	}
	261	}
	262	}
	263	}
	264
	265	int sequenceContainer::numberOfSequencesWithoutGaps (const int pos) const {
	266	int numOfNonCharPos = numberOfSeqs();
	267	for (int i=0; i < numberOfSeqs(); ++i) {
	268	if ((*this)[i][pos] <0) --numOfNonCharPos;
	269	}
	270	return numOfNonCharPos;
	271	}
	272
	273	int sequenceContainer::numberOfSequencesWithoutUnknowns (const int pos) const {
	274	int numOfNonCharPos = numberOfSeqs();
	275	int unknown = getAlphabet()->unknown();
	276	for (int i=0; i < numberOfSeqs(); ++i) {
	277	if ((*this)[i][pos] == unknown )
	278	--numOfNonCharPos;
	279	}
	280	return numOfNonCharPos;
	281	}
	282
	283	bool sequenceContainer::isInvariable(const int pos) const {
	284	int charFound = getAlphabet()->unknown();
	285	for (int i=0; i < numberOfSeqs(); ++i) {
	286	if ((*this)[i][pos] >= 0) {
	287	if (charFound == getAlphabet()->unknown())
	288	charFound = (*this)[i][pos];
	289	else if (charFound != (*this)[i][pos])
	290	return false;
	291	}
	292	}
	293	return true;
	294	}
	295
	296	int sequenceContainer::getInvariablePosNum() const {
	297	int sum = 0;
	298	for (int pos = 0; pos < seqLen(); ++pos) {
	299	if (isInvariable(pos))
	300	++sum;
	301	}
	302	return sum;
	303	}
	304
	305	// new func for gainLoss project
	306	void sequenceContainer::startZeroSequenceContainerGL(const sequenceContainer &sc, const gainLossAlphabet& alph, const int minNumOfOnes, const int minNumOfZeros)
	307	{
	308	//if(minNumOfOnes==0 && minNumOfZeros==0)
	309	// return;
	310
	311	string str0 = "0";
	312	string str1 = "1";
	313	vector<string> strV;
	314	strV.resize(sc.numberOfSeqs());
	315	string remark ="";
	316	switch (minNumOfOnes) {
	317	case (1) :
	318	for(int i=0; i<sc.numberOfSeqs();i++){
	319	// add patterns of 0 ones
	320	strV[i] = str0;
	321	}
	322	break;
	323	case (2) :
	324	for(int i=0; i<sc.numberOfSeqs();i++){
	325	// add patterns of 0 ones
	326	strV[i] = str0;
	327	}
	328	for(int i=0; i<sc.numberOfSeqs();i++){
	329	// add patterns of only 1 ones
	330	for(int j=0; j<sc.numberOfSeqs(); j++){
	331	if(j==i){
	332	strV[i]+=str1;
	333	}
	334	else{
	335	strV[i]+=str0;
	336	}
	337	}
	338	}
	339	break;
	340	case (3) :
	341	for(int i=0; i<sc.numberOfSeqs();i++){
	342	// add patterns of 0 ones
	343	strV[i] = str0;
	344	}
	345	for(int i=0; i<sc.numberOfSeqs();i++){
	346	// add patterns of only 1 ones
	347	for(int j=0; j<sc.numberOfSeqs(); j++){
	348	if(j==i){
	349	strV[i]+=str1;
	350	}
	351	else{
	352	strV[i]+=str0;
	353	}
	354	}
	355	}
	356	// add patterns of only 2 ones
	357	for(int onePosition1=0; onePosition1<sc.numberOfSeqs(); onePosition1++){
	358	for(int onePosition2=0; onePosition2<sc.numberOfSeqs(); onePosition2++){
	359	if(onePosition2<=onePosition1)
	360	continue;
	361	for(int i=0; i<sc.numberOfSeqs();i++){
	362	if(i==onePosition1 \|\| i==onePosition2){
	363	strV[i]+=str1;
	364	}
	365	else{
	366	strV[i]+=str0;
	367	}
	368	}
	369	}
	370	}
	371	break;
	372	}
	373	switch (minNumOfZeros) {
	374	case (0) :
	375	break;
	376	case (1) :
	377	for(int i=0; i<sc.numberOfSeqs();i++){
	378	// add patterns of 0 zeroes (only '1')
	379	strV[i] += str1;
	380	}
	381	break;
	382	}
	383	//////////////////////////////////////////////////////////////////////////
	384
	385	for(int i=0; i<sc.numberOfSeqs();i++){
	386	//cout<<strV[i]<<endl;
	387	this->add(sequence(strV[i],sc.name(i),remark,i,&alph));
	388	}
	389	}
	390
	391
	392	//concatenate two sequecneContainers.
	393	//The sequence names must be identical in the two containers.
	394	//returns false if: (1) A sequence_name in one of the containers does not match any sequence_name in the other container.
	395	void sequenceContainer::concatenate(sequenceContainer& other) {
	396	if (other.numberOfSeqs() != numberOfSeqs()){
	397	string msg = "Not the same number of taxa, can't concatenate: other="+ int2string(other.numberOfSeqs()) + " this=" + int2string( numberOfSeqs()) +"\n";
	398	errorMsg::reportError(msg);
	399	return;
	400	}
	401	for (sequenceContainer::taxaIterator itThis=(this).taxaBegin();itThis!=(this).taxaEnd();++itThis) {
	402	//for(int i = 0; i < numberOfSeqs(); ++i) {
	403	bool bFound = false;
	404	//out << (*this)[i].name()<<endl;
	405
	406	for (sequenceContainer::taxaIterator itOther=other.taxaBegin();itOther!=other.taxaEnd();++itOther) {
	407	//for (int j = 0; j < other.numberOfSeqs(); ++j) {
	408	//if((*this)[i].name().compare(other[j].name()) == 0)
	409	if(itThis->name().compare(itOther->name()) == 0)
	410	{
	411	//(*this)[i] += other[j]; // was i ?????
	412	(itThis) += (itOther);
	413	bFound = true;
	414	break;
	415	}
	416	}
	417	if (bFound == false)
	418	{
	419	string msg = "Can't find sequence name in the second MSA: " +itThis->name();
	420	errorMsg::reportError(msg);
	421	}
	422	}
	423	}
	424	//////////////////////////////////////////////////////////////////////////
	425	const bool sequenceContainer::operator==(const sequenceContainer& sq) const {
	426	if (_seqDataVec.size() != sq._seqDataVec.size()) // not the same number of sequences in sequenceContainer
	427	return false;
	428	const int numberOfSeqs = _seqDataVec.size();
	429	const int len = _seqDataVec[0].seqLen();
	430	for (int i=0; i < numberOfSeqs; ++i) {
	431	string nameI = name(i);
	432	int idI = getId(nameI);
	433	int idSq = sq.getId(nameI);
	434	if (_seqDataVec[idI].seqLen()!=sq._seqDataVec[idSq].seqLen())
	435	return false;
	436	for (int pos = 0; pos < len; ++pos)
	437	{
	438	if (_seqDataVec[idI][pos]!=sq._seqDataVec[idSq][pos])
	439	return false;
	440	}
	441	}
	442	return true;
	443	}
	444
	445
	446
	447	//////////////////////////////////////////////////////////////////////////
	448	int sequenceContainer::getNumOfOccurancesPerPos(const int pos, const char charId){
	449	int numOfOccurancesPerPos = 0;
	450	const int numberOfSeqs = _seqDataVec.size();
	451	const int len = _seqDataVec[0].seqLen();
	452
	453	for (int i=0; i < numberOfSeqs; ++i) {
	454	string nameI = name(i);
	455	int idI = getId(nameI);
	456	if (_seqDataVec[idI][pos]==charId)
	457	numOfOccurancesPerPos++;
	458	}
	459	return numOfOccurancesPerPos;
	460	}
	461
	462	//////////////////////////////////////////////////////////////////////////
	463	vector<string> sequenceContainer::getSeqNamesThatMatchPos(const int pos, const char charId){
	464	vector<string> SeqNamesThatMatchPos;
	465	const int numberOfSeqs = _seqDataVec.size();
	466	const int len = _seqDataVec[0].seqLen();
	467
	468	for (int i=0; i < numberOfSeqs; ++i) {
	469	string nameI = name(i);
	470	int idI = getId(nameI);
	471	if (_seqDataVec[idI][pos]==charId)
	472	SeqNamesThatMatchPos.push_back(nameI);
	473	}
	474	return SeqNamesThatMatchPos;
	475	}
	476
	477	//////////////////////////////////////////////////////////////////////////
	478	// added counts for unKnown data
	479	const vector<int> sequenceContainer::getAlphabetDistribution(bool isCountUnknown) const {
	480	vector<int> alphabetVec;
	481	int alphSize = alphabetSize()+1; //unKnown
	482	int UnknownVal = getAlphabet()->unknown();
	483	alphabetVec.resize( alphSize);
	484	const int numberOfSeqs = _seqDataVec.size();
	485	const int len = _seqDataVec[0].seqLen();
	486	for (int i=0; i < numberOfSeqs; ++i) {
	487	for (int pos = 0; pos < len; ++pos) {
	488	for(int alph = 0 ; alph<alphSize ;++alph){
	489	if ( _seqDataVec[i][pos] ==alph)
	490	++alphabetVec[alph];
	491	else if( _seqDataVec[i][pos] ==UnknownVal)
	492	++alphabetVec[alph];
	493	}
	494	}
	495	}
	496	return alphabetVec;
	497	}
	498
	499	//////////////////////////////////////////////////////////////////////////
	500	const vector<int> sequenceContainer::getAlphabetDistribution(int pos,bool isCountUnknown) const {
	501	vector<int> alphabetVec;
	502	alphabetVec.resize( alphabetSize());
	503	const int numberOfSeqs = _seqDataVec.size();
	504	for (int i=0; i < numberOfSeqs; ++i) {
	505	for(int alph = 0 ; alph<alphabetSize() ;++alph){
	506	if ( _seqDataVec[i][pos] ==alph)
	507	++alphabetVec[alph];
	508	}
	509	}
	510	return alphabetVec;
	511	}
	512
	513
	514

+183

-0

libs/phylogeny/sequenceContainer.h less more

	0	// $Id: sequenceContainer.h 11662 2013-07-17 08:01:17Z cohenofi $
	1
	2	#ifndef ___SEQUENCE_CONTAINER
	3	#define ___SEQUENCE_CONTAINER
	4	#include "definitions.h"
	5	#include "sequence.h"
	6	#include "gainLossAlphabet.h"
	7
	8	class sequenceContainer {
	9	public:
	10
	11	class taxaIterator;
	12	friend class taxaIterator;
	13	class constTaxaIterator;
	14	friend class constTaxaIterator;
	15
	16	//------------------------------------------------------------
	17	//constructors:
	18	explicit sequenceContainer();
	19	sequenceContainer(const sequenceContainer& other,const alphabet *inAlph);
	20	virtual ~sequenceContainer();
	21
	22	//questions only:
	23	const int seqLen() const {return _seqDataVec.empty()? 0 : _seqDataVec[0].seqLen();}
	24	const int numberOfSeqs() const {return _seqDataVec.size();}
	25	const int alphabetSize() const {return _seqDataVec.empty()? 0 : _seqDataVec[0].getAlphabet()->size();}
	26	const vector<string>& getGeneralRemarks() const {return _generalRemarks;}
	27	const int makeSureAllSeqAreSameLengthAndGetLen(bool bAugumentShorterSeqs = false); //if bAugumentShorterSeqs=true then add gap characters at the end of short seqeunces
	28	const int getId(const string &seqName, bool issueWarninInNotFound=true) const;//return -1 if not found...
	29	sequence& operator[](const int id) {return _seqDataVec[_id2place[id]];} // get the ID of the sequence. Return the sequence itself.
	30	const sequence& operator[](const int id) const {return _seqDataVec[_id2place[id]];}
	31	const bool operator==(const sequenceContainer& sq) const;
	32	const sequence& getSeqDirectFromDataVec(int i){return _seqDataVec[i];}
	33
	34
	35	const Vstring names() const; // return a vector<string> of the names of all the sequences.
	36	const string& name(const int id) const {return _seqDataVec[_id2place[id]].name();};
	37	const alphabet* getAlphabet() const {return _seqDataVec[0].getAlphabet();}
	38	const vector<int> getAlphabetDistribution(bool isCountUnknown=false) const;
	39	vector<string> getSeqNamesThatMatchPos(const int pos, const char charId);
	40	const vector<int> getAlphabetDistribution(int pos,bool isCountUnknown=false) const;
	41
	42	//returns the number of positions that are invariable (all seqs are identical
	43	int getInvariablePosNum() const;
	44	bool isInvariable(const int pos) const;
	45	// computed the number of sequences without gaps at a specific position
	46	// for example, if the multiple sequence alignment is
	47	// AT-
	48	// AG-
	49	// A-M
	50	// numberOfSequencesWithoutGaps(0) = 3
	51	// numberOfSequencesWithoutGaps(1) = 2
	52	// numberOfSequencesWithoutGaps(2) = 1
	53	int numberOfSequencesWithoutGaps(const int pos) const;
	54	int numberOfSequencesWithoutUnknowns(const int pos) const;
	55
	56
	57
	58
	59	//make changes:
	60	void resize(int t,const alphabet* inAlph) {
	61	if (inAlph == NULL) {
	62	errorMsg::reportError("cannot resize when the alphabet is unknown");
	63	}
	64	sequence s(inAlph);
	65	_seqDataVec.resize(t,s);
	66	}
	67	void add(const sequence& inSeq);
	68	void remove(const int idSeq);
	69	void removeAll();
	70
	71	void removeIdenticalSequences();
	72	int placeToId(const int place) const {return _seqDataVec[place].id();}; //get place in the vector and return the id of the sequence
	73	void addGeneralRemark(const string& inRemark) {_generalRemarks.push_back(inRemark);}
	74	void changeGaps2MissingData();
	75	//removePositions: the positions to be removed are marked as '1' in posToRemoveVec
	76	//all other positions are '0'
	77	void removePositions(const Vint & posToRemoveVec);
	78	sequenceContainer getSubSeq(const int startPos, const int endPos);
	79	int getNumOfOccurancesPerPos(const int pos, const char charId);
	80	void removeGapPositions();
	81	void removeGapPositionsAllSeqs();
	82	void removeGapPositionsAccordingToAReferenceSeq(const string & seqName);
	83	void changeDotsToGoodCharacters();
	84	void removeUnknownPositionsAccordingToAReferenceSeq(const string & seqName);
	85	void concatenate(sequenceContainer& other);
	86	void startZeroSequenceContainerGL(const sequenceContainer &sc, const gainLossAlphabet& alph, const int minNumOfOnes=1, const int minNumOfZeros=0);
	87
	88
	89	public:
	90	sequence::Iterator begin(const int id){//iterface to sequence iterator
	91	sequence::Iterator temp;
	92	temp.begin(_seqDataVec[id]);
	93	return temp;
	94	}
	95	sequence::Iterator end(const int id){//iterface to sequence iterator
	96	sequence::Iterator temp;
	97	temp.end(_seqDataVec[id]);
	98	return temp;
	99	}
	100
	101	class taxaIterator {
	102	public:
	103	explicit taxaIterator(){};
	104	~taxaIterator(){};
	105	void begin(sequenceContainer & inSeqCont){
	106	_pointer = inSeqCont._seqDataVec.begin();
	107	}
	108	void end(sequenceContainer & inSeqCont){
	109	_pointer = inSeqCont._seqDataVec.end();
	110	}
	111	sequence& operator* () {return *_pointer;}
	112	sequence const & operator* () const {return *_pointer;}
	113	sequence * operator-> () {return &*_pointer;} //MATAN- CHECK!!!
	114	sequence const * operator-> () const {return &* _pointer;} // MATAN - CHECK!!!
	115
	116	void operator ++() {++_pointer;}
	117	void operator --() { --_pointer; }
	118	bool operator != (const taxaIterator& rhs){return (_pointer != rhs._pointer);}
	119	bool operator == (const taxaIterator& rhs){return (_pointer == rhs._pointer);}
	120	private:
	121	vector<sequence>::iterator _pointer;
	122	};//end if class taxaIterator
	123
	124
	125	class constTaxaIterator {
	126	public:
	127	explicit constTaxaIterator(){};
	128	~constTaxaIterator(){};
	129	void begin(const sequenceContainer & inSeqCont){
	130	_pointer = inSeqCont._seqDataVec.begin();
	131	}
	132	void end(const sequenceContainer & inSeqCont){
	133	_pointer = inSeqCont._seqDataVec.end();
	134	}
	135	sequence const & operator() const {return _pointer;}
	136	sequence const * operator->() const {return &*_pointer;}// MATAN - CHECK!!!
	137
	138	void operator ++() {++_pointer;}
	139	void operator --() { --_pointer; }
	140	bool operator != (const constTaxaIterator& rhs) {
	141	return (_pointer != rhs._pointer);
	142	}
	143
	144	bool operator == (const constTaxaIterator& rhs) {
	145	return (_pointer == rhs._pointer);
	146	}
	147	private:
	148	vector<sequence>::const_iterator _pointer;
	149	};
	150
	151	public: // interfaces to iterators
	152	taxaIterator taxaBegin(const int id=0){// interface to taxaIterator
	153	taxaIterator temp;
	154	temp.begin(*this);
	155	return temp;
	156	}
	157
	158	taxaIterator taxaEnd(){// interface to taxaIterator
	159	taxaIterator temp;
	160	temp.end(*this);
	161	return temp;
	162	}
	163
	164	constTaxaIterator constTaxaBegin() const{ //interface to const taxaIter
	165	constTaxaIterator temp;
	166	temp.begin(*this);
	167	return temp;
	168	}
	169	constTaxaIterator constTaxaEnd() const{
	170	constTaxaIterator temp;
	171	temp.end(*this);
	172	return temp;
	173	}
	174
	175	private:
	176	vector<sequence> _seqDataVec;
	177	vector<string> _generalRemarks;
	178	vector<int> _id2place;
	179	};
	180
	181	#endif
	182

+295

-0

libs/phylogeny/simulateCodonsJumps.cpp less more

	0	#include "simulateCodonsJumps.h"
	1	#include "talRandom.h"
	2	#include "someUtil.h"
	3	#include <algorithm>
	4
	5
	6	simulateCodonsJumps::simulateCodonsJumps(const tree& inTree, const stochasticProcess& sp, const int alphabetSize)
	7	: simulateJumpsAbstract(inTree,sp,alphabetSize)
	8	{
	9	}
	10
	11	simulateCodonsJumps::~simulateCodonsJumps()
	12	{
	13	}
	14
	15	void simulateCodonsJumps::init()
	16	{
	17	//init the vector of waiting times.
	18	_waitingTimeParams.clear();
	19	_waitingTimeParams.resize(_alphabetSize);
	20	int i, j;
	21	for (i = 0; i < _alphabetSize; ++i)
	22	{
	23	_waitingTimeParams[i] = -_sp.dPij_dt(i, i, 0.0);
	24
	25	}
	26
	27	//init _jumpProbs.
	28	_jumpProbs.clear();
	29	_jumpProbs.resize(_alphabetSize);
	30	for (i = 0; i < _alphabetSize; ++i)
	31	{
	32	MDOUBLE sum = 0.0;
	33	_jumpProbs[i].resize(_alphabetSize);
	34	for (j = 0; j < _alphabetSize; ++j)
	35	{
	36	if (i == j)
	37	_jumpProbs[i][j] = 0.0;
	38	else
	39	{
	40	_jumpProbs[i][j] = _sp.dPij_dt(i, j, 0.0) / _waitingTimeParams[i];
	41	}
	42	sum += _jumpProbs[i][j];
	43	}
	44	if (! DEQUAL(sum, 1.0,0.001)){
	45	string err = "error in simulateCodonsJumps::init(): sum probabilities is not 1 and equal to ";
	46	err+=double2string(sum);
	47	errorMsg::reportError(err);
	48	}
	49	}
	50
	51	//init _orderNodesVec: a vector in which the branch lengths are ordered in ascending order
	52	_tree.getAllNodes(_orderNodesVec, _tree.getRoot());
	53	sort(_orderNodesVec.begin(), _orderNodesVec.end(), simulateJumpsAbstract::compareDist);
	54
	55	_nodes2JumpsExp.clear();
	56	_nodes2JumpsProb.clear();
	57	//
	58	vector<pair<MDOUBLE,MDOUBLE> > zeroCombinedStates2jumps;
	59	for(i = 0;i < getCombinedAlphabetSize();++i){
	60	pair<MDOUBLE,MDOUBLE> syn_and_nonSyn_jumps(0.0,0.0);
	61	zeroCombinedStates2jumps.push_back(syn_and_nonSyn_jumps);
	62	}
	63	Vdouble zeroVector(getCombinedAlphabetSize(),0.0);
	64	for (i = 0; i < _orderNodesVec.size(); ++i)
	65	{
	66	string nodeName = _orderNodesVec[i]->name();
	67	_nodes2JumpsExp[nodeName] = zeroCombinedStates2jumps;
	68	_nodes2JumpsProb[nodeName] = zeroCombinedStates2jumps;
	69	for (j=0; j<getCombinedAlphabetSize();++j)
	70	_totalTerminals[nodeName]=zeroVector;
	71	}
	72	}
	73
	74
	75	//simulate jumps starting from startState. The simulation continue until the maxTime is reached. In each step:
	76	//1. Draw a new waiting time.
	77	//2. Go over all branches shorter than nextJumpTime and update their jumpsNum between the states that were switched
	78	// (these branches will not be affected by the current jump):
	79	// however they might have been affected by the previous jump
	80	//3. Draw a new state
	81	void simulateCodonsJumps::runOneIter(int startState)
	82	{
	83	codonUtility::replacementType substitutionType = codonUtility::sameCodon;
	84	MDOUBLE maxTime = _orderNodesVec[_orderNodesVec.size()-1]->dis2father();
	85	MDOUBLE totalTimeTillJump = 0.0;
	86	int curState = startState;
	87	int smallestBranchNotUpdatedSofar = 0;
	88	vector<pair<int, int> > jumpsSoFar(0);
	89	while (totalTimeTillJump < maxTime)
	90	{
	91	MDOUBLE avgWaitingTime = 1 / _waitingTimeParams[curState];
	92	MDOUBLE nextJumpTime = totalTimeTillJump + talRandom::rand_exp(avgWaitingTime);
	93	//go over all branches that "finished" their simulation (shorter than nextJumpTime) and update with their _nodes2JumpsExp
	94	//with the jumps that occured between the terminal Ids: startState-->curState
	95	for (int b = smallestBranchNotUpdatedSofar; b < _orderNodesVec.size(); ++b)
	96	{
	97	if (_orderNodesVec[b]->dis2father() > nextJumpTime)
	98	{
	99	smallestBranchNotUpdatedSofar = b;
	100	break;
	101	}
	102	string nodeName = _orderNodesVec[b]->name();
	103	//update all the jumps that occured along the branch
	104	int terminalState = getCombinedState(startState, curState);
	105	_totalTerminals[nodeName][terminalState]++;
	106	//update all longer branches with all jumps that occurred till now
	107	/* vector<bool> jumpsSoFarBool(getCombinedAlphabetSize(),false);*/
	108	// There's no need for the jumpsSoFarBool vector because we want to count
	109	// the number of syn subs and not just to note that there has been at least 1
	110	// The final probability is calculated in computeExpectationsAndPosterior
	111	for (int j = 0; j < jumpsSoFar.size(); ++j)
	112	{
	113	substitutionType = codonUtility::codonReplacement(jumpsSoFar[j].first,jumpsSoFar[j].second);
	114	/* int combinedJumpState = getCombinedState(jumpsSoFar[j].first, jumpsSoFar[j].second);
	115	jumpsSoFarBool[combinedJumpState]=true;*/
	116	if(substitutionType == codonUtility::synonymous)
	117	{
	118	_nodes2JumpsExp[nodeName][terminalState].first += 1;
	119	_nodes2JumpsProb[nodeName][terminalState].first += 1;
	120	}
	121	else if(substitutionType == codonUtility::non_synonymous)
	122	{
	123	_nodes2JumpsExp[nodeName][terminalState].second += 1;
	124	_nodes2JumpsProb[nodeName][terminalState].second += 1;
	125	}
	126	}
	127	/*
	128	for (int combined=0;combined<jumpsSoFarBool.size();++combined)
	129	{
	130	if (jumpsSoFarBool[combined]){
	131	if(substitutionType == codonUtility::synonymous)
	132	_nodes2JumpsProb[nodeName][terminalState].first += 1;
	133	else if(substitutionType == codonUtility::non_synonymous)
	134	_nodes2JumpsProb[nodeName][terminalState].second += 1;
	135	}
	136	}
	137	*/
	138	}
	139	totalTimeTillJump = nextJumpTime;
	140	int nextState = giveRandomState(_alphabetSize,curState,_jumpProbs);
	141	jumpsSoFar.push_back(pair<int,int>(curState, nextState));
	142	curState = nextState;
	143	}
	144	}
	145
	146
	147	void simulateCodonsJumps::computeExpectationsAndPosterior(){
	148	//scale _nodes2JumpsExp so it will represent expectations
	149	map<string, vector<pair<MDOUBLE,MDOUBLE> > >::iterator iterExp = _nodes2JumpsExp.begin();
	150	for (; iterExp != _nodes2JumpsExp.end(); ++iterExp)
	151	{//each node
	152	string nodeName = iterExp->first;
	153	for (int termState = 0; termState < getCombinedAlphabetSize(); ++termState)
	154	{
	155	MDOUBLE totalJumps4currentNodeAndTermState = 0;
	156	map<string, Vdouble>::iterator iterTerm = _totalTerminals.find(nodeName);
	157	map<string, vector<pair<MDOUBLE,MDOUBLE> > >::iterator iterProb = _nodes2JumpsProb.find(nodeName);
	158	if ((iterTerm==_totalTerminals.end()) \|\| (iterProb==_nodes2JumpsProb.end()))
	159	{
	160	errorMsg::reportError("error in simulateJumps::runSimulation, unknown reason: cannot find nodeName in map");
	161	}
	162
	163	if (iterTerm->second[termState]==0){ //never reached these terminal states
	164	if((iterExp->second[termState].first == 0)&&(iterExp->second[termState].second == 0)&&
	165	((iterProb->second[termState].first == 0)&&(iterProb->second[termState].second == 0)))
	166	{
	167	int startID = getStartId(termState);
	168	int endID = getEndId(termState);
	169	if (startID != endID) // if the terminal states are different there was at least one startID->endID jump
	170	{
	171	codonUtility::replacementType substitutionType = codonUtility::codonReplacement(startID,endID);
	172	if(substitutionType == codonUtility::synonymous)
	173	{
	174	iterExp->second[termState].first = 1;
	175	iterProb->second[termState].first = 1;
	176	}
	177	else if(substitutionType == codonUtility::non_synonymous)
	178	{
	179	iterExp->second[termState].second = 1;
	180	iterProb->second[termState].second = 1;
	181	}
	182	totalJumps4currentNodeAndTermState = ((iterProb->second[termState].first) + (iterProb->second[termState].second));
	183	if(totalJumps4currentNodeAndTermState)
	184	{
	185	(iterProb->second[termState].first) /= totalJumps4currentNodeAndTermState;
	186	(iterProb->second[termState].second) /= totalJumps4currentNodeAndTermState;
	187	}
	188	}
	189	continue;
	190	}
	191
	192	else
	193	errorMsg::reportError("error in simulateCodonJumps::runSimulation, 0 times reached termState but non-zero for jumpCount");
	194	}
	195	(iterExp->second[termState].first) /= iterTerm->second[termState];
	196	(iterExp->second[termState].second) /= iterTerm->second[termState];
	197
	198	totalJumps4currentNodeAndTermState = ((iterProb->second[termState].first) + (iterProb->second[termState].second));
	199	if(totalJumps4currentNodeAndTermState)
	200	{
	201	(iterProb->second[termState].first) /= totalJumps4currentNodeAndTermState;
	202	(iterProb->second[termState].second) /= totalJumps4currentNodeAndTermState;
	203	}
	204	}
	205	}
	206	}
	207
	208
	209	MDOUBLE simulateCodonsJumps::getExpectation(const string& nodeName, int terminalStart, int terminalEnd, int fromId, int toId)
	210	{
	211	//map <string, VVdouble>::iterator pos;//Old
	212	map<string, vector<pair<MDOUBLE,MDOUBLE> > >::iterator pos;
	213	if ((pos = _nodes2JumpsExp.find(nodeName)) == _nodes2JumpsExp.end())
	214	{
	215	string err="error in simulateCodonJumps::getExpectation: cannot find node "+nodeName;
	216	errorMsg::reportError(err);
	217	}
	218	int combinedTerminalState = getCombinedState(terminalStart, terminalEnd);
	219	//Old
	220	//int combinedJumpState = getCombinedState(fromId, toId);
	221	//return (pos->second[combinedTerminalState][combinedJumpState]);
	222
	223	MDOUBLE expectation=0.0;
	224	if(codonUtility::codonReplacement(fromId,toId) == 1)
	225	expectation = pos->second[combinedTerminalState].first;
	226	else if(codonUtility::codonReplacement(fromId,toId) == 2)
	227	expectation = pos->second[combinedTerminalState].second;
	228	return (expectation);
	229	}
	230
	231	MDOUBLE simulateCodonsJumps::getExpectation(
	232	const string& nodeName,
	233	int terminalStart,
	234	int terminalEnd,
	235	codonUtility::replacementType substitutionType)
	236	{
	237	map<string, vector<pair<MDOUBLE,MDOUBLE> > >::iterator pos;
	238	if ((pos = _nodes2JumpsExp.find(nodeName)) == _nodes2JumpsExp.end())
	239	{
	240	string err="error in simulateCodonJumps::getExpectation: cannot find node "+nodeName;
	241	errorMsg::reportError(err);
	242	}
	243	int combinedTerminalState = getCombinedState(terminalStart, terminalEnd);
	244	MDOUBLE expectation=0.0;
	245	if(substitutionType == codonUtility::synonymous)
	246	expectation = pos->second[combinedTerminalState].first;
	247	else if(substitutionType == codonUtility::non_synonymous)
	248	expectation = pos->second[combinedTerminalState].second;
	249
	250	return (expectation);
	251	}
	252
	253
	254	MDOUBLE simulateCodonsJumps::getProb(const string& nodeName, int terminalStart, int terminalEnd, int fromId, int toId){
	255	//map <string, VVdouble>::iterator pos;
	256	map<string, vector<pair<MDOUBLE,MDOUBLE> > >::iterator pos;
	257	if ((pos = _nodes2JumpsProb.find(nodeName)) == _nodes2JumpsProb.end())
	258	{
	259	string err="error in simulateCodonJumps::getProb: cannot find node "+nodeName;
	260	errorMsg::reportError(err);
	261	}
	262	int combinedTerminalState = getCombinedState(terminalStart, terminalEnd);
	263	//Old
	264	//int combinedJumpState = getCombinedState(fromId, toId);
	265	//return (pos->second[combinedTerminalState][combinedJumpState]);
	266
	267	MDOUBLE prob=0.0;
	268	if(codonUtility::codonReplacement(fromId,toId) == 1)
	269	prob = pos->second[combinedTerminalState].first;
	270	else if(codonUtility::codonReplacement(fromId,toId) == 2)
	271	prob = pos->second[combinedTerminalState].second;
	272	return (prob);
	273	}
	274
	275	MDOUBLE simulateCodonsJumps::getProb(
	276	const string& nodeName,
	277	int terminalStart,
	278	int terminalEnd,
	279	codonUtility::replacementType substitutionType)
	280	{
	281	map<string, vector<pair<MDOUBLE,MDOUBLE> > >::iterator pos;
	282	if ((pos = _nodes2JumpsProb.find(nodeName)) == _nodes2JumpsProb.end())
	283	{
	284	string err="error in simulateCodonJumps::getProb: cannot find node "+nodeName;
	285	errorMsg::reportError(err);
	286	}
	287	int combinedTerminalState = getCombinedState(terminalStart, terminalEnd);
	288	MDOUBLE prob=0.0;
	289	if(substitutionType == codonUtility::synonymous)
	290	prob = pos->second[combinedTerminalState].first;
	291	else if(substitutionType == codonUtility::non_synonymous)
	292	prob = pos->second[combinedTerminalState].second;
	293	return (prob);
	294	}

+51

-0

libs/phylogeny/simulateCodonsJumps.h less more

	0	#ifndef ___SIMULATE_CODONS_JUMPS__
	1	#define ___SIMULATE_CODONS_JUMPS__
	2
	3	#include "simulateJumpsAbstract.h"
	4	#include "codon.h"
	5	using namespace std;
	6
	7	/******************************************************************
	8	This class implements simulateJumpsAbstract for small alphabets: (tested so far up to 3)
	9	*******************************************************************/
	10
	11	class simulateCodonsJumps:public simulateJumpsAbstract {
	12	public:
	13	simulateCodonsJumps(const tree& inTree, const stochasticProcess& sp, const int alphabetSize);
	14	virtual ~simulateCodonsJumps();
	15
	16	//for a branch length specified by a nodeName:
	17	//give the expected number of jumps (changes) from fromId to toId that occured along the specified branh length,
	18	//in which the starting character is terminalStart and the terminal character is terminalEnd
	19	MDOUBLE getExpectation(const string& nodeName, int terminalStart, int terminalEnd, int fromId, int toId);
	20	MDOUBLE getExpectation(const string& nodeName, int terminalStart, int terminalEnd, codonUtility::replacementType substitutionType);
	21	//same as above, except here we return the probability of a jump from fromId to toId given
	22	//terminal states terminalStart, terminalEnd in this branch
	23	MDOUBLE getProb(const string& nodeName, int terminalStart, int terminalEnd, int fromId, int toId);
	24	MDOUBLE getProb(const string& nodeName, int terminalStart, int terminalEnd, codonUtility::replacementType substitutionType);
	25
	26	private:
	27	void init();
	28	void runOneIter(int state);
	29	void computeExpectationsAndPosterior();
	30
	31	private:
	32
	33	//_node2Jumps: maps a node name (which specify a branch length) to
	34	//the expected number of synonymous and nonsynonymous jumps between any two characters along the branch leading from the father to this node
	35	//given the terminal characters of this branch.
	36	//We use a "combined alphabet" to make access easier. see getCombinedState() for details
	37	//The dimension of the vector is the combined terminal state and the pair elements are: synonymous and non-synonymous jumps, respectively.
	38
	39	map<string, vector<pair<MDOUBLE,MDOUBLE> > > _nodes2JumpsExp;
	40
	41	//_node2JumpsProb: maps a node name (which specify a branch length) to
	42	//the probability of a synonymous and non-synonymous jump between any two characters along the branch leading from the father to this node
	43	//given the terminal characters of this branch.
	44	//We use a "combined alphabet" to make access easier. see getCombinedState() for details
	45	//The dimension of the vector is the combined terminal state and the pair elements are: synonymous and non-synonymous jumps, respectively
	46	map<string, vector<pair<MDOUBLE,MDOUBLE> > > _nodes2JumpsProb;
	47
	48	};
	49
	50	#endif

+195

-0

libs/phylogeny/simulateJumps.cpp less more

	0	#include "simulateJumps.h"
	1	#include "talRandom.h"
	2	#include "someUtil.h"
	3	#include <algorithm>
	4
	5
	6	simulateJumps::simulateJumps(const tree& inTree, const stochasticProcess& sp, const int alphabetSize)
	7	: simulateJumpsAbstract(inTree,sp,alphabetSize)
	8	{
	9	}
	10
	11	simulateJumps::~simulateJumps()
	12	{
	13	}
	14
	15	void simulateJumps::init()
	16	{
	17
	18	//init the vector of waiting times.
	19	_waitingTimeParams.clear();
	20	_waitingTimeParams.resize(_alphabetSize);
	21
	22	int i, j;
	23	for (i = 0; i < _alphabetSize; ++i)
	24	{
	25	_waitingTimeParams[i] = -_sp.dPij_dt(i, i, 0.0);
	26
	27	}
	28
	29	//init _jumpProbs.
	30	//_jumpProbs[i][j] = Q[i][j] / -Q[i][i]
	31	_jumpProbs.clear();
	32	_jumpProbs.resize(_alphabetSize);
	33	for (i = 0; i < _alphabetSize; ++i)
	34	{
	35	MDOUBLE sum = 0.0;
	36	_jumpProbs[i].resize(_alphabetSize);
	37	for (j = 0; j < _alphabetSize; ++j)
	38	{
	39	if (i == j)
	40	_jumpProbs[i][j] = 0.0;
	41	else
	42	{
	43	_jumpProbs[i][j] = _sp.dPij_dt(i, j, 0.0) / _waitingTimeParams[i];
	44	}
	45	sum += _jumpProbs[i][j];
	46	}
	47	if (! DEQUAL(sum, 1.0)){
	48	string err = "error in simulateJumps::init(): sum probabilities is not 1 and equal to ";
	49	err+=double2string(sum);
	50	errorMsg::reportError(err);
	51	}
	52	}
	53
	54	//init _orderNodesVec: a vector in which the branch lengths are ordered in ascending order
	55	_tree.getAllNodes(_orderNodesVec, _tree.getRoot());
	56	sort(_orderNodesVec.begin(), _orderNodesVec.end(), simulateJumpsAbstract::compareDist);
	57
	58	_nodes2JumpsExp.clear();
	59	_nodes2JumpsProb.clear();
	60	VVdouble zeroMatrix(getCombinedAlphabetSize());
	61	for (i = 0; i < getCombinedAlphabetSize(); ++i)
	62	zeroMatrix[i].resize(getCombinedAlphabetSize(), 0.0);
	63	Vdouble zeroVector(getCombinedAlphabetSize(),0.0);
	64	for (i = 0; i < _orderNodesVec.size(); ++i)
	65	{
	66	string nodeName = _orderNodesVec[i]->name();
	67	_nodes2JumpsExp[nodeName] = zeroMatrix;
	68	_nodes2JumpsProb[nodeName] = zeroMatrix;
	69	for (j=0; j<getCombinedAlphabetSize();++j)
	70	_totalTerminals[nodeName]=zeroVector;
	71	}
	72
	73	}
	74
	75
	76	//simulate jumps starting from startState. The simulation continue until the maxTime is reached. In each step:
	77	//1. Draw a new waiting time.
	78	//2. Go over all branches shorter than nextJumpTime and update their jumpsNum between the states that were switched
	79	// (these branches will not be affected by the current jump):
	80	// however they might have been affected by the previous jump
	81	//3. Draw a new state
	82	void simulateJumps::runOneIter(int startState)
	83	{
	84	MDOUBLE maxTime = _orderNodesVec[_orderNodesVec.size()-1]->dis2father();
	85	MDOUBLE totalTimeTillJump = 0.0;
	86	int jumpsNum = 0;
	87	int curState = startState;
	88	int smallestBranchNotUpdatedSofar = 0;
	89	vector<pair<int, int> > jumpsSoFar(0);
	90	while (totalTimeTillJump < maxTime)
	91	{
	92	MDOUBLE avgWaitingTime = 1 / _waitingTimeParams[curState];
	93	MDOUBLE nextJumpTime = totalTimeTillJump + talRandom::rand_exp(avgWaitingTime);
	94	//go over all branches that "finished" their simulation (shorter than nextJumpTime) and update with their _nodes2JumpsExp
	95	//with the jumps that occurred between the terminal Ids: startState-->curState
	96	for (int b = smallestBranchNotUpdatedSofar; b < _orderNodesVec.size(); ++b)
	97	{
	98	if (_orderNodesVec[b]->dis2father() > nextJumpTime)
	99	{
	100	smallestBranchNotUpdatedSofar = b;
	101	break;
	102	}
	103	string nodeName = _orderNodesVec[b]->name();
	104	//update all the jumps that occurred along the branch
	105	int terminalState = getCombinedState(startState, curState);
	106	_totalTerminals[nodeName][terminalState]++;
	107	//update all longer branches with all jumps that occurred till now
	108	vector<bool> jumpsSoFarBool(getCombinedAlphabetSize(),false);
	109	for (int j = 0; j < jumpsSoFar.size(); ++j)
	110	{
	111	int combinedJumpState = getCombinedState(jumpsSoFar[j].first, jumpsSoFar[j].second);
	112	jumpsSoFarBool[combinedJumpState]=true;
	113	_nodes2JumpsExp[nodeName][terminalState][combinedJumpState] += 1;
	114	}
	115	for (int combined=0;combined<jumpsSoFarBool.size();++combined)
	116	{
	117	if (jumpsSoFarBool[combined])
	118	_nodes2JumpsProb[nodeName][terminalState][combined]+=1;
	119	}
	120	}
	121	totalTimeTillJump = nextJumpTime;
	122	int nextState = giveRandomState(_alphabetSize,curState, _jumpProbs);
	123	jumpsSoFar.push_back(pair<int,int>(curState, nextState));
	124	curState = nextState;
	125	++jumpsNum;
	126	}
	127	}
	128
	129
	130	void simulateJumps::computeExpectationsAndPosterior(){
	131	//scale _nodes2JumpsExp so it will represent expectations
	132	map<string, VVdouble>::iterator iterExp = _nodes2JumpsExp.begin();
	133	for (; iterExp != _nodes2JumpsExp.end(); ++iterExp)
	134	{
	135	string nodeName = iterExp->first;
	136	for (int termState = 0; termState < getCombinedAlphabetSize(); ++termState)
	137	{
	138	for (int jumpState = 0; jumpState < getCombinedAlphabetSize(); ++jumpState)
	139	{
	140
	141	//(iter->second[termState][jumpState]) /= static_cast<MDOUBLE>(iterNum);
	142	map<string, Vdouble>::iterator iterTerm = _totalTerminals.find(nodeName);
	143	map<string, VVdouble>::iterator iterProb = _nodes2JumpsProb.find(nodeName);
	144	if ((iterTerm==_totalTerminals.end()) \|\| (iterProb==_nodes2JumpsProb.end()))
	145	{
	146	errorMsg::reportError("error in simulateJumps::runSimulation, unknown reason: cannot find nodeName in map");
	147	}
	148	if ((iterTerm->second[termState]==0)){ //never reached these terminal states
	149	if ((iterExp->second[termState][jumpState]==0) && (iterProb->second[termState][jumpState]==0)){
	150	if( termState == jumpState && (getStartId(termState)!=getEndId(termState) ) ){
	151	(iterExp->second[termState][jumpState]) = 1; // E.g - given start=0 end=1 there was at least one 0->1 jump
	152	(iterProb->second[termState][jumpState]) = 1; // E.g - given start=0 end=1 there was at least one 0->1 jump
	153	}
	154	continue;//leave the value of _nodes2JumpsExp and _nodes2JumpsProb as zero (or one)
	155	}
	156	else {
	157	errorMsg::reportError("error in simulateJumps::runSimulation, 0 times reached termState but non-zero for jumpCount");
	158	}
	159	}
	160	(iterExp->second[termState][jumpState]) /= iterTerm->second[termState];
	161
	162	(iterProb->second[termState][jumpState]) /= iterTerm->second[termState];
	163
	164	}
	165	}
	166	}
	167	}
	168
	169
	170	MDOUBLE simulateJumps::getExpectation(const string& nodeName, int terminalStart, int terminalEnd, int fromId, int toId)
	171	{
	172	map <string, VVdouble>::iterator pos;
	173	if ((pos = _nodes2JumpsExp.find(nodeName)) == _nodes2JumpsExp.end())
	174	{
	175	string err="error in simulateJumps::getExpectation: cannot find node "+nodeName;
	176	errorMsg::reportError(err);
	177	}
	178	int combinedTerminalState = getCombinedState(terminalStart, terminalEnd);
	179	int combinedJumpState = getCombinedState(fromId, toId);
	180	return (pos->second[combinedTerminalState][combinedJumpState]);
	181	}
	182
	183
	184	MDOUBLE simulateJumps::getProb(const string& nodeName, int terminalStart, int terminalEnd, int fromId, int toId){
	185	map <string, VVdouble>::iterator pos;
	186	if ((pos = _nodes2JumpsProb.find(nodeName)) == _nodes2JumpsProb.end())
	187	{
	188	string err="error in simulateJumps::getProb: cannot find node "+nodeName;
	189	errorMsg::reportError(err);
	190	}
	191	int combinedTerminalState = getCombinedState(terminalStart, terminalEnd);
	192	int combinedJumpState = getCombinedState(fromId, toId);
	193	return (pos->second[combinedTerminalState][combinedJumpState]);
	194	}⏎

+48

-0

libs/phylogeny/simulateJumps.h less more

	0	#ifndef ___SIMULATE_JUMPS__
	1	#define ___SIMULATE_JUMPS__
	2
	3	#include "simulateJumpsAbstract.h"
	4	using namespace std;
	5
	6	/******************************************************************
	7	This class implements simulateJumpsAbstract for small alphabets: (tested so far up to 3)
	8	*******************************************************************/
	9
	10	class simulateJumps:public simulateJumpsAbstract {
	11	public:
	12	simulateJumps(const tree& inTree, const stochasticProcess& sp, const int alphabetSize);
	13	virtual ~simulateJumps();
	14
	15	//for a branch length specified by a nodeName:
	16	//give the expected number of jumps (changes) from fromId to toId that occured along the specified branh length,
	17	//in which the starting character is terminalStart and the terminal character is terminalEnd
	18	MDOUBLE getExpectation(const string& nodeName, int terminalStart, int terminalEnd, int fromId, int toId);
	19	//same as above, except here we return the probability of a jump from fromId to toId given
	20	//terminal states terminalStart, terminalEnd in this branch
	21	MDOUBLE getProb(const string& nodeName, int terminalStart, int terminalEnd, int fromId, int toId);
	22
	23	private:
	24	void init();
	25	void runOneIter(int state);
	26	void computeExpectationsAndPosterior();
	27
	28
	29	private:
	30
	31	//_node2Jumps: maps a node name (which specify a branch length) to
	32	//the expected number of jumps between any two characters along the branch leading from the father to this node
	33	//given the terminal characters of this branch.
	34	//The matrix is 2D and not 4D because we use a "combined alphabet" to make access easier. see getCombinedState() for details
	35	//The first dimension is the combined terminal state and the second dimension is the combined jump state
	36	map<string, VVdouble> _nodes2JumpsExp;
	37
	38	//_node2JumpsProb: maps a node name (which specify a branch length) to
	39	//the probability of a jump between any two characters along the branch leading from the father to this node
	40	//given the terminal characters of this branch.
	41	//The matrix is 2D and not 4D because we use a "combined alphabet" to make access easier. see getCombinedState() for details
	42	//The first dimension is the combined terminal state and the second dimension is the combined jump state
	43	map<string, VVdouble> _nodes2JumpsProb;
	44
	45	};
	46
	47	#endif

+44

-0

libs/phylogeny/simulateJumpsAbstract.cpp less more

	0	#include "simulateJumpsAbstract.h"
	1
	2
	3	simulateJumpsAbstract::simulateJumpsAbstract(const tree& inTree, const stochasticProcess& sp, const int alphabetSize)
	4	: _tree(inTree), _sp(sp), _alphabetSize(alphabetSize)
	5	{
	6	}
	7
	8
	9
	10	//runSimulation: do the actual simulation. iterNum specifies the number of iterations starting from each state
	11	void simulateJumpsAbstract::runSimulation(int iterNum)
	12	{
	13	init();
	14	for (int state = 0; state < _alphabetSize; ++state)
	15	{
	16	for (int iter = 0; iter < iterNum; ++iter)
	17	{
	18	runOneIter(state);
	19	}
	20	}
	21
	22	computeExpectationsAndPosterior();
	23	}
	24
	25	//////////////////////////////////////////////////////////
	26	//combined two characters into a combined state.
	27	//For example. if the alphabet is {0,1,2} then the combined alphabet will be {0,1...8}.
	28	//The states (terminalStart, terminalEnd) = (0,2) then combinedId = 2.
	29	//The states (terminalStart, terminalEnd) = (1,2) then combinedId = 5. etc.
	30	int simulateJumpsAbstract::getCombinedState(int terminalStart, int terminalEnd) const
	31	{
	32	return (terminalStart * _alphabetSize + terminalEnd);
	33	}
	34	int simulateJumpsAbstract::getStartId(int combinedState) const
	35	{
	36	return combinedState / _alphabetSize;
	37	}
	38	int simulateJumpsAbstract::getEndId(int combinedState) const
	39	{
	40	return combinedState % _alphabetSize;
	41	}
	42	//////////////////////////////////////////////////////////
	43

+77

-0

libs/phylogeny/simulateJumpsAbstract.h less more

	0	#ifndef ___SIMULATE_JUMPS_ABSTRACT_
	1	#define ___SIMULATE_JUMPS_ABSTRACT_
	2
	3	#include "definitions.h"
	4	#include "tree.h"
	5	#include "stochasticProcess.h"
	6	#include "alphabet.h"
	7
	8	#include <map>
	9	#include <vector>
	10	using namespace std;
	11
/******************************************************************
This is an abstract base class for the various implementations of simulateJumps.
It was created to be a father class to the generic (original) implementation,
class simulateJumps (working on alphabets of either 0,1,2 or 0,1),
and to class simulateCodonsJumps, a variant of simulateJumps that can handle the
61-sized alphabet without memory limitations.

The simulateJumps algorithm simulates jumps (events) along differing branch lengths (according to a
given tree), with the aim of giving the expectation of the number of jumps
from state a to state b given that the terminal states at the end of the branch are
x and y.
*******************************************************************/
	24
	25	class simulateJumpsAbstract {
	26	public:
	27	simulateJumpsAbstract(const tree& inTree, const stochasticProcess& sp, const int alphabetSize);
	28	virtual ~simulateJumpsAbstract(){}
	29	virtual void runSimulation(int iterNum = 10000);
	30
	31	//for a branch length specified by a nodeName:
	32	//give the expected number of jumps (changes) from fromId to toId that occured along the specified branh length,
	33	//in which the starting character is terminalStart and the terminal character is terminalEnd
	34	virtual MDOUBLE getExpectation(const string& nodeName, int terminalStart, int terminalEnd, int fromId, int toId) = 0;
	35	//same as above, except here we return the probability of a jump from fromId to toId given
	36	//terminal states terminalStart, terminalEnd in this branch
	37	virtual MDOUBLE getProb(const string& nodeName, int terminalStart, int terminalEnd, int fromId, int toId) = 0;
	38	virtual int getTotalTerminal(const string& nodeName, int terminalStart, int terminalEnd){
	39	map<string, Vdouble>::iterator iterTerm = _totalTerminals.find(nodeName);
	40	return (int)iterTerm->second[getCombinedState(terminalStart,terminalEnd)];
	41	}
	42
	43	protected:
	44	virtual int getCombinedState(int terminalStart, int terminalEnd) const;
	45	virtual int getCombinedAlphabetSize() const {return _alphabetSize*_alphabetSize;}
	46	virtual int getStartId(int combinedState) const;
	47	virtual int getEndId(int combinedState) const;
	48
	49	virtual void init() = 0;
	50	virtual void runOneIter(int state) = 0;
	51	virtual void computeExpectationsAndPosterior() = 0;
	52
	53	// a comparison function to be used in sort init
	54	static bool compareDist(tree::nodeP node1, tree::nodeP node2){ return (node1->dis2father() < node2->dis2father());}
	55
	56
	57	protected:
	58	tree _tree;
	59	stochasticProcess _sp;
	60	const int _alphabetSize;
	61
	62	Vdouble _waitingTimeParams;//each entry is the lambda parameter of the exponential distribution modeling the waiting time for "getting out" of state i
	63
	64	//_jumpProbs[i][j] is the probability of jumping from state i to state j (given that a change has ocured).
	65	VVdouble _jumpProbs;
	66
	67	//the number of times we reached a certain combination of terminal states for each branch lengths
	68	//e.g. the number of times we observed 0,1 at terminal states given branch length 0.03
	69	//this is used to to afterwards normalize (i.e. compute the expectation) the _nodes2JumpsExp values
	70	map<string, Vdouble> _totalTerminals;
	71
	72	vector<tree::nodeP> _orderNodesVec; //internal use: the branch are sorted in ascending order
	73
	74	};
	75
	76	#endif

+342

-0

libs/phylogeny/simulateRateShiftJumps.cpp less more

	0	#include "simulateRateShiftJumps.h"
	1	#include "talRandom.h"
	2	#include "someUtil.h"
	3	#include "replacementModelSSRV.h"
	4	#include "generalGammaDistribution.h"
	5
	6	#include <algorithm>
	7
	8
	9	//TO DO:
	10	//1. input: a specific node vector and not a tree
	11	//2. all instances of syn are converted to acc
	12	//3. function of mulAlphabet: compareCategories, static function which also receives alphabetSize
	13
	14	simulateRateShiftJumps::simulateRateShiftJumps(const tree& inTree, const stochasticProcess& sp, const int alphabetSize)
	15	: simulateJumpsAbstract(inTree,sp,alphabetSize)
	16	{
	17	// note: ontainging the number of rate categories, probably an easier way to do this:
	18	replacementModelSSRV* pMulRM = static_cast<replacementModelSSRV*>(sp.getPijAccelerator()->getReplacementModel());
	19	generalGammaDistribution* generalGammaDist = static_cast<generalGammaDistribution*>(pMulRM->getDistribution());
	20	_numRateCategories = generalGammaDist->categories();
	21	if (alphabetSize % _numRateCategories != 0) {
	22	errorMsg::reportError("error in simulateRateShiftJumps::simulateRateShiftJumps, alphabetSize must divide by number of rate categories");
	23	}
	24	_baseAlphabetSize = alphabetSize / _numRateCategories;
	25	}
	26
	27	simulateRateShiftJumps::~simulateRateShiftJumps()
	28	{
	29	}
	30
	31	//runSimulation: do the actual simulation. iterNum specifies the number of iterations starting from each state
	32	void simulateRateShiftJumps::runSimulation(int iterNum, vector <tree::nodeP> inputNodes)
	33	{
	34	init(inputNodes);
	35	for (int state = 0; state < _alphabetSize; ++state)
	36	{
	37	for (int iter = 0; iter < iterNum; ++iter)
	38	{
	39	runOneIter(state);
	40	}
	41	}
	42
	43	computeExpectationsAndPosterior();
	44	}
	45
	46
	47	void simulateRateShiftJumps::init()
	48	{
	49	_waitingTimeParams.clear();
	50	_waitingTimeParams.resize(_alphabetSize);
	51	int i, j;
	52	for (i = 0; i < _alphabetSize; ++i)
	53	{
	54	_waitingTimeParams[i] = -_sp.dPij_dt(i, i, 0.0);
	55
	56	}
	57
	58	//init _jumpProbs.
	59	_jumpProbs.clear();
	60	_jumpProbs.resize(_alphabetSize);
	61	for (i = 0; i < _alphabetSize; ++i)
	62	{
	63	MDOUBLE sum = 0.0;
	64	_jumpProbs[i].resize(_alphabetSize);
	65	for (j = 0; j < _alphabetSize; ++j)
	66	{
	67	if (i == j)
	68	_jumpProbs[i][j] = 0.0;
	69	else
	70	{
	71	_jumpProbs[i][j] = _sp.dPij_dt(i, j, 0.0) / _waitingTimeParams[i];
	72	}
	73	sum += _jumpProbs[i][j];
	74	}
	75	if (! DEQUAL(sum, 1.0,0.001)){
	76	string err = "error in simulateRateShiftJumps::init(): sum probabilities is not 1 and equal to ";
	77	err+=double2string(sum);
	78	errorMsg::reportError(err);
	79	}
	80	}
	81
	82
	83	}
	84
	85	void simulateRateShiftJumps::init(vector <tree::nodeP> inputNodes)
	86	{
	87	init();
	88	//init the vector of waiting times.
	89	//init _orderNodesVec: a vector in which the branch lengths are ordered in ascending order
	90	//_tree.getAllNodes(_orderNodesVec, _tree.getRoot()); // here instead: _orderNodesVec = input nodesVec, and then sort
	91	_orderNodesVec = inputNodes;
	92	sort(_orderNodesVec.begin(), _orderNodesVec.end(), simulateJumpsAbstract::compareDist);
	93
	94	_nodes2JumpsExp.clear();
	95	_nodes2JumpsProb.clear();
	96	//
	97	vector<pair<MDOUBLE,MDOUBLE> > zeroCombinedStates2jumps;
	98	int i,j;
	99	for(i = 0;i < getCombinedAlphabetSize();++i){
	100	pair<MDOUBLE,MDOUBLE> acc_and_decc_jumps(0.0,0.0);
	101	zeroCombinedStates2jumps.push_back(acc_and_decc_jumps);
	102	}
	103	Vdouble zeroVector(getCombinedAlphabetSize(),0.0);
	104	for (i = 0; i < _orderNodesVec.size(); ++i)
	105	{
	106	string nodeName = _orderNodesVec[i]->name();
	107	_nodes2JumpsExp[nodeName] = zeroCombinedStates2jumps;
	108	_nodes2JumpsProb[nodeName] = zeroCombinedStates2jumps;
	109	for (j=0; j<getCombinedAlphabetSize();++j)
	110	_totalTerminals[nodeName]=zeroVector;
	111	}
	112	}
	113
	114
	115	//simulate jumps starting from startState. The simulation continue until the maxTime is reached. In each step:
	116	//1. Draw a new waiting time.
	117	//2. Go over all branches shorter than nextJumpTime and update their jumpsNum between the states that were switched
	118	// (these branches will not be affected by the current jump):
	119	// however they might have been affected by the previous jump
	120	//3. Draw a new state
	121	void simulateRateShiftJumps::runOneIter(int startState)
	122	{
	123	mulAlphabet::rateShiftType my_rateShiftType = mulAlphabet::noRateShift;
	124	MDOUBLE maxTime = _orderNodesVec[_orderNodesVec.size()-1]->dis2father();
	125	MDOUBLE totalTimeTillJump = 0.0;
	126	int curState = startState;
	127	int smallestBranchNotUpdatedSofar = 0;
	128	vector<pair<int, int> > jumpsSoFar(0);
	129	while (totalTimeTillJump < maxTime)
	130	{
	131	MDOUBLE avgWaitingTime = 1 / _waitingTimeParams[curState];
	132	MDOUBLE nextJumpTime = totalTimeTillJump + talRandom::rand_exp(avgWaitingTime);
	133	//go over all branches that "finished" their simulation (shorter than nextJumpTime) and update with their _nodes2JumpsExp
	134	//with the jumps that occured between the terminal Ids: startState-->curState
	135	for (int b = smallestBranchNotUpdatedSofar; b < _orderNodesVec.size(); ++b)
	136	{
	137	if (_orderNodesVec[b]->dis2father() > nextJumpTime)
	138	{
	139	smallestBranchNotUpdatedSofar = b;
	140	break;
	141	}
	142	string nodeName = _orderNodesVec[b]->name();
	143	//update all the jumps that occured along the branch
	144	int terminalState = getCombinedState(startState, curState);
	145	_totalTerminals[nodeName][terminalState]++;
	146	//update all longer branches with all jumps that occurred till now
	147	/* vector<bool> jumpsSoFarBool(getCombinedAlphabetSize(),false);*/
	148	// There's no need for the jumpsSoFarBool vector because we want to count
	149	// the number of syn subs and not just to note that there has been at least 1
	150	// The final probability is calculated in computeExpectationsAndPosterior
	151	for (int j = 0; j < jumpsSoFar.size(); ++j)
	152	{
	153	my_rateShiftType = mulAlphabet::compareCategories(jumpsSoFar[j].first,jumpsSoFar[j].second,_baseAlphabetSize,_numRateCategories);
	154	/* int combinedJumpState = getCombinedState(jumpsSoFar[j].first, jumpsSoFar[j].second);
	155	jumpsSoFarBool[combinedJumpState]=true;*/
	156	if(my_rateShiftType == mulAlphabet::acceleration)
	157	{
	158	_nodes2JumpsExp[nodeName][terminalState].first += 1;
	159	_nodes2JumpsProb[nodeName][terminalState].first += 1;
	160	}
	161	else if(my_rateShiftType == mulAlphabet::deceleration)
	162	{
	163	_nodes2JumpsExp[nodeName][terminalState].second += 1;
	164	_nodes2JumpsProb[nodeName][terminalState].second += 1;
	165	//cout<<"debug: jump dec for node name "<<nodeName<<" from start "<<startState<<" to "<<curState<<endl;//debug
	166	}
	167	}
	168
	169	/*for (int combined=0;combined<jumpsSoFarBool.size();++combined)
	170	{
	171	if (jumpsSoFarBool[combined]){
	172	if(my_rateShiftType == mulAlphabet::acceleration)
	173	_nodes2JumpsProb[nodeName][terminalState].first += 1;
	174	else if(my_rateShiftType == mulAlphabet::deceleration)
	175	_nodes2JumpsProb[nodeName][terminalState].second += 1;
	176	}
	177	}*/
	178
	179	}
	180	totalTimeTillJump = nextJumpTime;
	181	int nextState = giveRandomState(_alphabetSize,curState,_jumpProbs);
	182	jumpsSoFar.push_back(pair<int,int>(curState, nextState));
	183	curState = nextState;
	184	}
	185	}
	186
	187
	188	void simulateRateShiftJumps::computeExpectationsAndPosterior(){
	189	//scale _nodes2JumpsExp so it will represent expectations
	190	map<string, vector<pair<MDOUBLE,MDOUBLE> > >::iterator iterExp = _nodes2JumpsExp.begin();
	191	for (; iterExp != _nodes2JumpsExp.end(); ++iterExp)
	192	{//each node
	193	string nodeName = iterExp->first;
	194	for (int termState = 0; termState < getCombinedAlphabetSize(); ++termState)
	195	{
	196	MDOUBLE totalJumps4currentNodeAndTermState = 0;
	197	map<string, Vdouble>::iterator iterTerm = _totalTerminals.find(nodeName);
	198	map<string, vector<pair<MDOUBLE,MDOUBLE> > >::iterator iterProb = _nodes2JumpsProb.find(nodeName);
	199	if ((iterTerm==_totalTerminals.end()) \|\| (iterProb==_nodes2JumpsProb.end()))
	200	{
	201	errorMsg::reportError("error in simulateJumps::runSimulation, unknown reason: cannot find nodeName in map");
	202	}
	203
	204	if (iterTerm->second[termState]==0){ //never reached these terminal states
	205	if((iterExp->second[termState].first == 0)&&(iterExp->second[termState].second == 0)&&
	206	((iterProb->second[termState].first == 0)&&(iterProb->second[termState].second == 0)))
	207	{
	208	int startID = getStartId(termState);
	209	int endID = getEndId(termState);
	210	if (startID != endID) // if the terminal states are different there was at least one startID->endID jump
	211	{
	212	mulAlphabet::rateShiftType my_rateShiftType = mulAlphabet::compareCategories(startID,endID,_baseAlphabetSize,_numRateCategories);
	213	if(my_rateShiftType == mulAlphabet::acceleration)
	214	{
	215	iterExp->second[termState].first = 1;
	216	iterProb->second[termState].first = 1;
	217	}
	218	else if(my_rateShiftType == mulAlphabet::deceleration)
	219	{
	220	iterExp->second[termState].second = 1;
	221	iterProb->second[termState].second = 1;
	222	}
	223	totalJumps4currentNodeAndTermState = ((iterProb->second[termState].first) + (iterProb->second[termState].second));
	224	if(totalJumps4currentNodeAndTermState)
	225	{
	226	(iterProb->second[termState].first) /= totalJumps4currentNodeAndTermState;
	227	(iterProb->second[termState].second) /= totalJumps4currentNodeAndTermState;
	228	}
	229	}
	230	continue;
	231	}
	232
	233	else
	234	errorMsg::reportError("error in simulateRateShiftJumps::runSimulation, 0 times reached termState but non-zero for jumpCount");
	235	}
	236	(iterExp->second[termState].first) /= iterTerm->second[termState];
	237	(iterExp->second[termState].second) /= iterTerm->second[termState];
	238
	239	totalJumps4currentNodeAndTermState = ((iterProb->second[termState].first) + (iterProb->second[termState].second));
	240	if(totalJumps4currentNodeAndTermState)
	241	{
	242	(iterProb->second[termState].first) /= totalJumps4currentNodeAndTermState;
	243	(iterProb->second[termState].second) /= totalJumps4currentNodeAndTermState;
	244	}
	245	}
	246	}
	247	}
	248
	249
	250	MDOUBLE simulateRateShiftJumps::getExpectation(const string& nodeName, int terminalStart, int terminalEnd, int fromId, int toId)
	251	{
	252	//map <string, VVdouble>::iterator pos;//Old
	253	map<string, vector<pair<MDOUBLE,MDOUBLE> > >::iterator pos;
	254	if ((pos = _nodes2JumpsExp.find(nodeName)) == _nodes2JumpsExp.end())
	255	{
	256	string err="error in simulateRateShiftJumps::getExpectation: cannot find node "+nodeName;
	257	errorMsg::reportError(err);
	258	}
	259	int combinedTerminalState = getCombinedState(terminalStart, terminalEnd);
	260	//Old
	261	//int combinedJumpState = getCombinedState(fromId, toId);
	262	//return (pos->second[combinedTerminalState][combinedJumpState]);
	263
	264	MDOUBLE expectation=0.0;
	265	// !!! go over this to make sure this is correct!!
	266	if(mulAlphabet::compareCategories(fromId,toId,_baseAlphabetSize,_numRateCategories) == mulAlphabet::acceleration)
	267	expectation = pos->second[combinedTerminalState].first;
	268	else if(mulAlphabet::compareCategories(fromId,toId,_baseAlphabetSize,_numRateCategories) == mulAlphabet::deceleration)
	269	expectation = pos->second[combinedTerminalState].second;
	270	return (expectation);
	271	}
	272
	273	MDOUBLE simulateRateShiftJumps::getExpectation(
	274	const string& nodeName,
	275	int terminalStart,
	276	int terminalEnd,
	277	mulAlphabet::rateShiftType my_rateShiftType)
	278	{
	279	map<string, vector<pair<MDOUBLE,MDOUBLE> > >::iterator pos;
	280	if ((pos = _nodes2JumpsExp.find(nodeName)) == _nodes2JumpsExp.end())
	281	{
	282	string err="error in simulateRateShiftJumps::getExpectation: cannot find node "+nodeName;
	283	errorMsg::reportError(err);
	284	}
	285	int combinedTerminalState = getCombinedState(terminalStart, terminalEnd);
	286	MDOUBLE expectation=0.0;
	287	if(my_rateShiftType == mulAlphabet::acceleration)
	288	expectation = pos->second[combinedTerminalState].first;
	289	else if(my_rateShiftType == mulAlphabet::deceleration)
	290	expectation = pos->second[combinedTerminalState].second;
	291	else
	292	errorMsg::reportError("simulateRateShiftJumps::getExpectation does not support computations for non rate-shifts");
	293
	294	return (expectation);
	295	}
	296
	297
	298	MDOUBLE simulateRateShiftJumps::getProb(const string& nodeName, int terminalStart, int terminalEnd, int fromId, int toId){
	299	//map <string, VVdouble>::iterator pos;
	300	map<string, vector<pair<MDOUBLE,MDOUBLE> > >::iterator pos;
	301	if ((pos = _nodes2JumpsProb.find(nodeName)) == _nodes2JumpsProb.end())
	302	{
	303	string err="error in simulateRateShiftJumps::getProb: cannot find node "+nodeName;
	304	errorMsg::reportError(err);
	305	}
	306	int combinedTerminalState = getCombinedState(terminalStart, terminalEnd);
	307	//Old
	308	//int combinedJumpState = getCombinedState(fromId, toId);
	309	//return (pos->second[combinedTerminalState][combinedJumpState]);
	310
	311	MDOUBLE prob=0.0;
	312	//!! go over this to make sure
	313	if(mulAlphabet::compareCategories(fromId,toId,_baseAlphabetSize,_numRateCategories) == mulAlphabet::acceleration)
	314	prob = pos->second[combinedTerminalState].first;
	315	else if(mulAlphabet::compareCategories(fromId,toId,_baseAlphabetSize,_numRateCategories) == mulAlphabet::deceleration)
	316	prob = pos->second[combinedTerminalState].second;
	317	return (prob);
	318	}
	319
	320	MDOUBLE simulateRateShiftJumps::getProb(
	321	const string& nodeName,
	322	int terminalStart,
	323	int terminalEnd,
	324	mulAlphabet::rateShiftType my_rateShiftType)
	325	{
	326	map<string, vector<pair<MDOUBLE,MDOUBLE> > >::iterator pos;
	327	if ((pos = _nodes2JumpsProb.find(nodeName)) == _nodes2JumpsProb.end())
	328	{
	329	string err="error in simulateRateShiftJumps::getProb: cannot find node "+nodeName;
	330	errorMsg::reportError(err);
	331	}
	332	int combinedTerminalState = getCombinedState(terminalStart, terminalEnd);
	333	MDOUBLE prob=0.0;
	334	if(my_rateShiftType == mulAlphabet::acceleration)
	335	prob = pos->second[combinedTerminalState].first;
	336	else if(my_rateShiftType == mulAlphabet::deceleration)
	337	prob = pos->second[combinedTerminalState].second;
	338	else
	339	errorMsg::reportError("simulateRateShiftJumps::getProb does not support probabilities of non rate-shifts");
	340	return (prob);
	341	}

+55

-0

libs/phylogeny/simulateRateShiftJumps.h less more

	0	#ifndef ___SIMULATE_RATESHIFT_JUMPS__
	1	#define ___SIMULATE_RATESHIFT_JUMPS__
	2
	3	#include "simulateJumpsAbstract.h"
	4	#include "mulAlphabet.h"
	5	using namespace std;
	6
	7	/******************************************************************
	8	This class implements simulateJumpsAbstract for multiplied alphabet used for rate-shift
	9	*******************************************************************/
	10
	11	class simulateRateShiftJumps:public simulateJumpsAbstract {
	12	public:
	13	simulateRateShiftJumps(const tree& inTree, const stochasticProcess& sp, const int alphabetSize);
	14	virtual ~simulateRateShiftJumps();
	15	void runSimulation(int iterNum, vector <tree::nodeP> inputNodes);
	16	//for a branch length specified by a nodeName:
	17	//give the expected number of jumps (changes) from fromId to toId that occured along the specified branh length,
	18	//in which the starting character is terminalStart and the terminal character is terminalEnd
	19	MDOUBLE getExpectation(const string& nodeName, int terminalStart, int terminalEnd, int fromId, int toId);
	20	MDOUBLE getExpectation(const string& nodeName, int terminalStart, int terminalEnd, mulAlphabet::rateShiftType my_rateShiftType);
	21	//same as above, except here we return the probability of a jump from fromId to toId given
	22	//terminal states terminalStart, terminalEnd in this branch
	23	MDOUBLE getProb(const string& nodeName, int terminalStart, int terminalEnd, int fromId, int toId);
	24	MDOUBLE getProb(const string& nodeName, int terminalStart, int terminalEnd, mulAlphabet::rateShiftType my_rateShiftType);
	25
	26	private:
	27	void init();
	28	void init(vector <tree::nodeP> inputNodes);
	29	void runOneIter(int state);
	30	void computeExpectationsAndPosterior();
	31
	32	private:
	33
	34	//_node2Jumps: maps a node name (which specify a branch length) to
	35	//the expected number of synonymous and nonsynonymous jumps between any two characters along the branch leading from the father to this node
	36	//given the terminal characters of this branch.
	37	//We use a "combined alphabet" to make access easier. see getCombinedState() for details
	38	//The dimension of the vector is the combined terminal state and the pair elements are: synonymous and non-synonymous jumps, respectively.
	39
	40	map<string, vector<pair<MDOUBLE,MDOUBLE> > > _nodes2JumpsExp;
	41
	42	//_node2JumpsProb: maps a node name (which specify a branch length) to
	43	//the probability of a synonymous and non-synonymous jump between any two characters along the branch leading from the father to this node
	44	//given the terminal characters of this branch.
	45	//We use a "combined alphabet" to make access easier. see getCombinedState() for details
	46	//The dimension of the vector is the combined terminal state and the pair elements are: synonymous and non-synonymous jumps, respectively
	47	map<string, vector<pair<MDOUBLE,MDOUBLE> > > _nodes2JumpsProb;
	48
	49	int _baseAlphabetSize;
	50	int _numRateCategories;
	51
	52	};
	53
	54	#endif

+230

-0

libs/phylogeny/simulateTree.cpp less more

	0	// $Id: simulateTree.cpp 8508 2010-08-12 15:21:04Z rubi $
	1
	2	#include "definitions.h"
	3	#include "treeUtil.h"
	4	#include "simulateTree.h"
	5	#include "talRandom.h"
	6	#include "gammaDistribution.h"
	7	#include "codon.h"
	8
	9	simulateTree::simulateTree(const tree& _inEt,
	10	const stochasticProcess& sp,
	11	const alphabet* alph) :
	12	_et(_inEt), _sp(sp),_alph(alph),_avgSubtitutionsPerSite(0.0) {};
	13
	14	simulateTree::~simulateTree() {}
	15
	16
	17	void simulateTree::generate_seq(int seqLength) {
	18	sequence justAseq(_alph);
	19	_simulatedSequences.resize(_et.getNodesNum(),justAseq);
	20	for (int i=0; i < _simulatedSequences.size(); ++i) {
	21	_simulatedSequences[i].resize(seqLength);
	22	}
	23	generateRootSeq(seqLength);
	24
	25	vector<MDOUBLE> rateVec(seqLength);
	26	for (int h = 0; h < seqLength; h++) {
	27	int theRanCat = getRandCategory(h);
	28	rateVec[h] = _sp.rates(theRanCat);
	29	}
	30
	31	_avgSubtitutionsPerSite = 0.0;
	32	for (int p=0 ; p < _et.getRoot()->getNumberOfSons() ; ++p) {
	33	recursiveGenerateSpecificSeq(rateVec, seqLength, _et.getRoot()->getSon(p));
	34	}
	35	_avgSubtitutionsPerSite /= 1.0*seqLength;
	36	}
	37
	38	void simulateTree::generate_rates_continuous_gamma(const int seqLength,const MDOUBLE alpha, Vdouble rates)
	39	{
	40	rates.clear();
	41	rates.resize(seqLength);
	42	for (int h = 0; h < seqLength; h++) {
	43	rates[h] = talRandom::SampleGamma(alpha);
	44	}
	45	}
	46
	47	void simulateTree::generate_seq_continuous_gamma(int seqLength) {
	48	sequence justAseq(_alph);
	49	_simulatedSequences.resize(_et.getNodesNum(),justAseq);
	50	for (int i=0; i < _simulatedSequences.size(); ++i) {
	51	_simulatedSequences[i].resize(seqLength);
	52	}
	53	generateRootSeq(seqLength);
	54
	55	vector<MDOUBLE> rateVec(seqLength);
	56	MDOUBLE alpha= (static_cast<gammaDistribution*>(_sp.distr()))->getAlpha();
	57	for (int h = 0; h < seqLength; h++) {
	58	rateVec[h] = talRandom::SampleGamma(alpha);
	59	}
	60
	61	_avgSubtitutionsPerSite = 0.0;
	62	for (int p=0 ; p < _et.getRoot()->getNumberOfSons() ; ++p) {
	63	recursiveGenerateSpecificSeq(rateVec, seqLength, _et.getRoot()->getSon(p));
	64	}
	65	_avgSubtitutionsPerSite /= 1.0*seqLength;
	66	}
	67
	68	void simulateTree::generate_seqWithRateVectorNoStopCodon(const Vdouble& simRates, int seqLength)
	69	{
	70	if (_alph->size() != 4)
	71	errorMsg::reportError("generate_seqWithRateVectorNoStopCodon is applicable only for nucleotide process");
	72	if (seqLength %3 != 0)
	73	errorMsg::reportError("generate_seqWithRateVectorNoStopCodon: seqLenth should be a multiplicative of 3");
	74	if (simRates.size() != seqLength)
	75	errorMsg::reportError("generate_seqWithRateVectorNoStopCodon: the size of simRates should be identical to seqLenth");
	76
	77	// sequence justAseq(_alph);
	78	// vector<sequence> simulatedSequences(_et.getNodesNum(),justAseq);
	79	vector<sequence> simulatedSequences;
	80	//generate three nucleotide positions at a time. Repeat each position if the generated sequences contain stop codon
	81	Vdouble rateVec(3);
	82	bool bStopCodonFound = false;
	83	codon codonAlph;
	84	for (int p = 0; p < seqLength; p+=3)
	85	{
	86	rateVec[0] = simRates[p];
	87	rateVec[1] = simRates[p+1];
	88	rateVec[2] = simRates[p+2];
	89	//generate 3 nucleotide positions with no stop codon
	90	for (int loop = 0; loop < 1000; ++loop)
	91	{
	92	bStopCodonFound = false;
	93	generate_seqWithRateVector(rateVec, 3);
	94	for (int s = 0; s < _simulatedSequences.size(); ++s)
	95	{
	96	string codonStr = _simulatedSequences[s].toString();
	97	if (codonAlph.isStopCodon(codonStr))
	98	{
	99	bStopCodonFound = true;
	100	break;
	101	}
	102	}
	103	if (!bStopCodonFound)
	104	break;
	105	}
	106	if (bStopCodonFound)
	107	errorMsg::reportError("Could not generate a position without stop codon");
	108	//append positions to the positions generated so far
	109	if (p == 0)
	110	simulatedSequences = _simulatedSequences; //this will copy also the names of the sequences
	111	else
	112	{
	113	for (int i = 0; i < simulatedSequences.size(); ++i)
	114	simulatedSequences[i] += _simulatedSequences[i];
	115	}
	116	}
	117	_simulatedSequences = simulatedSequences;
	118	}
	119
	120
	121
	122	void simulateTree::generate_seqWithRateVector(const Vdouble& rateVec, const int seqLength) {
	123	sequence justAseq(_alph);
	124	_simulatedSequences.resize(_et.getNodesNum(),justAseq);
	125	for (int i=0; i < _simulatedSequences.size(); ++i) {
	126	_simulatedSequences[i].resize(seqLength);
	127	}
	128	generateRootSeq(seqLength);
	129
	130	_avgSubtitutionsPerSite = 0.0;
	131	for (int p=0 ; p < _et.getRoot()->getNumberOfSons() ; ++p) {
	132	recursiveGenerateSpecificSeq(rateVec,seqLength,_et.getRoot()->getSon(p));
	133	}
	134	_avgSubtitutionsPerSite /= 1.0*seqLength;
	135	}
	136
	137	void simulateTree::generateRootSeq(int seqLength) {
	138	for (int i = 0; i < seqLength; i++) {
	139	_simulatedSequences[_et.getRoot()->id()][i] = giveRandomChar();
	140	}
	141
	142	_simulatedSequences[_et.getRoot()->id()].setAlphabet(_alph);
	143	_simulatedSequences[_et.getRoot()->id()].setName(_et.getRoot()->name());
	144	_simulatedSequences[_et.getRoot()->id()].setID(_et.getRoot()->id());
	145
	146	}
	147
	148
	149	void simulateTree::recursiveGenerateSpecificSeq(
	150	const vector<MDOUBLE> &rateVec,
	151	const int seqLength,
	152	tree::nodeP myNode) {
	153
	154	for (int y = 0; y < seqLength; y++) {
	155	MDOUBLE lenFromFather=myNode->dis2father()*rateVec[y];
	156	int aaInFather = _simulatedSequences[myNode->father()->id()][y];
	157	int newChar = giveRandomChar(aaInFather,lenFromFather,y);
	158	if(newChar != aaInFather) _avgSubtitutionsPerSite += 1;
	159	_simulatedSequences[myNode->id()][y] = newChar;
	160	}
	161	_simulatedSequences[myNode->id()].setAlphabet(_alph);
	162	_simulatedSequences[myNode->id()].setName(myNode->name());
	163	_simulatedSequences[myNode->id()].setID(myNode->id());
	164	for (int x =0 ; x < myNode->getNumberOfSons(); ++x) {
	165	recursiveGenerateSpecificSeq(rateVec, seqLength, myNode->getSon(x));
	166	}
	167	}
	168
	169	int simulateTree::giveRandomChar() const {
	170	for (int loop =0 ;loop<100000 ;loop++) {
	171	MDOUBLE theRandNum = talRandom::giveRandomNumberBetweenZeroAndEntry(1.0);
	172	MDOUBLE sum = 0.0;
	173	for (int j=0;j<_sp.alphabetSize();++j) {
	174	sum+=_sp.freq(j);
	175	if (theRandNum<sum) return j;
	176	}
	177	}
	178	errorMsg::reportError("Could not give random character. The reason is probably that the P_i do not sum to one.");
	179	return 1;
	180	}
	181
	182	int simulateTree::giveRandomChar(const int letterInFatherNode,
	183	const MDOUBLE length,
	184	const int pos) const {
	185	assert(letterInFatherNode>=0);
	186	assert(letterInFatherNode<_sp.alphabetSize());
	187	for (int loop =0 ;loop<100000 ;loop++) {
	188	MDOUBLE theRandNum = talRandom::giveRandomNumberBetweenZeroAndEntry(1.0);
	189	MDOUBLE sum = 0.0;
	190	for (int j=0;j<_sp.alphabetSize();++j) {
	191	sum+=_sp.Pij_t(letterInFatherNode,j, length);
	192	if (theRandNum<sum) return j;
	193	}
	194	}
	195	errorMsg::reportError("Could not give random character. The reason is probably that the Pij_t do not sum to one.");
	196	return 1;
	197	}
	198
	199
	200	int simulateTree::getRandCategory(const int pos) const {
	201	MDOUBLE theRandNum = talRandom::giveRandomNumberBetweenZeroAndEntry(1);
	202	MDOUBLE sum = 0.0;
	203	for (int j=0;j<_sp.categories() ;++j) {
	204	sum+=_sp.ratesProb(j);
	205	if (theRandNum<sum) return j;
	206	}
	207	errorMsg::reportError(" error in function simulateTree::getRandCategory() ");// also quit the program
	208	return -1;
	209	}
	210
	211	sequenceContainer simulateTree::toSeqData() {
	212	sequenceContainer myseqData;
	213	for (int i=0; i < _simulatedSequences.size(); ++i) {
	214	myseqData.add(_simulatedSequences[i]);
	215	}
	216	return myseqData;
	217	}
	218
	219	sequenceContainer simulateTree::toSeqDataWithoutInternalNodes() {
	220	sequenceContainer myseqData;
	221	for (int i=0; i < _simulatedSequences.size(); ++i) {
	222	tree::nodeP theCurNode = _et.findNodeByName(_simulatedSequences[i].name());
	223	if (theCurNode == NULL)
	224	errorMsg::reportError("could not find the specified name: " + _simulatedSequences[i].name());
	225	if (theCurNode->isInternal()) continue;
	226	myseqData.add(_simulatedSequences[i]);
	227	}
	228	return myseqData;
	229	}

+52

-0

libs/phylogeny/simulateTree.h less more

	0	// $Id: simulateTree.h 8507 2010-08-12 15:20:59Z rubi $
	1
	2	#ifndef ___SIMULATE_TREE
	3	#define ___SIMULATE_TREE
	4
	5	#include "definitions.h"
	6	#include "tree.h"
	7	#include "stochasticProcess.h"
	8	#include "sequenceContainer.h"
	9
	10	//class sequenceData; // to be able to go to simulate data.
	11
	12	class simulateTree {
	13	public:
	14	explicit simulateTree(const tree& _inEt,const stochasticProcess& sp,
	15	const alphabet* alph);
	16	void generate_seq(int seqLength);
	17
	18	// This function generates the sequences not using the discrete gamma, but rather,
	19	// the rates are sampled from the continuous distribution.
	20	// It assumes the Gamma distribution has mean 1 (alpha = beta).
	21	void generate_seq_continuous_gamma(int seqLength);
	22
	23	void generate_seqWithRateVector(const Vdouble& simRates, const int seqLength);
	24	//these function do the same simulation as above but check that no stop codon is created.
	25	//applicable only when the stochasticProcess is based on nucleotides
	26	void generate_seqWithRateVectorNoStopCodon(const Vdouble& simRates, int seqLength);
	27
	28	tree gettree() {return _et;}
	29	virtual ~simulateTree();
	30	sequenceContainer toSeqData();
	31	sequenceContainer toSeqDataWithoutInternalNodes();
	32	void generate_rates_continuous_gamma(const int seqLength,const MDOUBLE alpha,Vdouble rates);
	33	MDOUBLE getAvgSub() {return _avgSubtitutionsPerSite;}
	34
	35	private:
	36	void generateRootSeq(int seqLength);
	37	void recursiveGenerateSpecificSeq(const Vdouble& rateVec, int seqLength, tree::nodeP myNode);
	38	int giveRandomChar() const;
	39	int giveRandomChar(const int letterInFatherNode, const MDOUBLE length,const int pos) const;
	40	int getRandCategory(const int pos) const;
	41
	42	vector<sequence> _simulatedSequences; // the sequences (nodes * seqLen)
	43	tree _et;
	44	const stochasticProcess& _sp;
	45	const alphabet* _alph;
	46	MDOUBLE _avgSubtitutionsPerSite;
	47
	48	};
	49
	50	#endif
	51

+142

-0

libs/phylogeny/simulateWithDependence.cpp less more

	0	#include "simulateWithDependence.h"
	1
	2	/*
	3	This code receives a tree file and simulates sequences accordingly using:
	4	simulateTree st1(treeIn, *_sp, alph);
	5	st1.generate_seq(num_pos_with_same_k);
	6	which were written by another beloved group member.
	7
	8	Its feature is to simulate co-evolution between pairs of positions of binary data. Basic logic:
	9	1. the basic concept is to use the regular independent model with 4 states to code a dependent model with 2 states.
	10	thus, all possible pairs of data: 00, 01, 10, 11 are coded into A, C, G, T
	11	2. dependency between positions can be described as a tendency to have the same character (that is: 00 or 11). with this model we can accelerate
	12	the rate of evolution when an "unstable" state occurs (rate increases when 01 (C) or 10 (G))
	13
	14	For more details, please see http://copap.tau.ac.il/benchmark.php and
	15	Ofir Cohen, Haim Ashkenazy, Eli Levy Karin, David Burstein and Tal Pupko (2013)
	16	CoPAP: Co-evolution of Presence-Absence Patterns.
	17	Nucleic Acids Research 2013; doi: 10.1093/nar/gkt471
	18
	19	Eli Levy Karin, 2013
	20
	21	----------------- usage example: ------------------
	22	#include "simulateWithDependence.h"
	23
	24	using namespace sim_with_dep;
	25
	26	int main(int argc, char** argv) {
	27
	28	string treeFile = argv[1];
	29
	30	double exit_code;
	31
	32	exit_code = simulate_with_dependence (treeFile, 0.5, 14, 500, 500, 0, 1, 0.893195, 1, 4);
	33
	34	return 0;
	35	}
	36	-------------- end usage example: ---------------
	37
	38	*/
	39	namespace sim_with_dep
	40	{
	41
	42	double simulate_with_dependence (string treeFile, double PI_1, double init_k, int total_positions, int num_pos_with_same_k, double k_increase, int is_gamma, double alpha, double beta, int num_cat)
	43	{
	44
	45	//read Newick format tree
	46	tree treeIn(treeFile);
	47
	48	//four states alphabet A C G T (will later be rplaced to 00,01,10,11)
	49	alphabet* alph = new nucleotide;
	50
	51	sequenceContainer SC_all; //this will contain all positions
	52
	53	//parameters:
	54	double PI_0 = 1-PI_1;
	55	double k = init_k; //will be increased with each iteration
	56
	57	//parameters:
	58	int jump_size = total_positions / num_pos_with_same_k;
	59
	60	for(int i=0; i<jump_size; i++)
	61	{
	62	Vdouble freqs; //stationary probabilities PI_00, PI_01, PI_10, PI_11
	63	double TOTAL = kPI_1PI_1 + 2PI_0PI_1 + kPI_0PI_0;
	64	freqs.push_back(kPI_0PI_0 / TOTAL); //PI_00 = kPI_0PI_0 / TOTAL
	65	freqs.push_back(PI_0PI_1 / TOTAL); //PI_01 = PI_0PI_1 / TOTAL
	66	freqs.push_back(PI_0PI_1 / TOTAL); //PI_10 = PI_0PI_1 / TOTAL
	67	freqs.push_back(kPI_1PI_1 / TOTAL); //PI_11 = kPI_1PI_1 / TOTAL
	68
	69	//Q matrix (partial values - the rest are calculated by gtrModel using freqs and these values)
	70	MDOUBLE a2c = PI_1; // --> c2a = freqs[a]a2c/freqs[c] --> c2a = ((kPI_0PI_0 / TOTAL)PI_1)/(PI_0PI_1 / TOTAL) = kPI_0
	71	MDOUBLE a2g = PI_1;
	72	MDOUBLE a2t = 0;
	73	MDOUBLE c2g = 0;
	74	MDOUBLE c2t = k*PI_1;
	75	MDOUBLE g2t = k*PI_1;
	76
	77	//starting the evolutionary model
	78	distribution *currDist = NULL;
	79	if(is_gamma == 1)
	80	{
	81	currDist = new generalGammaDistribution(alpha,beta,num_cat); // ---> in the future we might want to turn these into param
	82	}
	83	else
	84	{
	85	currDist = new uniDistribution; // no among site rate variation
	86	}
	87
	88	replacementModel *probMod = NULL;
	89	pijAccelerator *pijAcc = NULL;
	90
	91	probMod = new gtrModel(freqs,a2c,a2g,a2t,c2g,c2t,g2t);
	92	pijAcc = new trivialAccelerator(probMod);
	93	stochasticProcess* _sp = new stochasticProcess(currDist, pijAcc);
	94
	95	//simulate:
	96	simulateTree st1(treeIn, *_sp, alph);
	97	st1.generate_seq(num_pos_with_same_k); //simulate num_pos_with_same_k positions with the current k
	98
	99	if(i == 0)
	100	{
	101	SC_all = st1.toSeqDataWithoutInternalNodes(); //first time
	102	}
	103	else
	104	{
	105	sequenceContainer SC = st1.toSeqDataWithoutInternalNodes(); //concatenate new positions to the ones you have
	106	SC_all.concatenate(SC);
	107	}
	108
	109	delete currDist;
	110	delete probMod;
	111	delete pijAcc;
	112	delete _sp;
	113
	114	k = k + k_increase; //k = 1 , 1.05 , 1.1 , ... , 5.5
	115	}
	116
	117	//prepare out file name:
	118	std::stringstream sstm;
	119	if(is_gamma == 1)
	120	{
	121	sstm << treeFile << ".gammaRateNoInv.PI_1=" << PI_1 << ".init_k=" << init_k << ".k_group_size=" << num_pos_with_same_k << ".k_increase=" << k_increase << ".fas";
	122	}
	123	else
	124	{
	125	sstm << treeFile << ".NoRate.PI_1=" << PI_1 << ".init_k=" << init_k << ".k_group_size=" << num_pos_with_same_k << ".k_increase=" << k_increase << ".fas";
	126	}
	127	std::string seqOutputFile = sstm.str();
	128
	129	//write out:
	130	ofstream seq_sim(seqOutputFile.c_str());
	131	fastaFormat::write(seq_sim,SC_all);
	132	seq_sim.close();
	133
	134
	135	delete alph;
	136
	137	return 0;
	138
	139	}
	140
	141	};⏎

+56

-0

libs/phylogeny/simulateWithDependence.h less more

	0	// simulate positions with dependence 2013 09 22 Eli Levy Karin
	1
	2	/*
	3	This code receives a tree file and simulates sequences accordingly using:
	4	simulateTree st1(treeIn, *_sp, alph);
	5	st1.generate_seq(num_pos_with_same_k);
	6	which were written by another beloved group member.
	7
	8	Its feature is to simulate co-evolution between pairs of positions of binary data. Basic logic:
	9	1. the basic concept is to use the regular independent model with 4 states to code a dependent model with 2 states.
	10	thus, all possible pairs of data: 00, 01, 10, 11 are coded into A, C, G, T
	11	2. dependency between positions can be described as a tendency to have the same character (that is: 00 or 11). with this model we can accelerate
	12	the rate of evolution when an "unstable" state occurs (rate increases when 01 or 10)
	13
	14	For more details, please see http://copap.tau.ac.il/benchmark.php and
	15	Ofir Cohen, Haim Ashkenazy, Eli Levy Karin, David Burstein and Tal Pupko (2013)
	16	CoPAP: Co-evolution of Presence-Absence Patterns.
	17	Nucleic Acids Research 2013; doi: 10.1093/nar/gkt471
	18
	19	Eli Levy Karin, 2013
	20
	21	*/
	22
	23	#ifndef ___SIM_WITH_DEP
	24	#define ___SIM_WITH_DEP
	25
	26	#include <string>
	27	#include <iostream>
	28	#include "tree.h"
	29	#include "alphabet.h"
	30	#include "nucleotide.h"
	31	#include "simulateTree.h"
	32	#include "trivialAccelerator.h"
	33	#include "uniDistribution.h" // distribution of rates across sites
	34	#include "generalGammaDistributionPlusInvariant.h"
	35	#include "generalGammaDistribution.h"
	36	#include "fastaFormat.h"
	37	#include <fstream>
	38	#include "gtrModel.h"
	39	#include <sstream>
	40
	41	namespace sim_with_dep {
	42	double simulate_with_dependence (string treeFile, double PI_1, double init_k, int total_positions, int num_pos_with_same_k, double k_increase, int is_gamma, double alpha, double beta, int num_cat);
	43	/*
	44	treeFile - newick format
	45	total_positions - number of positions to simulate (note that you'll get double this nuber in binary positions)
	46	num_pos_with_same_k - one can have a different k for parts of the pairs (a gradient, for example).
	47	Make sure that: (num_pos_with_same_k <= total_positions) and (total_positions % num_pos_with_same_k = 0)
	48	k_increase - if you decide to simulate with different k's - set by how much k should increase
	49	is_gamma - if 0 uniDistribution, if 1 generalGammaDistribution
	50	alpha, beta, number of rate categories are only relevant if is_gamma=1 (otherwise, you can put whatever there)
	51	*/
	52
	53	};
	54
	55	#endif

+480

-0

libs/phylogeny/siteSpecificRate.cpp less more

	0	// $Id: siteSpecificRate.cpp 11008 2012-10-16 21:54:04Z rubi $
	1
	2	#include "siteSpecificRate.h"
	3	#include "numRec.h"
	4	#include "checkcovFanctors.h"
	5	#include "definitions.h"
	6
	7
	8	/********************************************************************************************
	9	ML - full data (1)
	10	*********************************************************************************************/
	11	MDOUBLE computeML_siteSpecificRate(Vdouble & ratesV,
	12	Vdouble & likelihoodsV,
	13	const sequenceContainer& sc,
	14	const stochasticProcess& sp,
	15	const tree& et,
	16	const MDOUBLE maxRate,//20.0f
	17	const MDOUBLE tol){//=0.0001f;
	18
	19	ratesV.resize(sc.seqLen());
	20	likelihoodsV.resize(sc.seqLen());
	21	MDOUBLE Lsum = 0.0;
	22
	23	for (int pos=0; pos < sc.seqLen(); ++pos) {
	24	computeML_siteSpecificRate(pos,sc,sp,et,ratesV[pos],likelihoodsV[pos],maxRate,tol);
	25	assert(log(likelihoodsV[pos])>0.0);
	26	Lsum += log(likelihoodsV[pos]);
	27	LOG(6,<<" rate of pos: "<<pos<<" = "<<ratesV[pos]<<endl);
	28	}
	29	LOG(5,<<" number of sites: "<<sc.seqLen()<<endl);
	30	return Lsum;
	31	}
	32	/********************************************************************************************
	33	ML - per Pos (1.1)
	34	*********************************************************************************************/
	35	// note that this places the likelihood, rather then the loglikelihood into posL
	36	void computeML_siteSpecificRate(int pos,
	37	const sequenceContainer& sc,
	38	const stochasticProcess& sp,
	39	const tree &et,
	40	MDOUBLE& bestRate,
	41	MDOUBLE& posL,
	42	const MDOUBLE maxRate,
	43	const MDOUBLE tol) {
	44	LOG(6,<<".");
	45	MDOUBLE ax=0.00001f,bx=maxRate*0.25,cx=maxRate; // MN
	46	posL=-brent(ax,bx,cx,Cevaluate_L_given_r(sc,et,sp,pos),tol,&bestRate);
	47	}
	48
	49
	50	/********************************************************************************************
	51	ML - full data AttributesVecs (1)
	52	*********************************************************************************************/
	53	MDOUBLE computeML_siteSpecificRate(Vdouble & ratesV,
	54	Vdouble & likelihoodsV,
	55	const Vint& spAttributesVec,
	56	const Vint& treeAttributesVec,
	57	const vector<tree> & etVec,
	58	const vector<const stochasticProcess *> & spVec,
	59	const sequenceContainer& sc,
	60	const MDOUBLE maxRate,
	61	const MDOUBLE tol){
	62	MDOUBLE Lsum = 0.0;
	63	ratesV.resize(sc.seqLen()); // the rates themselves
	64	likelihoodsV.resize(sc.seqLen()); // the log likelihood of each position
	65
	66	for (int pos=0; pos < sc.seqLen(); ++pos) {
	67	LOG(6,<<".");
	68	MDOUBLE bestR=-1.0; // tree1
	69	// MDOUBLE LmaxR1=0;
	70
	71	// getting the right tree for the specific position:
	72	const tree* treeForThisPosition=NULL;
	73	if ((etVec.size() >0 ) && (treeAttributesVec[pos]>0)) {
	74	treeForThisPosition = & etVec[ treeAttributesVec[pos] -1];
	75	} else {
	76	errorMsg::reportError("tree vector is empty, or treeAttribute is empty, or treeAttribute[pos] is zero (it should be one)");
	77	}
	78
	79	// getting the right stochastic process for the specific position:
	80
	81	const stochasticProcess* spForThisPosition=NULL;
	82
	83	if ((spVec.size() >0 ) && (spAttributesVec[pos]>0)) {
	84	spForThisPosition = spVec[ spAttributesVec[pos] -1];
	85	} else {
	86	errorMsg::reportError("stochastic process vector is empty, or spAttributesVec is empty, or spAttribute[pos] is zero (it should be one)");
	87	}
	88
	89	computeML_siteSpecificRate(pos,sc,spForThisPosition,treeForThisPosition,bestR,likelihoodsV[pos],maxRate,tol);
	90	ratesV[pos] = bestR;
	91	assert(log(likelihoodsV[pos])>0.0);
	92	Lsum += log(likelihoodsV[pos]);
	93	LOG(6,<<" rate of pos: "<<pos<<" = "<<ratesV[pos]<<endl);
	94	}
	95	LOG(5,<<" number of sites: "<<sc.seqLen()<<endl);
	96	return Lsum;
	97	}
	98	/********************************************************************************************
	99	ML - AttributesVecs (1.1)
	100	*********************************************************************************************/
	101	MDOUBLE computeML_siteSpecificRate(Vdouble & ratesV,
	102	Vdouble & likelihoodsV,
	103	const Vint& treeAttributesVec, //treeAttributesVec
	104	const vector<tree> & etVec,
	105	const stochasticProcess& sp,
	106	const sequenceContainer& sc,
	107	const MDOUBLE maxRate,
	108	const MDOUBLE tol) {
	109	Vint spAttributesVec(sc.seqLen(),1);
	110	vector<const stochasticProcess* > spVec;
	111	spVec.push_back(&sp);
	112	return computeML_siteSpecificRate(ratesV,likelihoodsV,
	113	spAttributesVec,treeAttributesVec,etVec,spVec,sc,maxRate,tol);
	114	}
	115	/********************************************************************************************
	116	ML - AttributesVecs (1.1)
	117	*********************************************************************************************/
	118	MDOUBLE computeML_siteSpecificRate(Vdouble & ratesV,
	119	Vdouble & likelihoodsV,
	120	const Vint& spAttributesVec, // spAttributesVec
	121	const tree & et,
	122	const vector<const stochasticProcess* > & spVec,
	123	const sequenceContainer& sc,
	124	const MDOUBLE maxRate,
	125	const MDOUBLE tol){
	126	Vint treeAttributesVec(sc.seqLen(),1);
	127	vector<tree> etVec;
	128	etVec.push_back(et);
	129	return computeML_siteSpecificRate(ratesV,likelihoodsV,
	130	spAttributesVec,treeAttributesVec,etVec,spVec,sc,maxRate,tol);
	131	}
	132
	133
	134
	135	// THE BAYESIAN EB_EXP PART OF RATE ESTIMATION. //
	136	/********************************************************************************************
	137	EB_EXP - full data (1)
	138	*********************************************************************************************/
	139	void computeEB_EXP_siteSpecificRate(Vdouble & ratesV,
	140	Vdouble & stdV,
	141	Vdouble & lowerBoundV,
	142	Vdouble & upperBoundV,
	143	const sequenceContainer& sc,
	144	const stochasticProcess& sp,
	145	const tree& et,
	146	const MDOUBLE alphaConf,
	147	VVdouble* LpostPerCat, //2 fill (*LpostPerCat)[cat][pos]
	148	unObservableData* unObservableData_p)
	149	{
	150	ratesV.resize(sc.seqLen());
	151	stdV.resize(sc.seqLen());
	152	lowerBoundV.resize(sc.seqLen());
	153	upperBoundV.resize(sc.seqLen());
	154
	155	computePijGam cpg;
	156	cpg.fillPij(et,sp);
	157	for (int pos=0; pos < sc.seqLen(); ++pos) {
	158	computeEB_EXP_siteSpecificRate(pos,sc,sp,cpg, et,ratesV[pos],stdV[pos],lowerBoundV[pos],upperBoundV[pos],alphaConf,LpostPerCat,unObservableData_p);
	159	LOG(6,<<" rate of pos: "<<pos<<" = "<<ratesV[pos]<<endl);
	160	}
	161	LOG(6,<<" number of sites: "<<sc.seqLen()<<endl);
	162	}
	163
	164
	165	/********************************************************************************************
	166	EB_EXP - per Pos (1.1)
	167	*********************************************************************************************/
	168	void computeEB_EXP_siteSpecificRate(int pos,
	169	const sequenceContainer& sc,
	170	const stochasticProcess& sp,
	171	const computePijGam& cpg,
	172	const tree &et,
	173	MDOUBLE& bestRate,
	174	MDOUBLE & stdRate,
	175	MDOUBLE & lowerConf,
	176	MDOUBLE & upperConf,
	177	const MDOUBLE alphaConf, // alpha of 0.05 is considered 0.025 for each side.
	178	VVdouble* LpostPerCat, //2 fill (*LpostPerCat)[cat][pos]
	179	unObservableData* unObservableData_p)
	180	{
	181	// here we compute P(r \| data)
	182	VdoubleRep pGivenR(sp.categories(),0.0);
	183	doubleRep sum=0;
	184	doubleRep LofPos_givenRateCat;
	185	LOG(8,<<pos+1<<"\t"); //DEBUG
	186	for (int cat=0; cat < sp.categories(); ++cat) {
	187	LofPos_givenRateCat = likelihoodComputation::getLofPos(pos,et,sc,cpg[cat],sp);
	188
	189	// ver1 - fix likelihoodForEachCat by LforMissingDataPerCat
	190	//if(unObservableData_p){
	191	// LofPos_givenRateCat = LofPos_givenRateCat/(1- unObservableData_p->getLforMissingDataPerCat()[cat]);
	192	//}
	193	// ver2 - fix likelihoodForEachCat by LforMissingDataAll
	194	if(unObservableData_p){
	195	LofPos_givenRateCat = LofPos_givenRateCat/(1- exp(unObservableData_p->getlogLforMissingData()));
	196	}
	197	pGivenR[cat] = LofPos_givenRateCat * sp.ratesProb(cat);
	198	sum+=pGivenR[cat];
	199	}
	200	LOG(8,<<"\n"); //DEBUG
	201	assert(sum!=0);
	202
	203	// here we compute sigma r * P(r \| data)
	204	doubleRep sumOfSquares(0.0);
	205	doubleRep bestRate_dblRep(0.0);
	206
	207	LOG(6,<<"Pos "<<pos<<" content = "<<sc[0][pos]<<" ,total likelihood = "<<sum<<endl); //DEBUG
	208
	209	for (int j=0; j < sp.categories(); ++j) {
	210	pGivenR[j]/=sum; // So that pGivenR is probability.
	211	// From here on we can convert it back
	212	// to MDOUBLE because it's not a very
	213	// small likelihood any more
	214
	215	// ver3 - fix likelihoodForEachCat after multiplied by Prob - Error??
	216	//if(unObservableData_p){
	217	// pGivenR[j] = pGivenR[j]/(1- (unObservableData_p->getLforMissingDataPerCat())[j]) ; // Note: each postProbCat corrected by unObs of a
	218	//}
	219
	220	if (LpostPerCat){
	221	(*LpostPerCat)[j][pos]= convert(pGivenR[j]);
	222	}
	223	doubleRep tmp = pGivenR[j]*sp.rates(j);
	224	LOG(8,<<j<<"\t"<<sp.rates(j)<<"\t"<<convert(pGivenR[j])<<"\t"); //DEBUG
	225	bestRate_dblRep += tmp;
	226	sumOfSquares += (tmp*sp.rates(j));
	227	}
	228
	229	bestRate = convert(bestRate_dblRep);
	230	MDOUBLE varRate = convert(sumOfSquares) - convert(bestRate*bestRate);
	231	MDOUBLE tolerance = 0.0001; // tolerance for variance is not very exact, and also exact computation not very important
	232	if (varRate<-tolerance)
	233	LOGnOUT(3,<<"Error in computeEB_EXP_siteSpecificRate pos="<<pos<<", varRate="<<varRate<<" (< 0) \n");
	234	if ((varRate<0) && (varRate>=-tolerance))
	235	varRate = 0;
	236	stdRate = sqrt(varRate);
	237
	238	// detecting the confidence intervals.
	239	MDOUBLE oneSideConfAlpha = alphaConf/2.0; // because we are computing the two tail.
	240	MDOUBLE cdf = 0.0; // cumulative density function.
	241	MDOUBLE lower_interval = 0;
	242	MDOUBLE total_interval = 0;
	243	int k=0;
	244	while (k < sp.categories()){
	245	cdf += convert(pGivenR[k]);
	246	if (cdf >oneSideConfAlpha) {
	247	if(k>0) {
	248	lowerConf = sp.rates(k-1);
	249	lower_interval = convert(pGivenR[k-1]);
	250	}
	251	else {
	252	lowerConf = 0;
	253	lower_interval = 0;
	254	}
	255	break;
	256	}
	257	k++;
	258	}
	259	while (k < sp.categories()) {
	260	if (cdf >(1.0-oneSideConfAlpha)) {
	261	upperConf = sp.rates(k);
	262	total_interval = cdf - lower_interval;
	263	break;
	264	}
	265	++k;
	266	cdf += convert(pGivenR[k]);
	267	}
	268	if (k==sp.categories()) {
	269	upperConf = sp.rates(k-1);
	270	total_interval = 1.0 - lower_interval;
	271	}
	272	LOG(7,<<"Pos: "<<pos<<", conf_interval= "<<total_interval<<endl);
	273	}
	274
	275	// THE PROPORTIONAL BAYESIAN EB_EXP PART OF RATE ESTIMATION. //
	276	/********************************************************************************************
	277	EB_EXP - full data (1)
	278	*********************************************************************************************/
	279	void computeEB_EXP_siteSpecificRateProportional(Vdouble & ratesV,
	280	Vdouble & stdV,
	281	Vdouble & lowerBoundV,
	282	Vdouble & upperBoundV,
	283	const vector<sequenceContainer>& scVec,
	284	multipleStochasticProcess& msp,
	285	const gammaDistribution* pProportionDist,
	286	const tree& et,
	287	const MDOUBLE alphaConf,
	288	VVdouble* LpostPerCat) //2 fill (*LpostPerCat)[globalRate][localRate][pos]
	289	{
	290	ratesV.resize(scVec.size());
	291	stdV.resize(scVec.size());
	292	lowerBoundV.resize(scVec.size());
	293	upperBoundV.resize(scVec.size());
	294
	295	for (int gene=0; gene < scVec.size(); ++gene) {
	296	computeEB_EXP_siteSpecificRateProportional(gene,scVec[gene],msp,pProportionDist,et,ratesV[gene],stdV[gene],lowerBoundV[gene],upperBoundV[gene],alphaConf,LpostPerCat);
	297	LOG(6,<<" rate of gene "<<gene<<" = "<<ratesV[gene]<<endl);
	298	}
	299	LOG(6,<<" number of genes: "<<scVec.size()<<endl);
	300	}
	301
	302
	303	/********************************************************************************************
	304	EB_EXP - per Pos (1.1)
	305	*********************************************************************************************/
	306	void computeEB_EXP_siteSpecificRateProportional(int gene,
	307	const sequenceContainer& sc,
	308	multipleStochasticProcess& msp,
	309	const gammaDistribution* pProportionDist,
	310	const tree &et,
	311	MDOUBLE& bestRate,
	312	MDOUBLE & stdRate,
	313	MDOUBLE & lowerConf,
	314	MDOUBLE & upperConf,
	315	const MDOUBLE alphaConf,
	316	VVdouble* LpostPerCat)
	317	{
	318
	319	// here we compute P(r \| data)
	320	VdoubleRep pGivenR(pProportionDist->categories(),0.0);
	321	doubleRep sum=0;
	322	doubleRep LofGene_givenRateCat = 0.0;
	323	LOG(8,<<gene+1<<"\t"); //DEBUG
	324	for (int cat=0; cat < pProportionDist->categories(); ++cat) {
	325	msp.getSp(gene)->setGlobalRate(pProportionDist->rates(cat));
	326	computePijGam cpg;
	327	cpg.fillPij(et,*msp.getSp(gene));
	328	for (int k=0; k < sc.seqLen(); ++k) {
	329	LofGene_givenRateCat += likelihoodComputation::getLofPosProportional(k,//pos,
	330	et, //const tree&
	331	sc, // sequenceContainer& sc,
	332	cpg, //const computePijGam& ,
	333	*msp.getSp(gene)); //removed the prior of the globar rate categ cause it is multiplied below
	334	}
	335	pGivenR[cat] = LofGene_givenRateCat*pProportionDist->ratesProb(cat);
	336	sum+=pGivenR[cat];
	337	}
	338	LOG(8,<<"\n"); //DEBUG
	339	assert(sum!=0);
	340
	341	// here we compute sigma r * P(r \| data)
	342	doubleRep sumOfSquares(0.0);
	343	doubleRep bestRate_dblRep(0.0);
	344
	345	for (int j=0; j < pProportionDist->categories(); ++j) {
	346	pGivenR[j]/=sum; // So that pGivenR is probability.
	347	// From here on we can convert it back
	348	// to MDOUBLE because it's not a very
	349	// small likelihood any more
	350
	351	if (LpostPerCat){
	352	(*LpostPerCat)[j][gene]= convert(pGivenR[j]);
	353	}
	354	doubleRep tmp = pGivenR[j]*pProportionDist->rates(j);
	355	LOG(8,<<j<<"\t"<<pProportionDist->rates(j)<<"\t"<<convert(pGivenR[j])<<"\t"); //DEBUG
	356	bestRate_dblRep += tmp;
	357	sumOfSquares += (tmp*pProportionDist->rates(j));
	358	}
	359
	360	bestRate = convert(bestRate_dblRep);
	361	MDOUBLE varRate = convert(sumOfSquares) - convert(bestRate*bestRate);
	362	MDOUBLE tolerance = 0.0001; // tolerance for variance is not very exact, and also exact computation not very important
	363	if (varRate<-tolerance)
	364	LOGnOUT(3,<<"Error in computeEB_EXP_siteSpecificRateProportional gene="<<gene<<", varRate="<<varRate<<" (< 0) \n");
	365	if ((varRate<0) && (varRate>=-tolerance))
	366	varRate = 0;
	367	stdRate = sqrt(varRate);
	368
	369
	370	// detecting the confidence intervals.
	371	MDOUBLE oneSideConfAlpha = alphaConf/2.0; // because we are computing the two tail.
	372	MDOUBLE cdf = 0.0; // cumulative density function.
	373	MDOUBLE lower_interval = 0;
	374	MDOUBLE total_interval = 0;
	375	int k=0;
	376	while (k < pProportionDist->categories()){
	377	cdf += convert(pGivenR[k]);
	378	if (cdf >oneSideConfAlpha) {
	379	if(k>0) {
	380	lowerConf = pProportionDist->rates(k-1);
	381	lower_interval = convert(pGivenR[k-1]);
	382	}
	383	else {
	384	lowerConf = 0;
	385	lower_interval = 0;
	386	}
	387	break;
	388	}
	389	k++;
	390	}
	391	while (k < pProportionDist->categories()) {
	392	if (cdf >(1.0-oneSideConfAlpha)) {
	393	upperConf = pProportionDist->rates(k);
	394	total_interval = cdf - lower_interval;
	395	break;
	396	}
	397	++k;
	398	cdf += convert(pGivenR[k]);
	399	}
	400	if (k==pProportionDist->categories()) {
	401	upperConf = pProportionDist->rates(k-1);
	402	total_interval = 1.0 - lower_interval;
	403	}
	404	LOG(7,<<"Gene: "<<gene<<", conf_interval= "<<total_interval<<endl);
	405	}
	406
	407	/********************************************************************************************
	408	EB_EXP - full data AttributesVecs (1)
	409	*********************************************************************************************/
	410	void computeEB_EXP_siteSpecificRate(Vdouble & ratesV,
	411	Vdouble & stdV,
	412	Vdouble & lowerBoundV,
	413	Vdouble & upperBoundV,
	414	const Vint& spAttributesVec,
	415	const Vint& treeAttributesVec,
	416	const sequenceContainer& sc,
	417	const vector<tree> & etVec,
	418	const vector<const stochasticProcess *> & spVec,
	419	const MDOUBLE alphaConf){
	420	ratesV.resize(sc.seqLen());
	421	stdV.resize(sc.seqLen());
	422	lowerBoundV.resize(sc.seqLen());
	423	upperBoundV.resize(sc.seqLen());
	424	for (int treeNum=0; treeNum<etVec.size(); ++treeNum) {
	425	for (int spNum = 0; spNum<spVec.size(); ++spNum) {
	426	computePijGam cpg;
	427	cpg.fillPij(etVec[treeNum],*(spVec[spNum]));
	428	for (int pos=0; pos < sc.seqLen(); ++pos) {
	429	if (((spAttributesVec[pos]-1)!=spNum ) \|\| ((treeAttributesVec[pos]-1)!=treeNum )) continue;
	430	const tree* treeForThisPosition=NULL;
	431	assert ((etVec.size() >0 ) && (treeAttributesVec[pos]>0));
	432	treeForThisPosition = & etVec[ treeAttributesVec[pos] -1];
	433	const stochasticProcess* spForThisPosition=NULL;
	434	assert ((spVec.size() >0 ) && (spAttributesVec[pos]>0));
	435	spForThisPosition = spVec[ spAttributesVec[pos] -1];
	436	computeEB_EXP_siteSpecificRate(pos,sc,spForThisPosition,cpg, treeForThisPosition,ratesV[pos],stdV[pos],lowerBoundV[pos],upperBoundV[pos],alphaConf);
	437	LOG(6,<<" rate of pos: "<<pos<<" = "<<ratesV[pos]<<endl);
	438	}
	439	}
	440	}
	441	LOG(6,<<" number of sites: "<<sc.seqLen()<<endl);
	442	}
	443
	444	/********************************************************************************************
	445	EB_EXP - AttributesVecs - one tree many sps
	446	*********************************************************************************************/
	447	void computeEB_EXP_siteSpecificRate(Vdouble & ratesV,
	448	Vdouble & stdV,
	449	Vdouble & lowerBoundV,
	450	Vdouble & upperBoundV,
	451	const Vint& spAttributesVec,
	452	const sequenceContainer& sc,
	453	const tree & et,
	454	const vector<const stochasticProcess *> & spVec,
	455	const MDOUBLE alphaConf){
	456	Vint etAttributesVec(sc.seqLen(),1);
	457	vector<tree> etVec;
	458	etVec.push_back(et);
	459	computeEB_EXP_siteSpecificRate(ratesV,stdV,lowerBoundV,upperBoundV,spAttributesVec,etAttributesVec,sc,etVec,spVec,alphaConf);
	460	}
	461
	462	/********************************************************************************************
	463	EB_EXP - AttributesVecs - one sp many trees
	464	*********************************************************************************************/
	465	void computeEB_EXP_siteSpecificRate(Vdouble & ratesV,
	466	Vdouble & stdV,
	467	Vdouble & lowerBoundV,
	468	Vdouble & upperBoundV,
	469	const Vint& treeAttributesVec,
	470	const sequenceContainer& sc,
	471	const vector<tree> & etVec,
	472	const stochasticProcess & sp,
	473	const MDOUBLE alphaConf){
	474	Vint spAttributesVec(sc.seqLen(),1);
	475	vector<const stochasticProcess* > spVec;
	476	spVec.push_back(&sp);
	477	computeEB_EXP_siteSpecificRate(ratesV,stdV,lowerBoundV,upperBoundV,spAttributesVec,treeAttributesVec,sc,etVec,spVec,alphaConf);
	478	}
	479

+168

-0

libs/phylogeny/siteSpecificRate.h less more

	0	// $Id: siteSpecificRate.h 11008 2012-10-16 21:54:04Z rubi $
	1
	2	#ifndef ___SITE_SPECIFIC_RATE
	3	#define ___SITE_SPECIFIC_RATE
	4
	5	#include "definitions.h"
	6	#include "tree.h"
	7	#include "sequenceContainer.h"
	8	#include "stochasticProcess.h"
	9	#include "multipleStochasticProcess.h"
	10	#include "computePijComponent.h"
	11	#include "unObservableData.h"
	12
	13
	14	// the function returns the total log-likelihood of the rates.
	15	// it is used for computing the rates, when there is one tree common to
	16	// all positions and 1 stochastic process common to all position.
	17
	18	MDOUBLE computeML_siteSpecificRate(Vdouble & ratesV,
	19	Vdouble & likelihoodsV,
	20	const sequenceContainer& sd,
	21	const stochasticProcess& sp,
	22	const tree& et,
	23	const MDOUBLE maxRate=20.0f,
	24	const MDOUBLE tol=0.0001f);
	25
	26	// this function is the same as the one above, but here, each site can have its
	27	//own tree, or its own stochastic process.
	28	//etVec: a vector of possible trees.
	29	//spVec: a vector of possible stochastic processes.
	30	//treeAttributesVec: defines which tree is assigned to a specific position.
	31	//NOTE: the possible attributes are 1,2..., so that the tree for position i
	32	//is etVec[treeAttributesVec[i]-1]
	33	//The same is true for the stochastic process attributes vector.
	34	MDOUBLE computeML_siteSpecificRate(Vdouble & ratesV,
	35	Vdouble & likelihoodsV,
	36	const Vint& spAttributesVec,
	37	const Vint& treeAttributesVec,
	38	const vector<tree> & etVec,
	39	const vector<const stochasticProcess *> & spVec,
	40	const sequenceContainer& sc,
	41	const MDOUBLE maxRate,
	42	const MDOUBLE tol);
	43
	44	// this function is the same as the one above, but here,
	45	// there are only tree attributes.
	46	MDOUBLE computeML_siteSpecificRate(Vdouble & ratesV,
	47	Vdouble & likelihoodsV,
	48	const Vint& treeAttributesVec,
	49	const vector<tree> & etVec,
	50	const stochasticProcess& sp,
	51	const sequenceContainer& sc,
	52	const MDOUBLE maxRate,
	53	const MDOUBLE tol);
	54
	55	// this function is the same as the one above, but here,
	56	// there are only stochastic process attributes.
	57	MDOUBLE computeML_siteSpecificRate(Vdouble & ratesV,
	58	Vdouble & likelihoodsV,
	59	const Vint& spAttributesVec,
	60	const tree & et,
	61	const vector<const stochasticProcess* > & spVec,
	62	const sequenceContainer& sc,
	63	const MDOUBLE maxRate,
	64	const MDOUBLE tol);
	65
	66	void computeML_siteSpecificRate(int pos,
	67	const sequenceContainer& sc,
	68	const stochasticProcess& sp,
	69	const tree &et,
	70	MDOUBLE& bestRate,
	71	MDOUBLE& posL,
	72	const MDOUBLE maxRate,
	73	const MDOUBLE tol);
	74
	75	// BAYESIAN PART
	76
	77	// 1 sequence container, 1 tree, 1 position
	78	void computeEB_EXP_siteSpecificRate(int pos,
	79	const sequenceContainer& sc,
	80	const stochasticProcess& sp,
	81	const computePijGam& cpg,
	82	const tree &et,
	83	MDOUBLE& bestRate,
	84	MDOUBLE & stdRate,
	85	MDOUBLE & lowerConf,
	86	MDOUBLE & upperConf,
	87	const MDOUBLE alphaConf,
	88	VVdouble* LpostPerCat=NULL,
	89	unObservableData* unObservableData_p=NULL);
	90
	91	// 1 stochastic process, 1 tree, all positions
	92	void computeEB_EXP_siteSpecificRate(Vdouble & ratesV,
	93	Vdouble & stdV,
	94	Vdouble & lowerBoundV,
	95	Vdouble & upperBoundV,
	96	const sequenceContainer& sc,
	97	const stochasticProcess& sp,
	98	const tree& et,
	99	const MDOUBLE alphaConf,
	100	VVdouble* LpostPerCat=NULL,
	101	unObservableData* unObservableData_p=NULL);
	102
	103
	104	// many stochastic process, many tree, all positions
	105	void computeEB_EXP_siteSpecificRate(Vdouble & ratesV,
	106	Vdouble & stdV,
	107	Vdouble & lowerBoundV,
	108	Vdouble & upperBoundV,
	109	const Vint& spAttributesVec,
	110	const Vint& treeAttributesVec,
	111	const sequenceContainer& sc,
	112	const vector<tree> & etVec,
	113	const vector<const stochasticProcess *> & spVec,
	114	const MDOUBLE alphaConf);
	115
	116	// many stochastic process, 1 tree, all positions
	117	void computeEB_EXP_siteSpecificRate(Vdouble & ratesV,
	118	Vdouble & stdV,
	119	Vdouble & lowerBoundV,
	120	Vdouble & upperBoundV,
	121	const Vint& spAttributesVec,
	122	const sequenceContainer& sc,
	123	const tree & et,
	124	const vector<const stochasticProcess *> & spVec,
	125	const MDOUBLE alphaConf);
	126
	127	// 1 stochastic process, many tree, all positions
	128	void computeEB_EXP_siteSpecificRate(Vdouble & ratesV,
	129	Vdouble & stdV,
	130	Vdouble & lowerBoundV,
	131	Vdouble & upperBoundV,
	132	const Vint& treeAttributesVec,
	133	const sequenceContainer& sc,
	134	const vector<tree> & etVec,
	135	const stochasticProcess & sp,
	136	const MDOUBLE alphaConf);
	137
	138
	139	// PROPORTIONAL BAYESIAN PART
	140
	141	// Many stochastic processes controlled by their distribution, 1 sequence container, 1 tree, 1 position
	142	void computeEB_EXP_siteSpecificRateProportional(int gene,
	143	const sequenceContainer& sc,
	144	multipleStochasticProcess& msp,
	145	const gammaDistribution* pProportionDist,
	146	const tree &et,
	147	MDOUBLE& bestRate,
	148	MDOUBLE & stdRate,
	149	MDOUBLE & lowerConf,
	150	MDOUBLE & upperConf,
	151	const MDOUBLE alphaConf,
	152	VVdouble* LpostPerCat=NULL);
	153
	154	// Many stochastic processes controlled by their distribution, 1 tree, all positions
	155	void computeEB_EXP_siteSpecificRateProportional(Vdouble & ratesV,
	156	Vdouble & stdV,
	157	Vdouble & lowerBoundV,
	158	Vdouble & upperBoundV,
	159	const vector<sequenceContainer>& sc,
	160	multipleStochasticProcess& msp,
	161	const gammaDistribution* pProportionDist,
	162	const tree& et,
	163	const MDOUBLE alphaConf,
	164	VVdouble* LpostPerCat=NULL);
	165
	166	#endif
	167

+299

-0

libs/phylogeny/siteSpecificRateGL.cpp less more

	0	// $Id: siteSpecificRate.cpp 3658 2008-03-05 09:25:46Z cohenofi $
	1
	2	#include "siteSpecificRateGL.h"
	3	#include "numRec.h"
	4	#include "checkcovFanctors.h"
	5	#include "definitions.h"
	6
	7	using namespace siteSpecificRateGL;
	8
	9	MDOUBLE siteSpecificRateGL::computeML_siteSpecificRate(Vdouble & ratesV,
	10	Vdouble & likelihoodsV,
	11	const sequenceContainer& sc,
	12	const stochasticProcess& sp,
	13	const tree& et,
	14	const MDOUBLE maxRate,//20.0f
	15	const MDOUBLE tol){//=0.0001f;
	16
	17	ratesV.resize(sc.seqLen());
	18	likelihoodsV.resize(sc.seqLen());
	19	MDOUBLE Lsum = 0.0;
	20
	21	for (int pos=0; pos < sc.seqLen(); ++pos) {
	22	siteSpecificRateGL::computeML_siteSpecificRate(pos,sc,sp,et,ratesV[pos],likelihoodsV[pos],maxRate,tol);
	23	assert(likelihoodsV[pos]>0.0);
	24	Lsum += log(likelihoodsV[pos]);
	25	LOG(5,<<" rate of pos: "<<pos<<" = "<<ratesV[pos]<<endl);
	26	}
	27	LOG(5,<<" number of sites: "<<sc.seqLen()<<endl);
	28	return Lsum;
	29	}
	30
	31	MDOUBLE siteSpecificRateGL::computeML_siteSpecificRate(Vdouble & ratesV,
	32	Vdouble & likelihoodsV,
	33	const Vint& spAttributesVec,
	34	const Vint& treeAttributesVec,
	35	const vector<tree> & etVec,
	36	const vector<const stochasticProcess *> & spVec,
	37	const sequenceContainer& sc,
	38	const MDOUBLE maxRate,
	39	const MDOUBLE tol){
	40	MDOUBLE Lsum = 0.0;
	41	ratesV.resize(sc.seqLen()); // the rates themselves
	42	likelihoodsV.resize(sc.seqLen()); // the log likelihood of each position
	43
	44	for (int pos=0; pos < sc.seqLen(); ++pos) {
	45	LOG(5,<<".");
	46	MDOUBLE bestR=-1.0; // tree1
	47	// MDOUBLE LmaxR1=0;
	48
	49	// getting the right tree for the specific position:
	50	const tree* treeForThisPosition=NULL;
	51	if ((etVec.size() >0 ) && (treeAttributesVec[pos]>0)) {
	52	treeForThisPosition = & etVec[ treeAttributesVec[pos] -1];
	53	} else {
	54	errorMsg::reportError("tree vector is empty, or treeAttribute is empty, or treeAttribute[pos] is zero (it should be one)");
	55	}
	56
	57	// getting the right stochastic process for the specific position:
	58
	59	const stochasticProcess* spForThisPosition=NULL;
	60
	61	if ((spVec.size() >0 ) && (spAttributesVec[pos]>0)) {
	62	spForThisPosition = spVec[ spAttributesVec[pos] -1];
	63	} else {
	64	errorMsg::reportError("stochastic process vector is empty, or spAttributesVec is empty, or spAttribute[pos] is zero (it should be one)");
	65	}
	66
	67	siteSpecificRateGL::computeML_siteSpecificRate(pos,sc,spForThisPosition,treeForThisPosition,bestR,likelihoodsV[pos],maxRate,tol);
	68	ratesV[pos] = bestR;
	69	assert(likelihoodsV[pos]>0.0);
	70	Lsum += log(likelihoodsV[pos]);
	71	LOG(5,<<" rate of pos: "<<pos<<" = "<<ratesV[pos]<<endl);
	72	}
	73	LOG(5,<<" number of sites: "<<sc.seqLen()<<endl);
	74	return Lsum;
	75	}
	76
	77	// note that this places the likelihood, rather then the loglikelihood into posL
	78	void siteSpecificRateGL::computeML_siteSpecificRate(int pos,
	79	const sequenceContainer& sc,
	80	const stochasticProcess& sp,
	81	const tree &et,
	82	MDOUBLE& bestRate,
	83	MDOUBLE& posL,
	84	const MDOUBLE maxRate,
	85	const MDOUBLE tol) {
	86	LOG(5,<<".");
	87	MDOUBLE ax=0.00001f,bx=maxRate*0.25,cx=maxRate; // MN
	88	posL=-brent(ax,bx,cx,Cevaluate_L_given_r(sc,et,sp,pos),tol,&bestRate);
	89	}
	90
	91	MDOUBLE siteSpecificRateGL::computeML_siteSpecificRate(Vdouble & ratesV,
	92	Vdouble & likelihoodsV,
	93	const Vint& treeAttributesVec,
	94	const vector<tree> & etVec,
	95	const stochasticProcess& sp,
	96	const sequenceContainer& sc,
	97	const MDOUBLE maxRate,
	98	const MDOUBLE tol) {
	99	Vint spAttributesVec(sc.seqLen(),1);
	100	vector<const stochasticProcess* > spVec;
	101	spVec.push_back(&sp);
	102	return computeML_siteSpecificRate(ratesV,likelihoodsV,
	103	spAttributesVec,treeAttributesVec,etVec,spVec,sc,maxRate,tol);
	104	}
	105
	106	MDOUBLE siteSpecificRateGL::computeML_siteSpecificRate(Vdouble & ratesV,
	107	Vdouble & likelihoodsV,
	108	const Vint& spAttributesVec,
	109	const tree & et,
	110	const vector<const stochasticProcess* > & spVec,
	111	const sequenceContainer& sc,
	112	const MDOUBLE maxRate,
	113	const MDOUBLE tol){
	114	Vint treeAttributesVec(sc.seqLen(),1);
	115	vector<tree> etVec;
	116	etVec.push_back(et);
	117	return siteSpecificRateGL::computeML_siteSpecificRate(ratesV,likelihoodsV,
	118	spAttributesVec,treeAttributesVec,etVec,spVec,sc,maxRate,tol);
	119	}
	120
	121
	122	// THE BAYESIAN EB_EXP PART OF RATE ESTIMATION. //
	123
	124	void siteSpecificRateGL::computeEB_EXP_siteSpecificRate(int pos,
	125	const sequenceContainer& sc,
	126	const stochasticProcess& sp,
	127	const computePijGam& cpg,
	128	const tree &et,
	129	MDOUBLE& bestRate,
	130	MDOUBLE & stdRate,
	131	MDOUBLE & lowerConf,
	132	MDOUBLE & upperConf,
	133	const MDOUBLE alphaConf, // alpha of 0.05 is considered 0.025 for each side.
	134	VVdouble* LpostPerCat,
	135	Vdouble* pLforMissingDataPerCat)
	136	{
	137	// here we compute P(r \| data)
	138	VdoubleRep pGivenR(sp.categories(),0.0);
	139	doubleRep sum=0;
	140	MDOUBLE LofPos_givenRateCat;
	141	LOG(8,<<pos+1<<"\t"); //DEBUG
	142	for (int cat=0; cat < sp.categories(); ++cat) {
	143	LofPos_givenRateCat = convert(likelihoodComputation::getLofPos(pos,et,sc,cpg[cat],sp));
	144	if(pLforMissingDataPerCat){
	145	LofPos_givenRateCat = LofPos_givenRateCat/(1- (*pLforMissingDataPerCat)[cat]);
	146	}
	147	pGivenR[cat] = LofPos_givenRateCat*sp.ratesProb(cat);
	148	LOG(8,<<cat<<"\t"<<LofPos_givenRateCat<<"\t"); //DEBUG
	149	sum+=pGivenR[cat];
	150	}
	151	LOG(8,<<"\n"); //DEBUG
	152	assert(sum!=0);
	153
	154	// here we compute sigma r * P(r \| data)
	155
	156	doubleRep sumOfSquares(0.0);
	157	doubleRep bestRate_dblRep(0.0);
	158
	159	LOG(5,<<"Pos "<<pos<<" content = "<<sc[0][pos]<<" ,total likelihood = "<<sum<<endl); //DEBUG
	160
	161	for (int j=0; j < sp.categories(); ++j) {
	162	pGivenR[j]/=sum; // So that pGivenR is probability.
	163	// From here on we can convert it back
	164	// to MDOUBLE because it's not a very
	165	// small likelihood any more
	166	if (LpostPerCat){
	167	(*LpostPerCat)[j][pos]= convert(pGivenR[j]);
	168	}
	169	doubleRep tmp = pGivenR[j]*sp.rates(j);
	170	bestRate_dblRep += tmp;
	171	sumOfSquares += (tmp*sp.rates(j));
	172
	173	}
	174
	175	bestRate = convert(bestRate_dblRep);
	176	MDOUBLE varRate = convert(sumOfSquares) - convert(bestRate*bestRate);
	177	MDOUBLE tolerance = 0.0001; // tolerance for variance is not very exact, and also exact computation not very important
	178	if (varRate<-tolerance)
	179	errorMsg::reportError("Error in computeEB_EXP_siteSpecificRate, varRate < 0");
	180	if ((varRate<0) && (varRate>=-tolerance))
	181	varRate = 0;
	182	stdRate = sqrt(varRate);
	183
	184	// detecting the confidence intervals.
	185	MDOUBLE oneSideConfAlpha = alphaConf/2.0; // because we are computing the two tail.
	186	doubleRep cdf = 0.0; // cumulative density function.
	187	int k=0;
	188	while (k < sp.categories()){
	189	cdf += convert(pGivenR[k]);
	190	if (cdf >oneSideConfAlpha) {
	191	lowerConf = sp.rates(k);
	192	break;
	193	}
	194	k++;
	195	}
	196	while (k < sp.categories()) {
	197	if (cdf >(1.0-oneSideConfAlpha)) {
	198	upperConf = sp.rates(k);
	199	break;
	200	}
	201	++k;
	202	cdf += convert(pGivenR[k]);
	203	}
	204	if (k==sp.categories()) upperConf = sp.rates(k-1);
	205	}
	206
	207	void siteSpecificRateGL::computeEB_EXP_siteSpecificRate(Vdouble & ratesV,
	208	Vdouble & stdV,
	209	Vdouble & lowerBoundV,
	210	Vdouble & upperBoundV,
	211	const sequenceContainer& sc,
	212	const stochasticProcess& sp,
	213	const tree& et,
	214	const MDOUBLE alphaConf,
	215	VVdouble* LpostPerCat,
	216	Vdouble* pLforMissingDataPerCat)
	217	{
	218	ratesV.resize(sc.seqLen());
	219	stdV.resize(sc.seqLen());
	220	lowerBoundV.resize(sc.seqLen());
	221	upperBoundV.resize(sc.seqLen());
	222
	223	computePijGam cpg;
	224	cpg.fillPij(et,sp);
	225	for (int pos=0; pos < sc.seqLen(); ++pos) {
	226	siteSpecificRateGL::computeEB_EXP_siteSpecificRate(pos,sc,sp,cpg, et,ratesV[pos],stdV[pos],lowerBoundV[pos],upperBoundV[pos],alphaConf,LpostPerCat,pLforMissingDataPerCat);
	227	LOG(5,<<" rate of pos: "<<pos<<" = "<<ratesV[pos]<<endl);
	228	}
	229	LOG(5,<<" number of sites: "<<sc.seqLen()<<endl);
	230	}
	231
	232	void computeEB_EXP_siteSpecificRate(Vdouble & ratesV,
	233	Vdouble & stdV,
	234	Vdouble & lowerBoundV,
	235	Vdouble & upperBoundV,
	236	const Vint& spAttributesVec,
	237	const Vint& treeAttributesVec,
	238	const sequenceContainer& sc,
	239	const vector<tree> & etVec,
	240	const vector<const stochasticProcess *> & spVec,
	241	const MDOUBLE alphaConf){
	242	ratesV.resize(sc.seqLen());
	243	stdV.resize(sc.seqLen());
	244	lowerBoundV.resize(sc.seqLen());
	245	upperBoundV.resize(sc.seqLen());
	246	for (int treeNum=0; treeNum<etVec.size(); ++treeNum) {
	247	for (int spNum = 0; spNum<spVec.size(); ++spNum) {
	248	computePijGam cpg;
	249	cpg.fillPij(etVec[treeNum],*(spVec[spNum]));
	250	for (int pos=0; pos < sc.seqLen(); ++pos) {
	251	if (((spAttributesVec[pos]-1)!=spNum ) \|\| ((treeAttributesVec[pos]-1)!=treeNum )) continue;
	252	const tree* treeForThisPosition=NULL;
	253	assert ((etVec.size() >0 ) && (treeAttributesVec[pos]>0));
	254	treeForThisPosition = & etVec[ treeAttributesVec[pos] -1];
	255	const stochasticProcess* spForThisPosition=NULL;
	256	assert ((spVec.size() >0 ) && (spAttributesVec[pos]>0));
	257	spForThisPosition = spVec[ spAttributesVec[pos] -1];
	258	siteSpecificRateGL::computeEB_EXP_siteSpecificRate(pos,sc,spForThisPosition,cpg, treeForThisPosition,ratesV[pos],stdV[pos],lowerBoundV[pos],upperBoundV[pos],alphaConf);
	259	LOG(5,<<" rate of pos: "<<pos<<" = "<<ratesV[pos]<<endl);
	260	}
	261	}
	262	}
	263	LOG(5,<<" number of sites: "<<sc.seqLen()<<endl);
	264	}
	265
	266	// one tree many sps
	267	void siteSpecificRateGL::computeEB_EXP_siteSpecificRate(Vdouble & ratesV,
	268	Vdouble & stdV,
	269	Vdouble & lowerBoundV,
	270	Vdouble & upperBoundV,
	271	const Vint& spAttributesVec,
	272	const sequenceContainer& sc,
	273	const tree & et,
	274	const vector<const stochasticProcess *> & spVec,
	275	const MDOUBLE alphaConf){
	276	Vint etAttributesVec(sc.seqLen(),1);
	277	vector<tree> etVec;
	278	etVec.push_back(et);
	279	siteSpecificRateGL::computeEB_EXP_siteSpecificRate(ratesV,stdV,lowerBoundV,upperBoundV,spAttributesVec,etAttributesVec,sc,etVec,spVec,alphaConf);
	280	}
	281
	282	// one sp many trees
	283
	284	void siteSpecificRateGL::computeEB_EXP_siteSpecificRate(Vdouble & ratesV,
	285	Vdouble & stdV,
	286	Vdouble & lowerBoundV,
	287	Vdouble & upperBoundV,
	288	const Vint& treeAttributesVec,
	289	const sequenceContainer& sc,
	290	const vector<tree> & etVec,
	291	const stochasticProcess & sp,
	292	const MDOUBLE alphaConf){
	293	Vint spAttributesVec(sc.seqLen(),1);
	294	vector<const stochasticProcess* > spVec;
	295	spVec.push_back(&sp);
	296	siteSpecificRateGL::computeEB_EXP_siteSpecificRate(ratesV,stdV,lowerBoundV,upperBoundV,spAttributesVec,treeAttributesVec,sc,etVec,spVec,alphaConf);
	297	}
	298

+141

-0

libs/phylogeny/siteSpecificRateGL.h less more

	0	// $Id: siteSpecificRate.h 3428 2008-01-30 12:30:46Z cohenofi $
	1
	2	#ifndef ___SITE_SPECIFIC_RATE_GL_
	3	#define ___SITE_SPECIFIC_RATE_GL_
	4
	5	#include "definitions.h"
	6	#include "tree.h"
	7	#include "sequenceContainer.h"
	8	#include "stochasticProcess.h"
	9	#include "computePijComponent.h"
	10	//#include "likelihoodComputationGL.h"
	11
	12	// the function returns the total log-likelihood of the rates.
	13	// it is used for computing the rates, when there is one tree common to
	14	// all positions and 1 stochastic process common to all position.
	15
	16	namespace siteSpecificRateGL {
	17
	18	MDOUBLE computeML_siteSpecificRate(Vdouble & ratesV,
	19	Vdouble & likelihoodsV,
	20	const sequenceContainer& sd,
	21	const stochasticProcess& sp,
	22	const tree& et,
	23	const MDOUBLE maxRate=20.0f,
	24	const MDOUBLE tol=0.0001f);
	25
	26	// this function is the same as the one above, but here, each site can have its
	27	//own tree, or its own stochastic process.
	28	//etVec: a vector of possible trees.
	29	//spVec: a vector of possible stochastic processes.
	30	//treeAttributesVec: defines which tree is assigned to a specific position.
	31	//NOTE: the possible attributes are 1,2..., so that the tree for position i
	32	//is etVec[treeAttributesVec[i]-1]
	33	//The same is true for the stochastic process attributes vector.
	34	MDOUBLE computeML_siteSpecificRate(Vdouble & ratesV,
	35	Vdouble & likelihoodsV,
	36	const Vint& spAttributesVec,
	37	const Vint& treeAttributesVec,
	38	const vector<tree> & etVec,
	39	const vector<const stochasticProcess *> & spVec,
	40	const sequenceContainer& sc,
	41	const MDOUBLE maxRate,
	42	const MDOUBLE tol);
	43
	44	// this function is the same as the one above, but here,
	45	// there are only tree attributes.
	46	MDOUBLE computeML_siteSpecificRate(Vdouble & ratesV,
	47	Vdouble & likelihoodsV,
	48	const Vint& treeAttributesVec,
	49	const vector<tree> & etVec,
	50	const stochasticProcess& sp,
	51	const sequenceContainer& sc,
	52	const MDOUBLE maxRate,
	53	const MDOUBLE tol);
	54
	55	// this function is the same as the one above, but here,
	56	// there are only stochastic process attributes.
	57	MDOUBLE computeML_siteSpecificRate(Vdouble & ratesV,
	58	Vdouble & likelihoodsV,
	59	const Vint& spAttributesVec,
	60	const tree & et,
	61	const vector<const stochasticProcess* > & spVec,
	62	const sequenceContainer& sc,
	63	const MDOUBLE maxRate,
	64	const MDOUBLE tol);
	65
	66	void computeML_siteSpecificRate(int pos,
	67	const sequenceContainer& sc,
	68	const stochasticProcess& sp,
	69	const tree &et,
	70	MDOUBLE& bestRate,
	71	MDOUBLE& posL,
	72	const MDOUBLE maxRate,
	73	const MDOUBLE tol);
	74
	75	// BAYESIAN PART
	76
	77	// 1 sequence container, 1 tree, 1 position
	78	void computeEB_EXP_siteSpecificRate(int pos,
	79	const sequenceContainer& sc,
	80	const stochasticProcess& sp,
	81	const computePijGam& cpg,
	82	const tree &et,
	83	MDOUBLE& bestRate,
	84	MDOUBLE & stdRate,
	85	MDOUBLE & lowerConf,
	86	MDOUBLE & upperConf,
	87	const MDOUBLE alphaConf,
	88	VVdouble* LpostPerCat=NULL,
	89	Vdouble* pLforMissingDataPerCat=NULL);
	90
	91	// 1 stochastic process, 1 tree, all positions
	92	void computeEB_EXP_siteSpecificRate(Vdouble & ratesV,
	93	Vdouble & stdV,
	94	Vdouble & lowerBoundV,
	95	Vdouble & upperBoundV,
	96	const sequenceContainer& sc,
	97	const stochasticProcess& sp,
	98	const tree& et,
	99	const MDOUBLE alphaConf,
	100	VVdouble* LpostPerCat=NULL,
	101	Vdouble* pLforMissingDataPerCat=NULL);
	102
	103	// many stochastic process, many tree, all positions
	104	void computeEB_EXP_siteSpecificRate(Vdouble & ratesV,
	105	Vdouble & stdV,
	106	Vdouble & lowerBoundV,
	107	Vdouble & upperBoundV,
	108	const Vint& spAttributesVec,
	109	const Vint& treeAttributesVec,
	110	const sequenceContainer& sc,
	111	const vector<tree> & etVec,
	112	const vector<const stochasticProcess *> & spVec,
	113	const MDOUBLE alphaConf);
	114
	115	// many stochastic process, 1 tree, all positions
	116	void computeEB_EXP_siteSpecificRate(Vdouble & ratesV,
	117	Vdouble & stdV,
	118	Vdouble & lowerBoundV,
	119	Vdouble & upperBoundV,
	120	const Vint& spAttributesVec,
	121	const sequenceContainer& sc,
	122	const tree & et,
	123	const vector<const stochasticProcess *> & spVec,
	124	const MDOUBLE alphaConf);
	125
	126	// 1 stochastic process, many tree, all positions
	127	void computeEB_EXP_siteSpecificRate(Vdouble & ratesV,
	128	Vdouble & stdV,
	129	Vdouble & lowerBoundV,
	130	Vdouble & upperBoundV,
	131	const Vint& treeAttributesVec,
	132	const sequenceContainer& sc,
	133	const vector<tree> & etVec,
	134	const stochasticProcess & sp,
	135	const MDOUBLE alphaConf);
	136
	137	};
	138
	139	#endif
	140

+1073

-0

libs/phylogeny/someUtil.cpp less more

	0	// $Id: someUtil.cpp 11906 2013-12-26 10:12:24Z itaymay $
	1
	2	#include "someUtil.h"
	3	#include "errorMsg.h"
	4	#include "talRandom.h"
	5	#include <cmath>
	6	#include <ctime>
	7	#include <iterator>
	8	#include <algorithm>
	9	#include <string>
	10	#include <cctype>
	11	#include <cassert>
	12	using namespace std;
	13
	14	// for the _mkdir call
	15	#if defined(WIN32) \|\| defined(SunOS) \|\| defined(solaris)
	16	#include <direct.h>
	17	#else
	18	#include <sys/file.h>
	19	#include <dirent.h>
	20	// #include <io.h>
	21	#endif
	22
	23	//swap between the 4 variables such that the first becomes the second, second becomes the third and third becomes the fourth.
	24	//used in functoin mnbrack below.
	25	void shift3(MDOUBLE &a, MDOUBLE &b, MDOUBLE &c, const MDOUBLE d) {
	26	a=b;
	27	b=c;
	28	c=d;
	29	}
	30
	31	MDOUBLE computeAverage(const vector<int>& vec) {
	32	MDOUBLE sum=0.0;
	33	for (int i=0; i < vec.size(); ++i) {
	34	sum+=static_cast<MDOUBLE>(vec[i]);
	35	}
	36	return sum/static_cast<MDOUBLE>(vec.size());
	37	}
	38
	39	// X ~ Poisson(lamda) --> P(X=k) = ((lamda^k)/k!) * e^(-lamda)
	40	// It isn't smart to first calculate factorial(k) because the size of long int limits this calculation to k<=13
	41	MDOUBLE copmutePoissonProbability(const int& k, const long double& lamda)
	42	{
	43	assert(k>=0);
	44	long double tmp = pow(lamda,k); // tmp = (lamda^k)/k!
	45
	46	for (int i=2; i<=k; ++i)
	47	tmp/=i;
	48
	49	return (tmp * exp(-lamda));
	50	}
	51
	52
	53	MDOUBLE computeAverage(const vector<MDOUBLE>& vec, const Vdouble* weightsV) {
	54	MDOUBLE sum=0.0;
	55	if(weightsV && !(weightsV->size() == vec.size() ))
	56	errorMsg::reportError("Using computeAverage with weights, where the number of weights not equal values");
	57	for (int i=0; i < vec.size(); ++i){
	58	if(weightsV)
	59	sum+=vec[i]* (*weightsV)[i];
	60	else
	61	sum+=vec[i];
	62	}
	63	return sum/static_cast<MDOUBLE>(vec.size());
	64	}
	65
	66	MDOUBLE computeAverageOfAbs(const vector<MDOUBLE>& vec, const Vdouble* weightsV) {
	67	MDOUBLE sum=0.0;
	68	if(weightsV && !(weightsV->size() == vec.size() ))
	69	errorMsg::reportError("Using computeAverage with weights, where the number of weights not equal values");
	70	for (int i=0; i < vec.size(); ++i){
	71	if(weightsV)
	72	sum+=abs(vec[i]* (*weightsV)[i]);
	73	else
	74	sum+=abs(vec[i]);
	75	}
	76	return sum/static_cast<MDOUBLE>(vec.size());
	77	}
	78
	79	MDOUBLE computeMedian(const vector<MDOUBLE>& vec) {
	80	int vecSize = vec.size();
	81	if (vecSize<1)
	82	return 0;
	83	vector< vecElem<MDOUBLE> > sortVec(vecSize);
	84	for (int x =0; x < vecSize ; ++x)
	85	{
	86	sortVec[x].setValue(vec[x]);
	87	sortVec[x].setPlace(x);
	88	}
	89	sort(sortVec.begin(), sortVec.end());
	90	sort(sortVec.begin(), sortVec.end());
	91
	92	int highMedianIndex;
	93	if(vecSize>1)
	94	highMedianIndex = int((vecSize+1)/2);
	95	else
	96	highMedianIndex = int((vecSize)/2); // thus, highMedianIndex==0
	97
	98
	99	MDOUBLE median = sortVec[highMedianIndex].getValue();
	100	return median;
	101	}
	102
	103	//// if quantile=0.5, the median is returned, if quantile=0.1, the low-ton-percentile is returned, quantile=0.9, the top-90-percentile is returned
	104	MDOUBLE computeQuantileFrac(const vector<MDOUBLE>& vec, MDOUBLE quantile) {
	105	int vecSize = vec.size();
	106	vector< vecElem<MDOUBLE> > sortVec(vecSize);
	107	for (int x =0; x < vecSize ; ++x)
	108	{
	109	sortVec[x].setValue(vec[x]);
	110	sortVec[x].setPlace(x);
	111	}
	112	sort(sortVec.begin(), sortVec.end());
	113	sort(sortVec.begin(), sortVec.end());
	114
	115	int qIndex = int((vecSize+1)*quantile);
	116	MDOUBLE quantileVal = sortVec[qIndex].getValue();
	117	return quantileVal;
	118	}
	119
	120	//// if quantile=2, the median is returned, if quantile=10, the ten-percentile is returned
	121	MDOUBLE computeQuantile(const vector<MDOUBLE>& vec, MDOUBLE quantile) {
	122	MDOUBLE dividerForRank = 1+ 1.0/(quantile-1);
	123
	124	int vecSize = vec.size();
	125	vector< vecElem<MDOUBLE> > sortVec(vecSize);
	126	for (int x =0; x < vecSize ; ++x)
	127	{
	128	sortVec[x].setValue(vec[x]);
	129	sortVec[x].setPlace(x);
	130	}
	131	sort(sortVec.begin(), sortVec.end());
	132	sort(sortVec.begin(), sortVec.end());
	133
	134	int qIndex = int((vecSize+1)/dividerForRank);
	135	MDOUBLE quantileVal = sortVec[qIndex].getValue();
	136	return quantileVal;
	137	}
	138
	139
	140	MDOUBLE computeStd(const vector<int>& vec) {// page 60, Sokal and Rohlf
	141	MDOUBLE sum=0.0;
	142	MDOUBLE sumSqr=0.0;
	143	MDOUBLE vecSize = static_cast<MDOUBLE>(vec.size());
	144	for (int i=0; i < vec.size(); ++i) {
	145	sum+=static_cast<MDOUBLE>(vec[i]);
	146	sumSqr+=(static_cast<MDOUBLE>(vec[i])*static_cast<MDOUBLE>(vec[i]));
	147	}
	148	MDOUBLE res= sumSqr-(sum*sum/vecSize);
	149	res /= (vecSize-1.0);
	150	res = sqrt(res);
	151	return res;
	152	}
	153
	154	MDOUBLE computeStd(const vector<MDOUBLE>& vec) {// page 60, Sokal and Rohlf
	155	MDOUBLE sum=0.0;
	156	MDOUBLE sumSqr=0.0;
	157	MDOUBLE vecSize = static_cast<MDOUBLE>(vec.size());
	158	for (int i=0; i < vec.size(); ++i) {
	159	sum+=vec[i];
	160	sumSqr+=(vec[i]*vec[i]);
	161	}
	162	MDOUBLE res= sumSqr-(sum*sum/vecSize);
	163	res /= (vecSize-1.0);
	164	res = sqrt(res);
	165	return res;
	166	}
	167
	168	void computeRelativeFreqsFollowingOneChanged(MDOUBLE newValFreq, int indexNewFreq,Vdouble &freqs){
	169	MDOUBLE proportionAfterOptimization = 1.0 - newValFreq;
	170	MDOUBLE proportionBeforeOptimization = 1.0 - freqs[indexNewFreq];
	171	MDOUBLE sum = 0.0;
	172	for (int i=0; i<freqs.size(); ++i) {
	173	if (i==indexNewFreq){
	174	freqs[i] = newValFreq;
	175	}
	176	else {
	177	freqs[i] = proportionAfterOptimization*freqs[i]/proportionBeforeOptimization;
	178	}
	179	sum+=freqs[i];
	180	}
	181	if (!DEQUAL(sum,1.0)) {
	182	errorMsg::reportError("Error in computeRelativeFreqsFollowingOneChanged, sum not equal to 1");
	183	}
	184	}
	185
	186
	187	char mytolower(char in){return tolower(in);}
	188	char mytoupper(char in){return toupper(in);}
	189
	190	void toLower(string& str) {
	191	transform (str.begin(), str.end(), str.begin(), mytolower);
	192	}
	193	void toUpper(string& str) {
	194	transform (str.begin(), str.end(), str.begin(), mytoupper);
	195	}
	196	string toUpper2(const string& str)
	197	{
	198	string res("");
	199	transform (str.begin(), str.end(), res.begin(), mytoupper);
	200	return res;
	201	}
	202
	203	bool allowCharSet(const string& allowableChars, const string& string2check) {
	204	// this function check if all the character in string2check are made of characters from allowableChars
	205	for (int i=0; i < string2check.size(); ++i) {
	206	// now checking for string2check[i]
	207	int j;
	208	for (j=0; j < allowableChars.size(); ++j) {
	209	if (string2check[i] == allowableChars[j]) {
	210	break;
	211	}
	212	}
	213	if (j==allowableChars.size()) return false;
	214	}
	215	return true;
	216	}
	217
	218	bool isCharInString(const string& stringToCheck, const char charToCheck) {
	219	for (int i=0; i < stringToCheck.size(); ++i ) {
	220	if (stringToCheck[i] == charToCheck) return true;
	221	}
	222	return false;
	223	}
	224
	225	string double2string(const double x, const int lenght, bool round){
	226
	227	// first getting the integer part:
	228	//Itay: fixing bug regarding negative floats
	229	double x_abs = fabs(x);
	230	int theIntegerPart = static_cast<int>(x_abs);
	231	double theRemainingPart = fabs(x_abs-theIntegerPart);
	232	int integerRepresentingTheRemainingPart = static_cast<int>(theRemainingPart*pow(10.0,lenght));
	233	if (round)
	234	integerRepresentingTheRemainingPart = static_cast<int>(theRemainingPart*pow(10.0,lenght)+0.5);
	235
	236	string part1 = int2string(theIntegerPart);
	237	string part2 = int2string(integerRepresentingTheRemainingPart);
	238	while (part2.length()<lenght){
	239	part2.insert(0, "0");
	240	}
	241
	242	string result("");
	243	if (x < 0.0)
	244	result += "-";
	245	result += part1;
	246	result += ".";
	247	result += part2;
	248
	249	// removing 0 from the end
	250	int i = result.length()-1;
	251	while (result[i]!='.' && i>0 && result[i]=='0'){
	252	result.erase(i);
	253	i--;
	254	}
	255
	256	// removing "." if this is the last character in the string.
	257	if (result[result.length()-1]=='.')
	258	result.erase(result.length()-1);
	259
	260	return result;
	261	}
	262
	263	string int2string(const int num) {
	264	// the input to this program is say 56
	265	// the output is the string "56"
	266	// this version of int2string is more portable
	267	// than sprintf like functions from c;
	268	// or sstream of stl.
	269	if (num == 0) return "0";
	270	string res;
	271	int i = abs(num);
	272
	273
	274	int leftover;
	275	char k;
	276	while (i) {
	277	leftover = i%10;
	278	k = '0'+leftover;
	279	res = k+res;
	280	i/=10;
	281	}
	282	if (num<0) res = "-" + res;
	283	return res;
	284	};
	285
	286	void printTime(ostream& out) {
	287	time_t ltime;
	288	time( &ltime );
	289	out<<"# the date is "<< ctime( &ltime )<<endl;
	290	}
	291
	292	MDOUBLE string2double(const string& inString) {
	293
	294	if (allowCharSet("0123456789.eE+-",inString) == false) {
	295	errorMsg::reportError(" error in function string2double ");
	296	}
	297
	298	// first decide if the format is like 0.00343 (regularFormat) or
	299	// if it is in the form of 0.34e-006 for example
	300
	301	bool regularFormat = true;
	302	int i;
	303	for (i=0; i < inString.size(); ++i) {
	304	if ((inString[i] == 'e' ) \|\| (inString[i] == 'E' )) {
	305	regularFormat = false;
	306	break;
	307	}
	308	}
	309
	310	if (regularFormat) {
	311	MDOUBLE dDistance = atof(inString.c_str());
	312	return dDistance;
	313	}
	314	else {
	315	string b4TheExp;
	316	bool plusAfterTheExp = true;
	317	string afterTheExp;
	318
	319	// b4 the exp
	320	for (i=0; i < inString.size(); ++i) {
	321	if ((inString[i] != 'e' ) && (inString[i] != 'E' )){
	322	b4TheExp += inString[i];
	323	}
	324	else break;
	325	}
	326	++i; //now standing after the exp;
	327	if (inString[i] == '-' ) {
	328	plusAfterTheExp = false;
	329	++i;
	330	}
	331	else if (inString[i] == '+' ) {
	332	plusAfterTheExp = true;
	333	++i;
	334	}
	335	else plusAfterTheExp = true; // the number is like 0.34e43
	336
	337	for (; i < inString.size(); ++i) {
	338	afterTheExp += inString[i];
	339	}
	340
	341	MDOUBLE res = 0.0;
	342	MDOUBLE dDistance = atof(b4TheExp.c_str());
	343	int exponentialFactor = atoi(afterTheExp.c_str());
	344	if (plusAfterTheExp) res = dDistance * pow(10.0,exponentialFactor);
	345	else res = dDistance * pow(10.0,-exponentialFactor);
	346	return res;
	347	}
	348
	349
	350	}
	351
	352
	353	bool checkThatFileExist(const string& fileName) {
	354	ifstream file1(fileName.c_str());
	355	if (file1==NULL) return false;
	356	file1.close();
	357	return true;
	358	}
	359
	360	void putFileIntoVectorStringArray(istream &infile,vector<string> &inseqFile){
	361	inseqFile.clear();
	362	string tmp1;
	363	while (getline(infile,tmp1, '\n' ) ) {
	364	if (tmp1.empty()) continue;
	365	if (tmp1.size() > 100000) { // was 15000
	366	vector<string> err;
	367	err.push_back("Unable to read file. It is required that each line is no longer than");
	368	err.push_back("15000 characters. ");
	369	errorMsg::reportError(err,1);
	370	}
	371	if (tmp1[tmp1.size()-1]=='\r') {// in case we are reading a dos file
	372	tmp1.erase(tmp1.size()-1);
	373	}// remove the traling carrige-return
	374	inseqFile.push_back(tmp1);
	375	}
	376	}
	377
	378	bool fromStringIterToInt(string::const_iterator & it, // ref must be here
	379	const string::const_iterator endOfString,
	380	int& res) {// the ref is so that we can use the it after the func.
	381	while (it != endOfString) {
	382	if ((it == ' ') \|\| (it == '\t')) ++it;else break; // skeeping white spaces.
	383	}
	384	if (it != endOfString) {
	385	if (isdigit(it) \|\| (it == '-')){
	386	int k = atoi(&*it);
	387	if (*it == '-') ++it;
	388	for (int numDig = abs(k); numDig>0; numDig/=10) ++it;
	389	res = k;
	390	return true;
	391	}
	392	else return false; //unable to read int From String
	393	}
	394	return false; //unable to read int From String
	395
	396	}
	397
	398	string* searchStringInFile(const string& string2find,
	399	const int index,
	400	const string& inFileName) {
	401	ifstream f;
	402	f.open(inFileName.c_str());
	403	if (f==NULL) {
	404	string tmp = "Unable to open file name: "+inFileName+" in function searchStringInFile";
	405	errorMsg::reportError(tmp);
	406	}
	407
	408	string numm = int2string(index);
	409	string realString2find = string2find+numm;
	410
	411	istream_iterator<string> is_string(f);
	412	istream_iterator<string> end_of_stream;
	413
	414	is_string = find(is_string,end_of_stream,realString2find);
	415	if(is_string == end_of_stream) {f.close();return NULL;}
	416	else {
	417	is_string++;
	418	if(is_string == end_of_stream) {f.close();return NULL;};
	419	string* s = new string(*is_string);
	420	f.close();
	421	return s;
	422	}
	423	f.close();
	424	return NULL;
	425	}
	426	string* searchStringInFile(const string& string2find,
	427	const string& inFileName) {// return the string that is AFTER the string to search.
	428	ifstream f;
	429	f.open(inFileName.c_str());
	430	if (f==NULL) {
	431	string tmp = "Unable to open file name: "+inFileName+" in function searchStringInFile";
	432	errorMsg::reportError(tmp);
	433	}
	434	string realString2find = string2find;
	435
	436	istream_iterator<string> is_string(f);
	437	istream_iterator<string> end_of_stream;
	438
	439	is_string = find(is_string,end_of_stream,realString2find);
	440	if(is_string == end_of_stream) {f.close();return NULL;}
	441	else {
	442	is_string++;
	443	if(is_string == end_of_stream) {f.close();return NULL;};
	444	string* s = new string(*is_string);
	445	f.close();
	446	return s;
	447	}
	448	f.close();
	449	return NULL;
	450	}
	451	bool doesWordExistInFile(const string& string2find,const string& inFileName) {
	452	ifstream f;
	453	f.open(inFileName.c_str());
	454	if (f==NULL) {
	455	string tmp = "Unable to open file name: "+inFileName+" in function searchStringInFile";
	456	errorMsg::reportError(tmp);
	457	}
	458
	459	istream_iterator<string> is_string(f);
	460	istream_iterator<string> end_of_stream;
	461
	462	is_string = find(is_string,end_of_stream,string2find);
	463	if(is_string == end_of_stream) return false;
	464	else return true;
	465	}
	466
	467	string takeCharOutOfString(const string& charsToTakeOut, const string& fromString) {
	468	string finalString;
	469	for (int i=0; i<fromString.size(); ++i) {
	470	bool goodChar = true;
	471	for (int j=0; j < charsToTakeOut.size(); ++j) {
	472	if (fromString[i]== charsToTakeOut[j]) goodChar = false;
	473	}
	474	if (goodChar) finalString+=fromString[i];
	475	}
	476	return finalString;
	477	}
	478
	479	bool DEQUAL(const MDOUBLE x1, const MDOUBLE x2, MDOUBLE epsilon/1.192092896e-07F/) {
	480	return (fabs(x1-x2)<epsilon);
	481	}
	482
	483	bool DBIG_EQUAL(const MDOUBLE x1, const MDOUBLE x2, MDOUBLE epsilon/1.192092896e-07F/){
	484	return ((x1 > x2) \|\| DEQUAL(x1, x2,epsilon));
	485	}
	486
	487
	488	bool DSMALL_EQUAL(const MDOUBLE x1, const MDOUBLE x2, MDOUBLE epsilon/1.192092896e-07F/){
	489	return ((x1 < x2) \|\| DEQUAL(x1, x2,epsilon));
	490	}
	491
	492	void createDir(const string & curDir, const string & dirName){// COPYRIGHT OF ITAY MAYROSE.
	493	string newDir;
	494	if (curDir == "")
	495	newDir = dirName;
	496	else
	497	newDir = curDir + string("/") + dirName;
	498	#ifdef WIN32
	499	if( _mkdir(newDir.c_str()) == 0 ){
	500	LOG(5, << "Directory " <<newDir<<" was successfully created\n"<<endl);
	501	}else{
	502	if (errno == EEXIST) {
	503	LOG(5,<<"Directory already exist\n");
	504	return;
	505	} else {
	506	string err = "Problem creating directory " + newDir + " \n";
	507	LOG(5, << err << endl);
	508	errorMsg::reportError(err);
	509	}
	510	}
	511	#else
	512	DIR * directory = opendir(newDir.c_str());
	513	if (directory == NULL) {
	514	string sysCall = "mkdir " + newDir;
	515	system(sysCall.c_str());
	516	}
	517	else{
	518	string err = "Directory " + newDir + " already exists \n";
	519	LOG(5, << err << endl);
	520	//errorMsg::reportError(err);
	521
	522	}
	523	#endif
	524	}
	525
	526	//scale vecToScale so that its new average is AvgIn. return the scaling factor.
	527	MDOUBLE scaleVec(Vdouble& vecToScale, const MDOUBLE avgIn)
	528	{
	529	int vecSize = vecToScale.size();
	530	MDOUBLE sum = 0;
	531	for (int x = 0; x<vecSize; ++x)
	532	{
	533	sum += vecToScale[x];
	534	}
	535	MDOUBLE avg = sum/vecSize;
	536	MDOUBLE scaleFactor = avgIn / avg;
	537
	538	for (int i = 0; i<vecSize; ++i)
	539	{
	540	vecToScale[i] *= scaleFactor;
	541	}
	542
	543	MDOUBLE newAvg = computeAverage(vecToScale);
	544	if (fabs(newAvg - avgIn) > 0.001)
	545	errorMsg::reportError(" problem - scalled average is not avgIn after scalling!!!");
	546	return scaleFactor;
	547	}
	548
	549	//calculates the mean square error distance between 2 vectors:
	550	MDOUBLE calcMSEDistBetweenVectors(const Vdouble& oneRatesVec, const Vdouble& otherRatesVec)
	551	{
	552	MDOUBLE res = 0.0;
	553	if (oneRatesVec.size() != otherRatesVec.size())
	554	errorMsg::reportError("the two vectors to be compared are not the same size in function SimulateRates::calcDistBetweenRatesVectors()");
	555
	556	for (int i=0; i<oneRatesVec.size(); ++i)
	557	{
	558	MDOUBLE diff = oneRatesVec[i] - otherRatesVec[i];
	559	res += diff * diff;
	560	}
	561
	562	res /= oneRatesVec.size();
	563	return res;
	564	}
	565
	566	//calculates the mean absolute deviations distance between 2 vectors:
	567	MDOUBLE calcMADDistBetweenVectors(const Vdouble& oneRatesVec, const Vdouble& otherRatesVec)
	568	{
	569	MDOUBLE res = 0.0;
	570	if (oneRatesVec.size() != otherRatesVec.size())
	571	errorMsg::reportError("the two vectors to be compared are not the same size in function SimulateRates::calcDistBetweenRatesVectors()");
	572
	573	for (int i=0; i<oneRatesVec.size(); ++i)
	574	{
	575	MDOUBLE diff = oneRatesVec[i] - otherRatesVec[i];
	576	res += fabs(diff);
	577	}
	578
	579	res /= oneRatesVec.size();
	580	return res;
	581	}
	582
	583	MDOUBLE calcRelativeMADDistBetweenVectors(const Vdouble& trueValues, const Vdouble& inferredValues, const MDOUBLE threshhold/0.0/)
	584	{
	585	MDOUBLE res = 0.0;
	586	if (inferredValues.size() != trueValues.size())
	587	errorMsg::reportError("the two vectors to be compared are not the same size in function SimulateRates::calcDistBetweenRatesVectors()");
	588
	589	int counter = 0;
	590	for (int i=0; i<inferredValues.size(); ++i)
	591	{
	592	if (trueValues[i] < threshhold)
	593	continue;
	594	MDOUBLE diff = fabs(inferredValues[i] - trueValues[i]);
	595	res += (diff / trueValues[i]);
	596	++counter;
	597	}
	598
	599	res /= counter;
	600	return res;
	601	}
	602
	603	//calculates the relative mean square error distance between 2 vectors:
	604	//The difference from a regualar MSE is that for each position the squared difference is devided by the true value
	605	//if threshhold > 0: if trueValues[i] < threshhold then do not add the rse for this psition to the result
	606	MDOUBLE calcRelativeMSEDistBetweenVectors(const Vdouble& trueValues, const Vdouble& inferredValues, const MDOUBLE threshhold/0.0/ )
	607	{
	608	MDOUBLE res = 0.0;
	609	if (inferredValues.size() != trueValues.size())
	610	errorMsg::reportError("the two vectors to be compared are not the same size in function SimulateRates::calcDistBetweenRatesVectors()");
	611
	612	int counter = 0;
	613	for (int i=0; i<inferredValues.size(); ++i)
	614	{
	615	if (trueValues[i] < threshhold)
	616	continue;
	617	MDOUBLE diff = inferredValues[i] - trueValues[i];
	618	res += diff * diff / trueValues[i];
	619	++counter;
	620	}
	621
	622	res /= counter;
	623	return res;
	624	}
	625
	626
	627	MDOUBLE calcRankCorrelation(const Vdouble& oneRatesVec, const Vdouble& otherRatesVec)
	628	{
	629	MDOUBLE res = 0.0;
	630	Vdouble orderVec1, orderVec2;
	631	MDOUBLE s_one = orderVec(oneRatesVec, orderVec1);
	632	MDOUBLE s_two = orderVec(otherRatesVec, orderVec2);
	633	int seqLength = oneRatesVec.size();
	634
	635	MDOUBLE diff, sum_diff_sqr = 0;
	636	for (int i=0; i<seqLength; ++i)
	637	{
	638	diff = orderVec1[i] - orderVec2[i];
	639	sum_diff_sqr += pow(diff, 2);
	640	}
	641
	642	MDOUBLE en3n = (seqLength * (pow(static_cast<double>(seqLength), 2.0) -1)); //n^3 -n
	643	MDOUBLE numerator = 1.0 - ((6/en3n) * (sum_diff_sqr + (s_one + s_two)/12.0));
	644	MDOUBLE denum = sqrt((1.0 - s_one/en3n) * (1.0 - s_two/en3n));
	645	res = numerator/ denum;
	646	return res;
	647	}
	648
	649	/********************************************************************************************
	650	calculates the spearman rank correlation value, Ofir implementation
	651	*********************************************************************************************/
	652	MDOUBLE calcRankCorrelation2(const Vdouble& oneRatesVec, const Vdouble& otherRatesVec)
	653	{
	654	int vecLen = oneRatesVec.size();
	655	if(vecLen != otherRatesVec.size())
	656	errorMsg::reportError("calcRankCorrelation2. Vectors length differ");
	657
	658	Vdouble orderVec1, orderVec2;
	659	orderRankNoTies(oneRatesVec, orderVec1);
	660	orderRankNoTies(otherRatesVec, orderVec2);
	661
	662	MDOUBLE n = (double)vecLen;
	663	MDOUBLE dif,r,sum_dif=0;
	664	for (int i=0; i<vecLen; ++i)
	665	{
	666	dif = orderVec1[i] - orderVec2[i];
	667	sum_dif += pow(dif, 2);
	668	}
	669	r=1-(6sum_dif)/(n(pow (n,2)-1));
	670	return r;
	671	}
	672
	673
	674	/********************************************************************************************
	675	Pearson's correlation co-efficient
	676	*********************************************************************************************/
	677	MDOUBLE calcPearsonCorrelation(const Vdouble& oneRatesVec, const Vdouble& otherRatesVec, const int numberOfSignificantDigits)
	678	{
	679	MDOUBLE res = 0.0;
	680
	681	//MDOUBLE cov = calcCoVariance(oneRatesVec, otherRatesVec);
	682	//MDOUBLE sdOne = computeStd(oneRatesVec);
	683	//MDOUBLE sdOther = computeStd(otherRatesVec);
	684	//res = cov/(sdOne*sdOther);
	685
	686	int seqLength = oneRatesVec.size();
	687	MDOUBLE meanOne = computeAverage(oneRatesVec);
	688	MDOUBLE meanOther = computeAverage(otherRatesVec);
	689	MDOUBLE cov = 0;
	690	MDOUBLE sdOne = 0;
	691	MDOUBLE sdOther = 0;
	692	for (int i=0; i<seqLength; ++i)
	693	{
	694	cov+=(oneRatesVec[i]-meanOne)*(otherRatesVec[i]-meanOther); // crossProductSum
	695	sdOne+=(oneRatesVec[i]-meanOne)*(oneRatesVec[i]-meanOne); // sqDevX
	696	sdOther+=(otherRatesVec[i]-meanOther)*(otherRatesVec[i]-meanOther); // sqDevY
	697	}
	698	res = cov/ (sqrt(sdOne)*sqrt(sdOther)); // no need to divide by seqLength -1, since canceled out
	699
	700	MDOUBLE rounded = floorf(res * pow(10.0,numberOfSignificantDigits) + 0.5) / pow(10.0,numberOfSignificantDigits); // if not rounded, perfect correlations may return 1.000002, for example
	701	return rounded;
	702	}
	703
	704	/********************************************************************************************
	705	Benjamini–Hochberg–Yekutieli procedure controls the false discovery rate
	706	*********************************************************************************************/
	707	MDOUBLE computeFDRthreshold(Vdouble& pVals, MDOUBLE levelOfFDRcontroled, bool isPValsSorted){
	708	MDOUBLE FDRthreshold = 0;
	709	if(!isPValsSorted)
	710	sort(pVals.begin(),pVals.end());
	711	int indexAll = pVals.size();
	712	for (int i=0; i<pVals.size(); ++i){
	713	MDOUBLE correctedVal = (double)(i+1)/(double)indexAll *levelOfFDRcontroled;
	714	if( pVals[i] <= correctedVal){
	715	FDRthreshold = pVals[i];
	716	}
	717	}
	718	return FDRthreshold;
	719	}
	720
	721	MDOUBLE calcCoVariance(const Vdouble& oneRatesVec, const Vdouble& otherRatesVec)
	722	{
	723	MDOUBLE cov = 0.0;
	724	MDOUBLE elemMulti = 0.0;
	725	int seqLength = oneRatesVec.size();
	726	MDOUBLE meanOne = computeAverage(oneRatesVec);
	727	MDOUBLE meanOther = computeAverage(otherRatesVec);
	728	for (int i=0; i<seqLength; ++i)
	729	{
	730	elemMulti += (oneRatesVec[i]-meanOne) * (otherRatesVec[i]-meanOther);
	731	}
	732	cov = elemMulti/(seqLength -1);
	733	return cov;
	734	}
	735
	736
	737	ostream &operator<<(ostream &out, const Vdouble &v){
	738	for (int j=0;j<v.size();++j)
	739	out<< v[j]<<" ";
	740	out <<endl;
	741	return(out);
	742	}
	743
	744	ostream &operator<<(ostream &out, const VVdouble &m){
	745	for (int i=0;i<m.size();++i)
	746	out<<m[i];
	747	out <<endl;
	748	return(out);
	749	}
	750
	751	void mult(Vdouble& vec, const MDOUBLE factor){
	752	for(int i=0;i<vec.size();++i)
	753	vec[i]*=factor;
	754	}
	755
	756	void mult(VVdouble& vec, const MDOUBLE factor){
	757	for(int i=0;i<vec.size();++i)
	758	mult(vec[i],factor);
	759	}
	760
	761
	762
	763	////orderVec - determine the relative order of vecIn
	764	////returns orderVecOut[i] is the rank of vecIn[i]
	765	////note that in case of ties the rank will be the midrank of the tied group
	766	//Vdouble orderVec(const Vdouble& vecIn)
	767	//{
	768	// int vecSize = vecIn.size();
	769	// Vdouble orderVecOut(vecSize);
	770	// vector< vecElem<MDOUBLE> > sortVec(vecSize);
	771	// for (int x =0; x < vecSize ; ++x)
	772	// {
	773	// sortVec[x].setValue(vecIn[x]);
	774	// sortVec[x].setPlace(x);
	775	// }
	776	// sort(sortVec.begin(), sortVec.end());
	777	//
	778	// //check for ties and correct their rank
	779	// Vdouble rankVec(vecSize);
	780	// MDOUBLE rank;
	781	// for (int i=0; i < vecSize; )
	782	// {
	783	// if (sortVec[i].getValue() != sortVec[i+1].getValue())
	784	// {//no tie
	785	// rankVec[i] = i;
	786	// ++i;
	787	// }
	788	// else
	789	// {//tie
	790	// int to =0;
	791	// for (to = i+1; (to<=vecSize) && (sortVec[i].getValue() == sortVec[to].getValue());++to)
	792	// ;//check how far the tie goes
	793	// to--;
	794	// rank = 0.5*(to + i);
	795	// for (int ji = i; ji<= to; ji++)
	796	// {
	797	// rankVec[ji] = rank;
	798	// }
	799	//
	800	// i = to+1;
	801	// }
	802	// }
	803	// for (int j =0; j < vecSize; ++j) {
	804	// assert ((rankVec[j] >= 0) && (rankVec[j] < vecSize));
	805	// orderVecOut[sortVec[j].getPlace()] = rankVec[j];
	806	// }
	807	// return orderVecOut;
	808	//}
	809
	810	//orderVec - determine the relative order of vecIn
	811	//orderVecOut[i] is the rank of vecIn[i]
	812	//note that in case of ties the rank will be the midrank of the tied group
	813	//return sum of n^3 - n where n is the number of elements in each tied group - see spearman rank correlation
	814	MDOUBLE orderVec(const vector<MDOUBLE>& vecIn, vector<MDOUBLE>& orderVecOut)
	815	{
	816	int vecSize = vecIn.size();
	817	orderVecOut.resize(vecSize);
	818	vector< vecElem<MDOUBLE> > sortVec(vecSize);
	819	for (int x =0; x < vecSize ; ++x)
	820	{
	821	sortVec[x].setValue(vecIn[x]);
	822	sortVec[x].setPlace(x);
	823	}
	824	sort(sortVec.begin(), sortVec.end());
	825	//check for ties and correct their rank
	826	Vdouble rankVec(vecSize);
	827	MDOUBLE sumRankDif = 0; //sum(Fk^3 - Fk)
	828
	829	MDOUBLE rank;
	830	for (int i=0; i < vecSize-1; ) // loop was till vecSize, out of range with sortVec[i+1]. Fixed (?)
	831	{
	832	if (sortVec[i].getValue() != sortVec[i+1].getValue())
	833	{//no tie
	834	rankVec[i] = i;
	835	++i;
	836	}
	837	else
	838	{//tie
	839	int to =0;
	840	for (to = i+1; (to<=vecSize) && (sortVec[i].getValue() == sortVec[to].getValue());++to)
	841	;//check how far the tie goes
	842	to--;
	843	rank = 0.5*(to + i);
	844	for (int ji = i; ji<= to; ji++)
	845	{
	846	rankVec[ji] = rank;
	847	}
	848
	849	int numTies = to - i +1; //number o fties in this group
	850	sumRankDif += numTiesnumTiesnumTies - numTies;
	851	i = to+1;
	852	}
	853	}
	854
	855	for (int j =0; j < vecSize; ++j) {
	856	assert ((rankVec[j] >= 0) && (rankVec[j] < vecSize));
	857	orderVecOut[sortVec[j].getPlace()] = rankVec[j];
	858	}
	859	return sumRankDif;
	860	}
	861
	862
	863	void orderRankNoTies(const vector<MDOUBLE>& vecIn, vector<MDOUBLE>& orderVecOut)
	864	{
	865	int vecSize = vecIn.size();
	866	orderVecOut.resize(vecSize);
	867	vector< vecElem<MDOUBLE> > sortVec(vecSize);
	868	for (int x =0; x < vecSize ; ++x)
	869	{
	870	sortVec[x].setValue(vecIn[x]);
	871	sortVec[x].setPlace(x);
	872	}
	873	sort(sortVec.begin(), sortVec.end());
	874	for (int j =0; j < vecSize; ++j) {
	875	orderVecOut[sortVec[j].getPlace()] = j;
	876	}
	877
	878	}
	879
	880
	881
	882	void orderVec(const Vdouble& vecIn, vector< vecElem<MDOUBLE> >& orderVecOut)
	883	{
	884	int vecSize = vecIn.size();
	885	orderVecOut.resize(vecSize);
	886	for (int x =0; x < vecSize ; ++x)
	887	{
	888	orderVecOut[x].setValue(vecIn[x]);
	889	orderVecOut[x].setPlace(x);
	890	}
	891	sort(orderVecOut.begin(), orderVecOut.end());
	892	}
	893
	894
	895	void splitString2(string str, string seperater, string &first, string &second)
	896	{
	897	int i = (int)str.find(seperater); //find seperator
	898	if(i != -1)
	899	{
	900	int y = 0;
	901	if(!str.empty())
	902	{
	903	while(y != i)
	904	{
	905	first += str[y++]; //creating first string
	906	}
	907	y = y+(int)seperater.length(); //jumping forward seperater length
	908	while(y != str.length())
	909	{
	910	second += str[y++]; //creating second string
	911	}
	912
	913	}
	914	}
	915	else
	916	{
	917	first = str;
	918	second = "NULL"; //if seperator is not there then second string == null
	919	}
	920	}
	921
	922
	923	void splitString(const string& str,vector<string>& subStrs,const string& delimiter)
	924	{
	925	// Skip delimiter at beginning.
	926	string::size_type lastPos = str.find_first_not_of(delimiter,0);
	927	// Find first "non-delimiter".
	928	string::size_type pos = str.find_first_of(delimiter,lastPos);
	929
	930	while (string::npos != pos \|\| string::npos != lastPos)
	931	{
	932	// Found a subStr, add it to the vector.
	933	subStrs.push_back(str.substr(lastPos,pos - lastPos));
	934	// Skip delimiter. Note the "not_of"
	935	lastPos = str.find_first_not_of(delimiter,pos);
	936	// Find next "non-delimiter"
	937	pos = str.find_first_of(delimiter,lastPos);
	938	}
	939	}
	940
	941	Vint getVintFromStr(const string& inStr)
	942	{
	943	Vint res;
	944	vector<string> outStr;
	945	splitString(inStr, outStr, ",");
	946	for (int i = 0; i < outStr.size(); ++i)
	947	{
	948	int x = atoi(outStr[i].c_str());
	949	res.push_back(x);
	950	}
	951	return res;
	952	}
	953
	954	string getStrFromVint(const Vint& inVec)
	955	{
	956	string res("");
	957	for (int i = 0; i < inVec.size(); ++i)
	958	{
	959	if (i > 0)
	960	res += ",";
	961	res += int2string(inVec[i]);
	962	}
	963	return res;
	964	}
	965
	966
	967	/********************************************************************************************
	968	gainLoss project
	969	*********************************************************************************************/
	970	int fromIndex2gainIndex(const int i, const int gainCategories, const int lossCategories){
	971	int gainIndex;
	972	if(lossCategories<=gainCategories){
	973	gainIndex = (int)floor((double)i/(lossCategories) );
	974	}
	975	else{
	976	gainIndex = i%(gainCategories);
	977	}
	978	return gainIndex;
	979	}
	980
	981	int fromIndex2lossIndex(const int i, const int gainCategories, const int lossCategories){
	982	int lossIndex;
	983	if(lossCategories<=gainCategories){
	984	lossIndex = i%(lossCategories);
	985	}
	986	else{
	987	lossIndex = (int)floor((double)i/(gainCategories) );
	988	}
	989	return lossIndex;
	990	}
	991
	992	int giveRandomState(const int alphabetSize, const int beginningState, const VVdouble &changeProbabilities)
	993	{
	994	for (int loop = 0 ; loop < 100000 ; ++loop)
	995	{
	996	MDOUBLE theRandNum = talRandom::giveRandomNumberBetweenZeroAndEntry(1.0);
	997	MDOUBLE sum = 0.0;
	998	for (int state = 0; state < alphabetSize; ++state)
	999	{
	1000	sum += changeProbabilities[beginningState][state];
	1001	if (theRandNum < sum) {
	1002	return state;
	1003	}
	1004	}
	1005	}
	1006	errorMsg::reportError("giveRandomState: could not give random character. The reason is unknown.");
	1007	return 1;
	1008
	1009	}
	1010
	1011	int giveRandomState(const int alphabetSize, const Vdouble &frequencies) {
	1012	for (int loop =0 ;loop<100000 ;loop++) {
	1013	MDOUBLE theRandNum = talRandom::giveRandomNumberBetweenZeroAndEntry(0.999);
	1014	MDOUBLE sum = 0.0;
	1015	for (int j=0; j < alphabetSize;++j) {
	1016	sum+=frequencies[j];
	1017	if (theRandNum<sum) return j;
	1018	}
	1019	}
	1020	errorMsg::reportError("giveRandomState: Could not give random character. The reason is probably that the frequencies do not sum to one.");
	1021	return 1;
	1022	}
	1023	/********************************************************************************************
	1024	additional Math functions
	1025	*********************************************************************************************/
	1026	int sign(MDOUBLE r){
	1027	if(r>0) return 1;
	1028	else return -1;
	1029	}
	1030	MDOUBLE factorial(int x) {
	1031	MDOUBLE fac = 1;
	1032	for (int i=2; i<=x; i++)
	1033	fac *= i;
	1034	return fac;
	1035	}
	1036	MDOUBLE BinomialCoeff(int a, int b) {
	1037	return factorial(a)/(factorial(b)*factorial(a-b));
	1038	}
	1039
	1040	MDOUBLE exponentResolver(Vdouble& valuesVec){
	1041	//First find largest element in valuesVec
	1042	MDOUBLE largest = VERYSMALL;
	1043	int largestIndex = -1;
	1044	for(int i = 0;i < valuesVec.size();++i){
	1045	if(valuesVec[i] > largest){
	1046	largest = valuesVec[i];
	1047	largestIndex = i;
	1048	}
	1049	}
	1050	if(largestIndex == -1){
	1051	errorMsg::reportError("exponentResolver: Could not find the largest element in the input vector");
	1052	return 1;
	1053	}
	1054	//Now sum over all elements that are greater than -50. Note that exp(-50) is way smaller than the famous EPSILON so we are pretty safe from neglecting anything significant
	1055	MDOUBLE sum = 1.0;
	1056	MDOUBLE cutoff = -50;
	1057	for(int i = 0;i < valuesVec.size();++i){
	1058	if(i == largestIndex) continue;
	1059	if((valuesVec[i]-largest) < cutoff) continue;
	1060	sum += exp(valuesVec[i]-largest);
	1061	}
	1062	MDOUBLE result = largest+log(sum);
	1063	return(result);
	1064	}
	1065
	1066	MDOUBLE sumVdouble(const Vdouble & vec){
	1067	MDOUBLE sum = 0.0;
	1068	for(int i = 0;i < vec.size();++i){
	1069	sum += vec[i];
	1070	}
	1071	return(sum);
	1072	}

+189

-0

libs/phylogeny/someUtil.h less more

	0	// $Id: someUtil.h 11905 2013-12-26 10:12:03Z itaymay $
	1
	2	#ifndef ___SOME_UTIL_H
	3	#define ___SOME_UTIL_H
	4
	5	#include "logFile.h"
	6	#include "definitions.h"
	7	#include "alphabet.h"
	8	#include <string>
	9	#include <iostream>
	10	using namespace std;
	11
	12	//to be used for orderVec
	13	template <class T>
	14	class vecElem
	15	{
	16	public:
	17	vecElem();
	18	virtual ~vecElem() {};
	19	void setValue(const T val) {m_value = val;}
	20	T getValue() {return m_value;}
	21	void setPlace(const int place) {m_place = place;}
	22	int getPlace() {return m_place;}
	23	inline bool operator< (const vecElem& elemIn) const;
	24	private:
	25	int m_place;
	26	T m_value;
	27	};
	28
	29
	30	template <class T>
	31	vecElem< T >::vecElem()
	32	{
	33	m_value = -1;
	34	m_place = -1;
	35	}
	36
	37	//template <class T>
	38	//vecElement< T >::~vecElement()
	39	//{
	40	//}
	41	template <class T>
	42	bool vecElem< T >::operator<(const vecElem& elemIn) const
	43	{
	44	if (m_value == elemIn.m_value)
	45	return (m_place < elemIn.m_place);
	46	else
	47	return (m_value < elemIn.m_value);
	48	}
	49
	50
	51
	52	// STATISTICAL UTILITIES:
	53
	54	MDOUBLE computeAverage(const vector<int>& vec);
	55	MDOUBLE computeAverage(const vector<MDOUBLE>& vec, const Vdouble* weightsV = NULL);
	56	MDOUBLE computeAverageOfAbs(const vector<MDOUBLE>& vec, const Vdouble* weightsV = NULL);
	57	MDOUBLE computeMedian(const vector<MDOUBLE>& vec);
	58	MDOUBLE computeQuantile(const vector<MDOUBLE>& vec, MDOUBLE quantile);
	59	MDOUBLE computeQuantileFrac(const vector<MDOUBLE>& vec, MDOUBLE quantile);
	60	MDOUBLE computeStd(const vector<MDOUBLE>& vec);// page 60, Sokal and Rohlf
	61	MDOUBLE computeStd(const vector<int>& vec);// page 60, Sokal and Rohlf
	62	MDOUBLE copmutePoissonProbability(const int& k, const long double& lamda);
	63	// re-computes a vector of frequencies after one value is changed:
	64	// all other values are set according to their relative value
	65	void computeRelativeFreqsFollowingOneChanged(MDOUBLE newValFreq, int indexNewFreq,Vdouble &freqs);//freqs is the old vector into which we write the new values
	66
	67	// SIMULATIONS:
	68	int giveRandomState(const int alphabetSize, const int beginningState, const VVdouble &changeProbabilities);
	69	int giveRandomState(const int alphabetSize, const Vdouble &frequencies);
	70
	71	// TIME UTILITIES
	72	void printTime(ostream& out);
	73
	74	// TEXT UTILITIES
	75	string int2string(const int i);
	76	string double2string(const double x, int const howManyDigitsAfterTheDot=5, bool round = false);
	77	MDOUBLE string2double(const string& inString);
	78	bool allowCharSet(const string& allowableChars, const string& string2check);
	79	bool isCharInString(const string& stringToCheck, const char charToCheck);
	80	void putFileIntoVectorStringArray(istream &infile,vector<string> &inseqFile);
	81
	82	bool fromStringIterToInt(string::const_iterator & it,
	83	const string::const_iterator endOfString,
	84	int& res);
	85
	86	string takeCharOutOfString(const string& charsToTakeOut, const string& fromString);
	87	void toLower(string& str);
	88	void toUpper(string& str);
	89	string toUpper2(const string& str);
	90	//splits the string to substr according to the given delimiter (parallel to split in perl)
	91	void splitString(const string& str,vector<string>& subStrs,const string& delimiter);
	92	//input: a list of INTs separated by commas ("1,3,5"); returns the ints in a vector
	93	Vint getVintFromStr(const string& str);
	94	//returns a list of INTs separated by commas ("1,3,5")
	95	string getStrFromVint(const Vint& inVec);
	96
	97	// FILE UTILITIES
	98	bool checkThatFileExist(const string& fileName);
	99	string* searchStringInFile(const string& string2find,
	100	const int index,
	101	const string& inFileName);
	102	string* searchStringInFile(const string& string2find,
	103	const string& inFileName);
	104	bool doesWordExistInFile(const string& string2find,const string& inFileName);
	105	void createDir(const string& curDir,const string& dirName);
	106
	107
	108	//BIT UTILITIES
	109	//void nextBit(bitset<64> &cur);
	110
	111	//ARITHMETIC UTILITIES
	112	//DEQUAL: == UP TO EPSILON
	113	//DBIG_EQUAL: >= UP TO EPSILON
	114	//DSMALL_EQUAL: <= UP TO EPSILON
	115	bool DEQUAL(const MDOUBLE x1, const MDOUBLE x2, const MDOUBLE epsilon = 1.192092896e-07F); // epsilon taken from WINDOW'S FILE FLOAT.H
	116	bool DBIG_EQUAL(const MDOUBLE x1, const MDOUBLE x2, const MDOUBLE epsilon = 1.192092896e-07F);
	117	bool DSMALL_EQUAL(const MDOUBLE x1, const MDOUBLE x2, const MDOUBLE epsilon = 1.192092896e-07F); // {return ((x1 < x2) \|\| DEQUAL(x1, x2));}
	118
	119	//swap between the 4 variables such that the first becomes the second, second becomes the third and third becomes the fourth.
	120	//used in function mnbrack below.
	121	void shift3(MDOUBLE &a, MDOUBLE &b, MDOUBLE &c, const MDOUBLE d);
	122
	123
	124	// print vector and VVdoulbe util
	125	ostream &operator<<(ostream &out, const Vdouble &v);
	126	ostream &operator<<(ostream &out, const VVdouble &m);
	127	void mult(Vdouble& vec, const MDOUBLE factor);
	128	void mult(VVdouble& vec, const MDOUBLE factor);
	129	//scale vecToScale so that its new average is AvgIn. return the scaling factor.
	130	MDOUBLE scaleVec(Vdouble& vecToScale, const MDOUBLE avgIn);
	131	//determine the relative order of vecIn. The order vector is returned
	132	//ex: vecIn = [0.1 0.4 0.01 0.9 1.8] orderVecOut = [1 2 0 3 4]
	133	MDOUBLE orderVec(const vector<MDOUBLE>& vecIn, vector<MDOUBLE>& orderVecOut);
	134
	135	void orderRankNoTies(const vector<MDOUBLE>& vecIn, vector<MDOUBLE>& orderVecOut);
	136
	137	//in this version orderVecOut does not preserv the same order as vecIn.
	138	//orderVecOut[0] cotains the lowest score and it is stored in orderVecOut[0].getValue()
	139	//The place in the original vector is stored in orderVecOut[0].getPlace()
	140	void orderVec(const Vdouble& vecIn, vector< vecElem<MDOUBLE> >& orderVecOut);
	141
	142	//calculates the spearman rank correlation value
	143	MDOUBLE calcRankCorrelation(const Vdouble& oneRatesVec, const Vdouble& otherRatesVec);
	144	//calculates the spearman rank correlation value, Ofir implementation
	145	MDOUBLE calcRankCorrelation2(const Vdouble& oneRatesVec, const Vdouble& otherRatesVec);
	146
	147	MDOUBLE calcCoVariance(const Vdouble& oneRatesVec, const Vdouble& otherRatesVec);
	148	MDOUBLE calcPearsonCorrelation(const Vdouble& oneRatesVec, const Vdouble& otherRatesVec, const int numberOfSignificantDigits=5);
	149
	150	MDOUBLE computeFDRthreshold(Vdouble& sortedPVals, MDOUBLE levelOfFDRcontroled, bool isPValsSorted=false);
	151
	152
	153	MDOUBLE calcRelativeMSEDistBetweenVectors(const Vdouble& trueValues, const Vdouble& inferredValues, const MDOUBLE threshhold = 0.0);
	154	MDOUBLE calcMSEDistBetweenVectors(const Vdouble& trueValues, const Vdouble& inferredValues);
	155	//MAD = mean absolute deviations distance
	156	MDOUBLE calcMADDistBetweenVectors(const Vdouble& oneRatesVec, const Vdouble& otherRatesVec);
	157	MDOUBLE calcRelativeMADDistBetweenVectors(const Vdouble& trueValues, const Vdouble& inferredValues, const MDOUBLE threshhold = 0.0);
	158	MDOUBLE sumVdouble(const Vdouble & vec);
	159
	160	/* Will split a string into 2 by the given seperator
	161	Example for usage:
	162	string a, b, c;
	163	a.assign("Hello world!");
	164	splitString2(a, " ", b, c);
	165	cout << "b = " << b << endl << "c = " << c << endl;
	166	//b == Hello
	167	//c == world!
	168	*/
	169	void splitString2(string str, string seperater, string &first, string &second);
	170
	171	// used for gainLoss project
	172	int fromIndex2gainIndex(const int i, const int gainCategories, const int lossCategories);
	173	int fromIndex2lossIndex(const int i, const int gainCategories, const int lossCategories);
	174
	175	int sign(MDOUBLE r);
	176	MDOUBLE factorial(int x);
	177	MDOUBLE BinomialCoeff(int a, int b);
	178	int round2int(MDOUBLE num);
	179
	180	//This function does: ln(e**(valuesVec[0])+e**(valuesVec[1])+..e**(valuesVec[n]))
	181	//Which is: ln(e**(valuesVec[x])*(1+sum_over_i_leave_x(e**(valuesVec[i]-valuesVec[x]))))
	182	//Which is: valuesVec[x]+ln(1+sum_over_i_leave_x(e**(valuesVec[i]-valuesVec[x])))
	183	//Where: x is the index of the largest element in valuesVec and every valuesVec[i] which is really small should be neglected in order to avoid underflow
	184	MDOUBLE exponentResolver(Vdouble& valuesVec);
	185
	186
	187	#endif
	188

+84

-0

libs/phylogeny/split.cpp less more

	0	// $Id: split.cpp 962 2006-11-07 15:13:34Z privmane $
	1
	2	#include "split.h"
	3	#include <cassert>
	4	#include <algorithm>
	5	using namespace std;
	6
	7	// there are always two options. Either the active set is _set[0] or _set[1].
	8	// this depends on the parameter _reverse.
	9	// The "0" will always be in the active set.
	10	// so, for example consider the leaves [0,1,2] (_max = 3).
	11	// The split {}{0,1,2} can be represented by both the empty split {} or the
	12	// {0,1,2} split. Because the {0,1,2} split contains the "0" - this will be the active split.
	13	// so we set _set[0] to be empty, and in _set[1] which is the active one (_reverse = true)
	14	// we insert the leaves.
	15	split::split (const int max): _max(max), _reverse(true){
	16	for(int j=0;j<max;++j) {
	17	_set[1].insert(j);
	18	}
	19	}
	20
	21	// isMember searches for the key in the active set.
	22	bool split::isMember(const int key) const {
	23	return(_set[_reverse].find(key)!=_set[_reverse].end());
	24	}
	25
	26
	27	void split::reverseMembership(const int key){
	28	assert(key<_max && key >= 0);
	29
	30	// where is the key now
	31	// if the key is member, than in = _reverese;
	32	// Otherwise in = !_reverse
	33	bool in =(isMember(key))?_reverse:!_reverse;
	34
	35	_set[in].erase(key);
	36	_set[!in].insert(key);
	37	if (key==0) // if we add "0", we need to reverse the split
	38	reverse();
	39	};
	40
	41
	42	int split::size() const {
	43	int tmp = _set[_reverse].size();
	44	return (tmp<_max-tmp?tmp:_max-tmp);
	45	}
	46
	47	void split::print(ostream& sout) const{ // = cout
	48	sout <<"size ="<<size()<<" ";
	49	set<int>::const_iterator i;
	50	for (i=_set[_reverse].begin();i != _set[_reverse].end();++i)
	51	sout << *i << " ";
	52	sout <<" \| ";
	53	for (i=_set[!_reverse].begin();i != _set[!_reverse].end();++i)
	54	sout << *i << " ";
	55	sout << endl;
	56	}
	57
	58	bool split::lessThen(const split& other) const{
	59	return(_set[_reverse]<other._set[other._reverse]);
	60	}
	61
	62	bool split::compatible(const split& other) const {
	63	set<int>::const_iterator i (_set[_reverse].begin());
	64	set<int>::const_iterator i_end (_set[_reverse].end());
	65	set<int>::const_iterator j (other._set[other._reverse].begin());
	66	set<int>::const_iterator j_end (other._set[other._reverse].end());
	67	return (includes(i,i_end,j,j_end) \|\| includes(j,j_end,i,i_end));
	68	}
	69
	70	void split::reverse(){ // actualy reverse membership in the set
	71	_reverse=!_reverse;
	72	}
	73
	74	bool operator<(const split& a, const split& b) {
	75	return(a.lessThen(b));
	76	}
	77
	78	ostream& operator<< (ostream &sout, const split& split) {
	79	split.print(sout);
	80	return sout;
	81	}
	82
	83

+75

-0

libs/phylogeny/split.h less more

	0	// $Id: split.h 962 2006-11-07 15:13:34Z privmane $
	1
	2	#ifndef ___SPLIT
	3	#define ___SPLIT
	4
	5	#include "definitions.h"
	6	#include <set>
	7	#include <vector>
	8	#include <iostream>
	9	#include <cassert>
	10	using namespace std;
	11
	12
	13	// this split always has the member "0" in its active set.
	14	// if not, it will take the reverse of the split, so that it does have the "0" member.
	15
	16	class split {
	17	public:
	18	explicit split (const int max=0); // empty split
	19
	20	// get an itarator of members and the max member.
	21
	22	template<class Iterator>
	23	split (Iterator& i,
	24	Iterator& end,
	25	int max):_max(max), _reverse(true){
	26	for(int j=0;j<max;++j)
	27	_set[1].insert(j);
	28
	29	for (;i!=end;++i){
	30	assert((i)<_max && (i) >= 0);
	31	_set[0].insert(*i);
	32	_set[1].erase(*i);
	33	if (*i==0) // if we add "0", we may need to reverse the split
	34	reverse();
	35	}
	36	}
	37
	38	bool isMember(const int key) const;
	39	int size() const ;
	40	void print(ostream& sout = cout) const;
	41	bool lessThen(const split& other) const;
	42	bool compatible(const split& other) const ;
	43
	44	// remove the key from the active set to the non-active set or vice versa.
	45	// for example if the split is {0,1 \| 2}
	46	// reverseMembership(1) will change the split to this one: {0 \| 1,2 }
	47	void reverseMembership(const int key);
	48
	49	void getId(vector<int> & id) const {
	50	id.clear();
	51	bool small(_set[0].size()>_set[1].size());
	52	for (set<int>::const_iterator i=_set[small].begin();i!=_set[small].end();++i)
	53	id.push_back(*i);
	54	}
	55
	56	private:
	57	void reverse();
	58
	59
	60	int _max; // max element. all elements are asumed to be in the range [1..max]
	61	set<int> _set[2];
	62	bool _reverse;
	63	};
	64
	65	bool operator<(const split& a,
	66	const split& b) ;
	67
	68
	69
	70	ostream& operator<< (ostream &sout, const split& split) ;
	71
	72
	73
	74	#endif // ___SPLIT

+50

-0

libs/phylogeny/splitMap.cpp less more

	0	// $Id: splitMap.cpp 962 2006-11-07 15:13:34Z privmane $
	1
	2	#include "splitMap.h"
	3	#include <algorithm>
	4	using namespace std;
	5
	6	int splitMap::add(const split & in) { // add a split and return it's new count.
	7	return(_map[in]=_map[in]+1);
	8	}
	9
	10	class valCmp {
	11	public:
	12	bool operator()(const pair<split,int> & elem1, const pair<split,int> & elem2) {
	13	return (elem1.second > elem2.second);
	14	}
	15	};
	16
	17	vector<pair<split,int> > splitMap::sortSplits() const{
	18	vector<pair<split,int> > svec(_map.size());
	19	partial_sort_copy(_map.begin(),_map.end(),svec.begin(),svec.end(),valCmp());
	20	return svec;
	21	}
	22
	23	int splitMap::counts(const split& in) const {
	24	mapSplitInt::const_iterator i(_map.find(in));
	25	if (i==_map.end()) return 0;
	26	return i->second;
	27	}
	28
	29	void splitMap::print(ostream& sout) const {// default cout.
	30	for (mapSplitInt::const_iterator i = _map.begin(); i != _map.end();++i) {
	31	sout << i->second<<"\t"<<i->first;
	32	}
	33	sout <<endl;
	34	}
	35
	36
	37	ostream& operator<< (ostream &sout, const splitMap& split_map) {
	38	split_map.print(sout);
	39	return sout;
	40	}
	41
	42	/*splitMap::reverse_mapSplitInt splitMap::reverse() const
	43	{
	44	reverse_sMap_t rmap;
	45	for (sMap_t::const_iterator i=_map.begin(); i!=_map.end();++i)
	46	rmap.insert(rMapPair_t(i->second,i->first));
	47	return rmap;
	48	}
	49	*/

+37

-0

libs/phylogeny/splitMap.h less more

	0	// $Id: splitMap.h 962 2006-11-07 15:13:34Z privmane $
	1
	2	#ifndef ___SPLITMAP
	3	#define ___SPLITMAP
	4
	5	#include "definitions.h"
	6	#include "split.h"
	7	#include <map>
	8	using namespace std;
	9
	10	// splitMap is a map of split to integers used for counting the occurences of each split.
	11	// Questions we want the class to be able to answer:
	12	// 1. What is the occurence a specific split.
	13	// 2. what is the most common split
	14	// 3. Sort the splits according to their frequency.
	15
	16	class splitMap {
	17	// public:
	18	// typedef pair<int,const split> rMapPair_t;
	19	// typedef multimap<const int,const split> reverse_sMap_t;
	20	// typedef multimap<int,split> reverse_sMap_t;
	21	// reverse_sMap_t reverse() const ;
	22	public:
	23	explicit splitMap(){}; // empty constractor
	24	int add(const split & in); // return the new frequency.
	25	int counts(const split& in) const; // counts the number of occurances
	26	void print(ostream& sout = cout) const;
	27	vector<pair<split,int> > sortSplits() const;
	28	private:
	29
	30	typedef map<split,int> mapSplitInt;
	31	mapSplitInt _map;
	32	};
	33
	34	ostream& operator<< (ostream &sout, const splitMap& split_map);
	35	#endif
	36

+109

-0

libs/phylogeny/splitTreeUtil.cpp less more

	0	// $Id: splitTreeUtil.cpp 962 2006-11-07 15:13:34Z privmane $
	1
	2	#include "splitTreeUtil.h"
	3	#include "someUtil.h"
	4
	5	static int idFromName(const string name, const map<string, int> & nameIdMap)
	6	{
	7	map<string, int>::const_iterator i=nameIdMap.find(name);
	8	if (i==nameIdMap.end()) errorMsg::reportError(" error in splitTreeUtil. Name not found in nameIdMap");
	9	return (i->second);
	10	}
	11
	12	// returns true if all the sons of myNode are in the split.
	13	// return false if all the sons of myNode are NOT in the split
	14	// if some of the sons are in and some are not - set foundTheNodeAlready to true.
	15	// and set splitNode to be that node.
	16	static bool findNodeToSplitRecursive( const tree::nodeP myNode,
	17	const split& mySplit,
	18	tree::nodeP& splitNode,
	19	bool & foundTheNodeAlready,
	20	const map<string, int> & nameIdMap) {
	21	if (myNode->isLeaf()) return (mySplit.isMember(idFromName(myNode->name(),nameIdMap)));
	22	bool inSplit = findNodeToSplitRecursive(myNode->getSon(0),mySplit,splitNode,foundTheNodeAlready,nameIdMap);
	23	if (foundTheNodeAlready) return true;
	24	for (int i=1; i < myNode->getNumberOfSons(); ++i) {
	25	bool tmp = findNodeToSplitRecursive(myNode->getSon(i),mySplit,splitNode,foundTheNodeAlready,nameIdMap);
	26	if (foundTheNodeAlready) return true;
	27	if (tmp != inSplit) {
	28	foundTheNodeAlready = true;
	29	splitNode = myNode;
	30	return true;
	31	}
	32	}
	33	return inSplit;
	34	}
	35
	36
	37
	38	tree::nodeP findNodeToSplit(const tree& et,
	39	const split& mySplit,
	40	const map<string, int> & nameIdMap) {
	41	tree::nodeP res;
	42	bool foundTheNodeAlready = false;
	43	findNodeToSplitRecursive(et.getRoot(),mySplit,res,foundTheNodeAlready,nameIdMap);
	44	return res;
	45	}
	46
	47	void applySplit(tree& et,
	48	const split& mySplit,
	49	const map<string, int> & nameIdMap) {
	50	tree::nodeP node2split = findNodeToSplit(et,mySplit,nameIdMap);
	51	et.rootAt(node2split);
	52	applySplitToRoot(et,mySplit,nameIdMap);
	53	}
	54
	55	void splitSonsFromNode(tree & et, tree::nodeP fatherNode, vector<tree::nodeP> & son2split)
	56	{
	57	for (int k=0; k < son2split.size(); ++k) {
	58	if (son2split[k]->father() != fatherNode )
	59	errorMsg::reportError(" error in function bootstrap::splitSonsFromNode - nodes don't have the same father");
	60	}
	61	// if the split allready exists, we do not need to do anything.
	62	if (son2split.size()==fatherNode->getNumberOfSons() // the branch above us is the required split
	63	\|\| son2split.size() <=1 // the branch below us is it
	64	\|\| (fatherNode->father()==NULL && son2split.size()==fatherNode->getNumberOfSons()-1)
	65	// the branch above us is the required split
	66	)
	67	return;
	68
	69	tree::nodeP theNewNode = et.createNode(fatherNode,et.getNodesNum());
	70	theNewNode->setName("N"+int2string(theNewNode->id()));
	71	for (int i=0; i < son2split.size(); ++i) {
	72	son2split[i]->setFather(theNewNode);
	73	theNewNode->setSon(son2split[i]);
	74	// remove from son list of father node.
	75	fatherNode->removeSon(son2split[i]);
	76	}
	77	}
	78
	79	void applySplitToRoot(tree& et,
	80	const split& mySplit,
	81	const map<string, int> & nameIdMap) {
	82	vector<tree::nodeP> sonsThatHaveToBeSplit = findSonsThatHaveToBeSplit(et,mySplit,nameIdMap);
	83	splitSonsFromNode(et, et.getRoot(), sonsThatHaveToBeSplit);
	84	}
	85
	86	vector<tree::nodeP> findSonsThatHaveToBeSplit(const tree& et,
	87	const split& mySplit,
	88	const map<string, int> & nameIdMap){
	89	// we assume that split is compatible with the tree and that the split is a subset of the children of the root.
	90	// i.e., the node that has to be splitted is the root.
	91	vector<tree::nodeP> res;
	92	for (int i=0; i < et.getRoot()->getNumberOfSons(); ++i) {
	93	if (childIsInTheSplit(et.getRoot()->getSon(i),mySplit,nameIdMap)) {
	94	res.push_back(et.getRoot()->getSon(i));
	95	}
	96	}
	97	return res;
	98	}
	99
	100	bool childIsInTheSplit(const tree::nodeP & myNode,
	101	const split& mySplit,
	102	const map<string, int> & nameIdMap) {
	103	if (myNode->isInternal()) return childIsInTheSplit(myNode->getSon(0),mySplit,nameIdMap);
	104	else {// we are in a leaf
	105	return (mySplit.isMember(idFromName(myNode->name(),nameIdMap)));
	106	}
	107	}
	108

+25

-0

libs/phylogeny/splitTreeUtil.h less more

	0	// $Id: splitTreeUtil.h 962 2006-11-07 15:13:34Z privmane $
	1
	2	#ifndef ___SPLIT_TREE_UTIL
	3	#define ___SPLIT_TREE_UTIL
	4	#include "tree.h"
	5	#include "split.h"
	6
	7	#include <vector>
	8	#include <map>
	9	using namespace std;
	10
	11
	12	tree::nodeP findNodeToSplit(const tree& et,const split& mySplit,const map<string, int> & nameIdMap);
	13	void applySplit(tree& et, const split& mySplit,const map<string, int> & nameIdMap);
	14	void splitSonsFromNode(tree & et, tree::nodeP fatherNode, vector<tree::nodeP> & son2split);
	15	void applySplitToRoot(tree& et, const split& mySplit,const map<string, int> & nameIdMap);
	16	vector<tree::nodeP> findSonsThatHaveToBeSplit(const tree& et,const split& mySplit,const map<string, int> & nameIdMap);
	17	bool childIsInTheSplit(const tree::nodeP & myNode, const split& mySplit,const map<string, int> & nameIdMap);
	18
	19
	20
	21	#endif
	22
	23
	24

+149

-0

libs/phylogeny/ssrvDistanceSeqs2Tree.cpp less more

	0	// $Id: ssrvDistanceSeqs2Tree.cpp 962 2006-11-07 15:13:34Z privmane $
	1
	2	#include "ssrvDistanceSeqs2Tree.h"
	3	//#include "bestAlphaAndNu.h"
	4	#include "bestParamUSSRV.h"
	5	#include "someUtil.h"
	6	#include <float.h>
	7
	8	tree ssrvDistanceSeqs2Tree::seqs2TreeIterative(const sequenceContainer &sc, MDOUBLE initAlpha, MDOUBLE initNu, const Vdouble weights, const tree constraintTreePtr) {
	9	_constraintTreePtr=constraintTreePtr;
	10	_alpha = initAlpha;
	11	_newNu = _nu = initNu;
	12	_weights = weights;
	13	return seqs2TreeIterativeInternal(sc, true);
	14	}
	15
	16	tree ssrvDistanceSeqs2Tree::seqs2TreeIterative(const sequenceContainer &sc, const Vdouble weights, const tree constraintTreePtr) {
	17	_constraintTreePtr=constraintTreePtr;
	18	_weights = weights;
	19	return seqs2TreeIterativeInternal(sc, false);
	20	}
	21
	22	tree ssrvDistanceSeqs2Tree::seqs2TreeIterative(const sequenceContainer &sc, const tree &initTree, const Vdouble weights, const tree constraintTreePtr) {
	23	_constraintTreePtr=constraintTreePtr;
	24	_weights = weights;
	25	return seqs2TreeIterativeInternalInitTreeGiven(sc, initTree);
	26	}
	27
	28	tree ssrvDistanceSeqs2Tree::seqs2TreeIterative(const sequenceContainer &sc, const tree &initTree, MDOUBLE initAlpha, const Vdouble weights, const tree constraintTreePtr) {
	29	_alpha = initAlpha;
	30	_weights = weights;
	31
	32	_constraintTreePtr=constraintTreePtr;
	33	return seqs2TreeIterativeInternalInitTreeGiven(sc, false, initTree, initAlpha);
	34	}
	35
	36	tree ssrvDistanceSeqs2Tree::seqs2TreeIterative(const sequenceContainer &sc, const tree &initTree, MDOUBLE initAlpha, MDOUBLE initNu, const Vdouble weights, const tree constraintTreePtr) {
	37	_alpha = initAlpha;
	38	_newNu = _nu = initNu;
	39	_weights = weights;
	40
	41	_constraintTreePtr=constraintTreePtr;
	42	return seqs2TreeIterativeInternalInitTreeGiven(sc, true, initTree, initAlpha);
	43	}
	44
	45	// NOTE! This version is a NON-ITERATIVE version that uses the side info supplied by the user
	46	tree ssrvDistanceSeqs2Tree::seqs2Tree(const sequenceContainer &sc, MDOUBLE alpha, MDOUBLE nu, const Vdouble weights, const tree constraintTreePtr) {
	47	_weights = weights;
	48	_alpha = alpha;
	49	_newNu = _nu = nu;
	50	_constraintTreePtr=constraintTreePtr;
	51	seqs2TreeOneIterationInternal(sc, true);
	52	return _newTree;
	53	}
	54
	55	tree ssrvDistanceSeqs2Tree::seqs2TreeBootstrap(const sequenceContainer &sc, const MDOUBLE alpha, MDOUBLE nu, const Vdouble weights, const tree constraintTreePtr) {
	56	_weights = weights;
	57	_alpha = alpha;
	58	_newNu = _nu = nu;
	59	return static_cast<iterativeDistanceSeqs2Tree *>(this)->seqs2TreeBootstrap(sc, weights, constraintTreePtr);
	60	}
	61
	62	// NOTE! This version calls ITERATIVE seqs2Tree because side info is not given by the user, so we have to generate and optimize it
	63	tree ssrvDistanceSeqs2Tree::seqs2Tree(const sequenceContainer &sc, const Vdouble weights, const tree constraintTreePtr) {
	64	return seqs2TreeIterative(sc,weights,constraintTreePtr);
	65	}
	66
	67	MDOUBLE ssrvDistanceSeqs2Tree::optimizeSideInfo(const sequenceContainer &sc, tree &et)
	68	{
	69	if (!dynamic_cast<tamura92*>(
	70	static_cast<replacementModelSSRV*>(_spPtr->getPijAccelerator()->getReplacementModel())
	71	->getBaseRM()
	72	)
	73	) {
	74	bestParamSSRV optimizer(true,true,false,true); // optimize alpha, nu, NOT tamura92 params, and bbl
	75	optimizer(et,sc,static_cast<stochasticProcessSSRV>(_spPtr),_weights,
	76	15,15,0.5,_epsilonLikelihoodImprovement4alphaOptimiz,_epsilonLikelihoodImprovement,
	77	_epsilonLikelihoodImprovement4BBL,_maxIterationsBBL,5);
	78	_newAlpha=optimizer.getBestAlpha();
	79	_newNu=optimizer.getBestNu();
	80	return(optimizer.getBestL());
	81	} else {
	82	bestParamSSRV optimizer(true,true,true,true); // optimize alpha, nu, tamura92 params, and bbl
	83	optimizer(et,sc,static_cast<stochasticProcessSSRV>(_spPtr),_weights,
	84	15,15,0.5,_epsilonLikelihoodImprovement4alphaOptimiz,_epsilonLikelihoodImprovement,
	85	_epsilonLikelihoodImprovement4BBL,_maxIterationsBBL,5);
	86	_newAlpha=optimizer.getBestAlpha();
	87	_newNu=optimizer.getBestNu();
	88	return(optimizer.getBestL());
	89	}
	90	}
	91
	92	MDOUBLE ssrvDistanceSeqs2Tree::calcSideInfoGivenTreeAndAlpha(const sequenceContainer &sc, const tree &et, MDOUBLE alpha)
	93	{
	94	_newAlpha = alpha;
	95	(static_cast<gammaDistribution*>(_spPtr->distr()))->setAlpha(alpha);
	96
	97	// optimize only nu (and tamura92 params, if relevant)
	98	if (!dynamic_cast<tamura92*>(
	99	static_cast<replacementModelSSRV*>(_spPtr->getPijAccelerator()->getReplacementModel())
	100	->getBaseRM()
	101	)
	102	) {
	103	bestParamSSRV optimizer(false,true,false,false);
	104	optimizer(et,sc,(static_cast<stochasticProcessSSRV>(_spPtr)),_weights,
	105	15,15,_epsilonLikelihoodImprovement4alphaOptimiz,_epsilonLikelihoodImprovement,
	106	_epsilonLikelihoodImprovement4BBL,_maxIterationsBBL,5);
	107	_newNu=optimizer.getBestNu();
	108	return(optimizer.getBestL());
	109	} else {
	110	bestParamSSRV optimizer(false,true,true,false);
	111	optimizer(et,sc,(static_cast<stochasticProcessSSRV>(_spPtr)),_weights,
	112	15,15,_epsilonLikelihoodImprovement4alphaOptimiz,_epsilonLikelihoodImprovement,
	113	_epsilonLikelihoodImprovement4BBL,_maxIterationsBBL,5);
	114	_newNu=optimizer.getBestNu();
	115	return(optimizer.getBestL());
	116	}
	117	}
	118
	119	void ssrvDistanceSeqs2Tree::acceptSideInfo()
	120	{
	121	_alpha = _newAlpha;
	122	_nu = _newNu;
	123	}
	124
	125	void ssrvDistanceSeqs2Tree::utilizeSideInfo()
	126	{
	127	// set new alpha value in the sp that is used in _distM
	128	LOG(10,<<"# utilizing alpha "<<_alpha<<" and nu "<<_nu<<endl);
	129	(static_cast<gammaDistribution*>(_spPtr->distr()))->setAlpha(_alpha);
	130	(static_cast<stochasticProcessSSRV*>(_spPtr))->setRateOfRate(_nu);
	131	}
	132
	133	void ssrvDistanceSeqs2Tree::printSideInfo(ostream& out) const
	134	{
	135	out<<"Alpha: "<< _alpha <<" Nu: "<< _nu <<endl;
	136	}
	137
	138	// non virtual
	139	void ssrvDistanceSeqs2Tree::setSideInfo(const MDOUBLE alpha, MDOUBLE nu)
	140	{
	141	_alpha = alpha;
	142	_nu = nu;
	143	}
	144
	145	ssrvDistanceSeqs2Tree::alphaAndNu ssrvDistanceSeqs2Tree::getSideInfo() const
	146	{
	147	return alphaAndNu(_alpha, _nu);
	148	}

+63

-0

libs/phylogeny/ssrvDistanceSeqs2Tree.h less more

	0	// $Id: ssrvDistanceSeqs2Tree.h 962 2006-11-07 15:13:34Z privmane $
	1
	2	#ifndef ___SSRV_DISTANCE_SEQS2TREE
	3	#define ___SSRV_DISTANCE_SEQS2TREE
	4
	5	#include "distanceBasedSeqs2Tree.h"
	6	#include "tree.h"
	7
	8	/* class ssrvDistanceSeqs2Tree
	9	A type of distance-based tree reconstruction method like the iterative
	10	method commonAlphaDistanceSeqs2Tree, but using a model with SSRV
	11	(Site-Specific Rate Variation, AKA covarion model). Compared to
	12	commonAlphaDistanceSeqs2Tree, we change the distance method to use an
	13	SSRV model, and in the optimizations we estimate nu in addition to
	14	alpha.
	15	*/
	16	class ssrvDistanceSeqs2Tree : public iterativeDistanceSeqs2Tree {
	17	public:
	18	// Given likeDist is assumed to hold a gamma-distribution, SSRV stochasticProcess
	19	ssrvDistanceSeqs2Tree(likeDist &distM, distances2Tree &dist2et, const Vdouble *weights = NULL,
	20	const MDOUBLE epsilonLikelihoodImprovement = 0.001,
	21	const MDOUBLE epsilonLikelihoodImprovement4paramOptimiz = 0.001,
	22	const MDOUBLE epsilonLikelihoodImprovement4BBL = 0.001,
	23	const int maxIterationsBBL = 50)
	24	: iterativeDistanceSeqs2Tree(distM, dist2et, weights, epsilonLikelihoodImprovement, epsilonLikelihoodImprovement4paramOptimiz, epsilonLikelihoodImprovement4BBL, maxIterationsBBL) {}
	25	virtual ~ssrvDistanceSeqs2Tree () {}
	26
	27	// Datastruct for handling side info for the SSRV model (used as return value)
	28	struct alphaAndNu {
	29	MDOUBLE alpha;
	30	MDOUBLE nu;
	31	alphaAndNu(){}
	32	alphaAndNu(MDOUBLE setAlpha, MDOUBLE setNu) : alpha(setAlpha), nu(setNu) {}
	33	};
	34
	35	// NOTE! This version calls ITERATIVE seqs2Tree because side info is not given by the user, so we have to generate and optimize it
	36	virtual tree seqs2Tree(const sequenceContainer &sc, const Vdouble weights=NULL, const tree constraintTreePtr=NULL);
	37	// NOTE! This version is a NON-ITERATIVE version that uses the side info supplied by the user
	38	tree seqs2Tree(const sequenceContainer &sc, MDOUBLE alpha, MDOUBLE nu, const Vdouble weights=NULL, const tree constraintTreePtr=NULL);
	39	// Does one bootstrap iteration
	40	tree seqs2TreeBootstrap(const sequenceContainer &sc, const MDOUBLE alpha, MDOUBLE nu, const Vdouble weights, const tree constraintTreePtr=NULL);
	41	// Explicitly ask for iterations
	42	virtual tree seqs2TreeIterative(const sequenceContainer &sc, const Vdouble weights=NULL, const tree constraintTreePtr=NULL); // homogenous rates will be used for first iteration
	43	tree seqs2TreeIterative(const sequenceContainer &sc, MDOUBLE initAlpha, MDOUBLE initNu, const Vdouble weights=NULL, const tree constraintTreePtr=NULL);
	44	virtual tree seqs2TreeIterative(const sequenceContainer &sc, const tree &initTree, const Vdouble weights=NULL, const tree constraintTreePtr=NULL);
	45	virtual tree seqs2TreeIterative(const sequenceContainer &sc, const tree &initTree, MDOUBLE initAlpha, const Vdouble weights=NULL, const tree constraintTreePtr=NULL);
	46	tree seqs2TreeIterative(const sequenceContainer &sc, const tree &initTree, MDOUBLE initAlpha, MDOUBLE initNu, const Vdouble weights=NULL, const tree constraintTreePtr=NULL);
	47
	48	// handling side info
	49	virtual MDOUBLE optimizeSideInfo(const sequenceContainer &sc, tree &et);
	50	virtual MDOUBLE calcSideInfoGivenTreeAndAlpha(const sequenceContainer &sc, const tree &et, MDOUBLE alpha);
	51	virtual void acceptSideInfo();
	52	virtual void utilizeSideInfo();
	53	virtual void printSideInfo(ostream& out) const;
	54	void setSideInfo(const MDOUBLE alpha, MDOUBLE nu);
	55	alphaAndNu getSideInfo() const;
	56
	57	protected:
	58	MDOUBLE _nu;
	59	MDOUBLE _newNu;
	60	};
	61
	62	#endif

+57

-0

libs/phylogeny/stochasticProcess.cpp less more

	0	// $Id: stochasticProcess.cpp 4660 2008-08-12 14:31:38Z cohenofi $
	1
	2	#include "stochasticProcess.h"
	3	#include "errorMsg.h"
	4
	5	stochasticProcess& stochasticProcess::operator=(const stochasticProcess &otherStoc) {
	6	if (this != &otherStoc) { // Check for self-assignment
	7	if (_pijAccelerator) delete _pijAccelerator;
	8	if (otherStoc._pijAccelerator)
	9	{
	10	pijAccelerator* p2 = otherStoc._pijAccelerator->clone(); // Create the new one FIRST...
	11	_pijAccelerator = p2;
	12	}
	13	else
	14	_pijAccelerator = NULL;
	15
	16	if (_distr) delete _distr;
	17	if (otherStoc._distr)
	18	{
	19	distribution* d2 = otherStoc._distr->clone();
	20	_distr = d2;
	21	}
	22	else{
	23	_distr = NULL;
	24	_isReversible = otherStoc.isReversible();
	25	}
	26	}
	27	// if (_distr) delete _distr;
	28	// _distr = new distribution(*otherStoc._distr);
	29	return *this;
	30	}
	31
	32
	33	stochasticProcess::stochasticProcess(const distribution in_distr,const pijAccelerator pijAccelerator, bool isReversible) :
	34	_distr(in_distr->clone()), _pijAccelerator(pijAccelerator->clone()), _isReversible(isReversible){
	35
	36	}
	37
	38	stochasticProcess::stochasticProcess(const stochasticProcess& other):
	39	_distr(NULL), _pijAccelerator(NULL){
	40	if (other._pijAccelerator != NULL) _pijAccelerator = other._pijAccelerator->clone();
	41	if (other._distr != NULL) _distr = other._distr->clone();
	42	_isReversible = other.isReversible();
	43	}
	44
// Releases the owned distribution and accelerator.
stochasticProcess::~stochasticProcess() {
	delete _distr;
	delete _pijAccelerator;
}
	49
	50
	51	void stochasticProcess::setDistribution(const distribution* in_distr)
	52	{
	53	if (_distr) delete _distr;
	54	if (in_distr == NULL) _distr = NULL;
	55	else _distr = in_distr->clone();
	56	}

+58

-0

libs/phylogeny/stochasticProcess.h less more

	0	// $Id: stochasticProcess.h 2511 2007-11-04 12:08:50Z cohenofi $
	1
	2	#ifndef ___STOCHASTIC_PROCESS
	3	#define ___STOCHASTIC_PROCESS
	4
	5	#include "pijAccelerator.h"
	6	#include "distribution.h"
	7	#include <cassert>
	8
	9	class stochasticProcess{
	10	public:
	11	explicit stochasticProcess(const distribution in_distr,const pijAccelerator pijAccelerator, bool isReversible = true);
	12	explicit stochasticProcess() {
	13	_distr=NULL; _pijAccelerator=NULL; _isReversible=true;
	14	}
	15	stochasticProcess(const stochasticProcess& other);
	16	virtual stochasticProcess* clone() const {return new stochasticProcess(*this);}
	17
	18	const int alphabetSize() const {return _pijAccelerator->alphabetSize();} // The alphabet size is the same as the matrix Pij size
	19
	20	virtual const int categories() const {return _distr->categories();}
	21	virtual const MDOUBLE rates(const int i) const {return _distr->rates(i);}
	22	virtual const MDOUBLE ratesProb(const int i) const {return _distr->ratesProb(i);}
	23
	24
	25	virtual const MDOUBLE Pij_t(const int i, const int j, const MDOUBLE t) const {
	26	if (t!=0) return _pijAccelerator->Pij_t(i,j,t);
	27	return (i==j)? 1 : 0;
	28	}
	29
	30	const MDOUBLE freq(const int i) const {assert(i>=0);return _pijAccelerator->freq(i);} // P(i)
	31	const MDOUBLE dPij_dt(const int i,const int j,const MDOUBLE t) const { return _pijAccelerator->dPij_dt(i,j,t);}
	32	const MDOUBLE d2Pij_dt2(const int i, const int j, const MDOUBLE t) const { return _pijAccelerator->d2Pij_dt2(i,j,t);}
	33
	34
	35	virtual distribution* distr() const {return _distr;} // @@@@ this const is a lie !!!
	36	virtual const pijAccelerator* getPijAccelerator() const {return _pijAccelerator;}
	37	virtual void setDistribution(const distribution* in_distr);
	38
	39	stochasticProcess& operator=(const stochasticProcess &otherStoc);
	40	virtual ~stochasticProcess();
	41	virtual void setGlobalRate(const MDOUBLE x) {_distr->setGlobalRate(x);}
	42	virtual MDOUBLE getGlobalRate() const {return _distr->getGlobalRate();}
	43	const bool isReversible() const {return _isReversible;}
	44
	45
	46	protected:
	47	distribution *_distr;
	48	pijAccelerator *_pijAccelerator;
	49	bool _isReversible;
	50	};
	51
	52
	53
	54	#endif
	55
	56
	57	// Stochastic process is composed of two objects: a distribution of rates and a Pij accelerator.

+19

-0

libs/phylogeny/stochasticProcessSSRV.cpp less more

	0	// $Id: stochasticProcessSSRV.cpp 962 2006-11-07 15:13:34Z privmane $
	1
	2	#include "stochasticProcessSSRV.h"
	3	#include "replacementModelSSRV.h"
	4
	5	// it's important to call static_cast<replacementModelSSRV*>(_pijAccelerator->getReplacementModel())->updateQ(), after changing
	6	// this returned pointer. (when changing alpha)
	7	distribution* stochasticProcessSSRV::distr() const
	8	{
	9	return ( static_cast<replacementModelSSRV*>(_pijAccelerator->getReplacementModel())->getDistribution() );
	10	}
	11
	12
	13	void stochasticProcessSSRV::setDistribution(const distribution* in_distr)
	14	{
	15	static_cast<replacementModelSSRV*>(_pijAccelerator->getReplacementModel())->setDistribution(in_distr);
	16	}
	17
	18

+48

-0

libs/phylogeny/stochasticProcessSSRV.h less more

	0	// $Id: stochasticProcessSSRV.h 1923 2007-04-04 16:38:14Z privmane $
	1
	2
	3	#ifndef ___STOCHASTIC_PROCESS_SSRV
	4	#define ___STOCHASTIC_PROCESS_SSRV
	5
	6	#include "stochasticProcess.h"
	7	#include "replacementModelSSRV.h"
	8
	9	// This is a Stochastic process that its distribution is located inside its accelerator.
	10	// _dist should be NULL all the time.
	11	// The number of categories is always 1.
	12	// _pijAccelerator must contain a replacementModelSSRV* as a member.
	13	// The distribution is located inside the replacement model which is a member of _pijAccelerator.
	14
	15	class stochasticProcessSSRV : public stochasticProcess{
	16	public:
	17	explicit stochasticProcessSSRV(const pijAccelerator *pijAccelerator) :
	18	stochasticProcess() { _pijAccelerator = pijAccelerator->clone();}
	19	explicit stochasticProcessSSRV() : stochasticProcess() {}
	20	stochasticProcessSSRV(const stochasticProcessSSRV& other) : stochasticProcess(other) {}
	21	stochasticProcessSSRV& operator=(const stochasticProcessSSRV &other) {stochasticProcess::operator=(other); return (*this);}
	22	virtual stochasticProcess* clone() const {return new stochasticProcessSSRV(*this);}
	23
	24	virtual ~stochasticProcessSSRV() {}
	25
	26	virtual const int categories() const { return 1; }
	27	virtual const MDOUBLE rates(const int i) const {return 1.0;}
	28	virtual const MDOUBLE ratesProb(const int i) const {return 1.0;}
	29
	30	virtual const MDOUBLE Pij_t(const int i, const int j, const MDOUBLE t) const {
	31	// as opposed to normal stochastic-process. even when t=0 and i!=j the result might be > 0
	32	return _pijAccelerator->Pij_t(i,j,t);
	33	}
	34
	35	virtual distribution* distr() const; // @@@@ this const is a lie !!!
	36	virtual void setDistribution(const distribution* in_distr);
	37
	38	virtual void setGlobalRate(const MDOUBLE x) {distr()->setGlobalRate(x);} // @@@@ should this also call updateQ of the RM ??? Doesn't really metter when using gamma distribution
	39	virtual MDOUBLE getGlobalRate() const {return distr()->getGlobalRate();}
	40
	41	void setRateOfRate(MDOUBLE rateOfRate) {
	42	static_cast<replacementModelSSRV*>(_pijAccelerator->getReplacementModel())
	43	->setRateOfRate(rateOfRate);
	44	}
	45	};
	46
	47	#endif

+6

-0

libs/phylogeny/suffStatComponent.cpp less more

	0	// $Id: suffStatComponent.cpp 962 2006-11-07 15:13:34Z privmane $
	1
	2	#include "suffStatComponent.h"
	3
	4
	5

+250

-0

libs/phylogeny/suffStatComponent.h less more

	0	// $Id: suffStatComponent.h 9253 2011-01-31 01:37:21Z rubi $
	1
	2	#ifndef ___SUFF_STAT_COMPONENT
	3	#define ___SUFF_STAT_COMPONENT
	4
	5	#include "definitions.h"
	6	#include <vector>
	7	using namespace std;
	8
	9	// spec = for a specific node. global = for all the nodes
	10	// hom = no rate variation. gam = with rate variation. gamProportional = with global and local rate variation
	11	// pos = for one position
	12	//-------------------------------------------------------------
	13	class suffStatSpecHomPos{ // this is for a specific node.
	14	public:
	15	void set(const int letter,const doubleRep& val) {
	16	_V[letter]=val;
	17	}
	18
	19	doubleRep get(const int letter) const {
	20	doubleRep tmp=_V[letter];
	21	// cout << "tmp =";
	22	// tmp.outputn(cout);
	23
	24	return tmp;
	25	}
	26
	27	void allocatePlace(const int alphabetSize) {
	28	_V.resize(alphabetSize);
	29	}
	30	bool isEmpty (){return (_V.empty());};
	31	int size() const {return _V.size();}
	32
	33	private:
	34	vector<doubleRep> _V;//size = letter
	35	};
	36	//-------------------------------------------------------------
	37	/*
	38	class suffStatSpecGamPos{// this is for a specific node with rates
	39	public:
	40	void set(const int rateCategor,
	41	const int letter,const MDOUBLE val) {
	42	_V[rateCategor].set(letter,val);
	43	}
	44
	45	MDOUBLE get(const int rateCategor,
	46	const int letter) const {
	47	return _V[rateCategor].get(letter);
	48	}
	49	void allocatePlace(const int numberOfrateCategories,const int alphabetSize) {
	50	_V.resize(numberOfrateCategories);
	51	for (int i=0; i < numberOfrateCategories; ++i) {
	52	_V[i].allocatePlace(alphabetSize);
	53	}
	54	}
	55	bool isEmpty (){return (_V.empty());};
	56	private:
	57	vector<suffStatSpecHomPos> _V;//rateCategor,letter
	58	};
	59	*/
	60	//-------------------------------------------------------------
	61	/*
	62	class suffStatSpecGam{// this is for a specific node with rates
	63	public:
	64	void set(const int pos,const int rateCategor,
	65	const int letter,const MDOUBLE val) {
	66	_V[pos].set(rateCategor,letter,val);
	67	}
	68
	69	MDOUBLE get(const int pos,const int rateCategor,
	70	const int letter) const {
	71	return _V[pos].get(rateCategor,letter);
	72	}
	73
	74	void allocatePlace(const int pos,const int numberOfrateCategories,const int alphabetSize) {
	75	_V.resize(pos);
	76	for (int i=0;i<pos;++i) _V[i].allocatePlace(numberOfrateCategories,alphabetSize);
	77	}
	78	bool isEmpty (){return (_V.empty());};
	79	suffStatSpecGamPos& operator[] (int index) {return _V[index];}
	80	const suffStatSpecGamPos& operator[] (int index) const {return _V[index];}
	81	private:
	82	vector<suffStatSpecGamPos> _V;//pos,rateCategor,letter
	83	};
	84	*/
	85	//-------------------------------------------------------------
	86	/*
	87	class suffStatGlobalGam {
	88	public:
	89	MDOUBLE get(const int nodeId, const int pos,const int rateCategor,
	90	const int letter) const {
	91	return _V[nodeId].get(pos,rateCategor,letter);
	92	}
	93	void allocatePlace(const int numOfNodes,
	94	const int pos,
	95	const int numberOfrateCategories,
	96	const int alphabetSize) {
	97	_V.resize(numOfNodes);
	98	for (int i=0;i<numOfNodes;++i) _V[i].allocatePlace(pos,numberOfrateCategories,alphabetSize);
	99	}
	100	int size() const {return _V.size();}
	101	suffStatSpecGam& operator[] (int index) {return _V[index];}
	102	const suffStatSpecGam& operator[] (int index) const {return _V[index];}
	103
	104	private:
	105	vector<suffStatSpecGam> _V;
	106	};
	107	*/
	108	//-------------------------------------------------------------
	109	class suffStatGlobalHomPos{ // this is for all nodes
	110	public:
	111	void set(const int nodeId,const int letter,const doubleRep val) {
	112	_V[nodeId].set(letter,val);
	113	}
	114
	115	doubleRep get(const int nodeId,const int letter) const {
	116	doubleRep tmp(_V[nodeId].get(letter));
	117	// tmp;
	118
	119	// cout << "tmp2=";
	120	// tmp.outputn(cout);
	121	return tmp;
	122	}
	123
	124	void allocatePlace(const int numOnNodes,const int alphabetSize) {
	125	_V.resize(numOnNodes);
	126	for (int i=0;i<_V.size();++i) {_V[i].allocatePlace(alphabetSize);}
	127	}
	128	bool isEmpty (){return (_V.empty());};
	129	int size() const {return _V.size();}
	130	private:
	131	vector<suffStatSpecHomPos> _V;//size = number of nodes.
	132	};
	133	//-------------------------------------------------------------
	134	class suffStatGlobalGamPos{ // this is for all nodes
	135	public:
	136	void set(const int categor,const int nodeId,const int letter,const doubleRep val) {
	137	_V[categor].set(nodeId,letter,val);
	138	}
	139
	140	doubleRep get(const int categor,const int nodeId,const int letter) const {
	141	return _V[categor].get(nodeId,letter);
	142	}
	143
	144	void allocatePlace(const int categor,const int numOnNodes,const int alphabetSize) {
	145	_V.resize(categor);
	146	for (int i=0;i<_V.size();++i) {_V[i].allocatePlace(numOnNodes,alphabetSize);}
	147	}
	148	bool isEmpty (){return (_V.empty());}
	149	int size() const {return _V.size();}
	150
	151	suffStatGlobalHomPos& operator[] (int index) {return _V[index];}
	152	const suffStatGlobalHomPos& operator[] (int index) const {return _V[index];}
	153	private:
	154	vector<suffStatGlobalHomPos> _V;//size = number of categories
	155	};
	156	//-------------------------------------------------------------
	157	class suffStatGlobalGamProportionalPos{ // this is for all nodes
	158	public:
	159	void set(const int globalRateCategor,const int localRateCategor,const int nodeId,const int letter,const doubleRep val) {
	160	_V[globalRateCategor].set(localRateCategor,nodeId,letter,val);
	161	}
	162
	163	doubleRep get(const int globalRateCategor,const int localRateCategor,const int nodeId,const int letter) const {
	164	return _V[globalRateCategor].get(localRateCategor,nodeId,letter);
	165	}
	166
	167	void allocatePlace(const int globalRateCategor,const int localRateCategor,const int numOnNodes,const int alphabetSize) {
	168	_V.resize(globalRateCategor);
	169	for (int i=0;i<_V.size();++i) {_V[i].allocatePlace(localRateCategor,numOnNodes,alphabetSize);}
	170	}
	171	bool isEmpty (){return (_V.empty());}
	172	int size() const {return _V.size();}
	173
	174	suffStatGlobalGamPos& operator[] (int index) {return _V[index];}
	175	const suffStatGlobalGamPos& operator[] (int index) const {return _V[index];}
	176	private:
	177	vector<suffStatGlobalGamPos> _V;//size = number of global rate categories
	178	};
	179	//-------------------------------------------------------------
	180	class suffStatGlobalGam{ // this is for all positions (and for all nodes).
	181	public:
	182	void set(const int pos,const int categor,const int nodeId,const int letter,const doubleRep val) {
	183	_V[pos].set(categor,nodeId,letter,val);
	184	}
	185
	186	doubleRep get(const int pos,const int categor,const int nodeId,const int letter) const {
	187	return _V[pos].get(categor,nodeId,letter);
	188	}
	189
	190	void allocatePlace(const int pos,const int categor,const int numOnNodes,const int alphabetSize) {
	191	_V.resize(pos);
	192	for (int i=0;i<_V.size();++i) {_V[i].allocatePlace(categor,numOnNodes,alphabetSize);}
	193	}
	194	bool isEmpty (){return (_V.empty());}
	195	int size() const {return _V.size();}
	196	suffStatGlobalGamPos& operator[] (int index) {return _V[index];}
	197	const suffStatGlobalGamPos& operator[] (int index) const {return _V[index];}
	198	private:
	199	vector<suffStatGlobalGamPos> _V;
	200	};
	201
	202	//-------------------------------------------------------------
	203	class suffStatGlobalGamProportional{ // this is for all positions (and for all nodes).
	204	public:
	205	void set(const int pos,const int globalRateCategor,const int localRateCategor,const int nodeId,const int letter,const doubleRep val) {
	206	_V[pos].set(globalRateCategor,localRateCategor,nodeId,letter,val);
	207	}
	208
	209	doubleRep get(const int pos,const int globalRateCategor,const int localRateCategor,const int nodeId,const int letter) const {
	210	return _V[pos].get(globalRateCategor,localRateCategor,nodeId,letter);
	211	}
	212
	213	void allocatePlace(const int pos,const int globalRateCategor,const int localRateCategor,const int numOnNodes,const int alphabetSize) {
	214	_V.resize(pos);
	215	for (int i=0;i<_V.size();++i) {_V[i].allocatePlace(globalRateCategor,localRateCategor,numOnNodes,alphabetSize);}
	216	}
	217	bool isEmpty (){return (_V.empty());}
	218	int size() const {return _V.size();}
	219	suffStatGlobalGamProportionalPos& operator[] (int index) {return _V[index];}
	220	const suffStatGlobalGamProportionalPos& operator[] (int index) const {return _V[index];}
	221	private:
	222	vector<suffStatGlobalGamProportionalPos> _V;
	223	};
	224
	225	// from ItayM not to use with the EM algorithm.
	226	class suffStatGlobalHom{ // this is for all positions (and for all nodes).
	227	public:
	228	void set(const int pos, const int nodeId, const int letter,const doubleRep val) {
	229	_V[pos].set(nodeId, letter, val);
	230	}
	231
	232	doubleRep get(const int pos, const int nodeId, const int letter) const {
	233	return _V[pos].get(nodeId, letter);
	234	}
	235
	236	void allocatePlace(const int pos, const int numOnNodes, const int alphabetSize) {
	237	_V.resize(pos);
	238	for (int i=0;i<_V.size();++i) {_V[i].allocatePlace(numOnNodes, alphabetSize);}
	239	}
	240	bool isEmpty (){return (_V.empty());};
	241	suffStatGlobalHomPos& operator[] (int index) {return _V[index];}
	242	const suffStatGlobalHomPos& operator[] (int index) const {return _V[index];}
	243	private:
	244	vector<suffStatGlobalHomPos> _V;
	245	};
	246
	247
	248	#endif
	249

+236

-0

libs/phylogeny/suffStatGammaMixture.cpp less more

	0	#include "suffStatGammaMixture.h"
	1	#include "mixtureDistribution.h"
	2	#include "computePijComponent.h"
	3	#include "likelihoodComputation.h"
	4	#include "gammaUtilities.h"
	5	#include "uniDistribution.h"
	6
	7
	8	#include <cmath>
	9	#include <fstream>
	10	using namespace likelihoodComputation;
	11
	12
	13	suffStatGammaMixture::suffStatGammaMixture(const stochasticProcess& cur_sp, const sequenceContainer& sc, const tree& inTree)
	14	{
	15	_pSp = &cur_sp;
	16	_pSc = &sc;
	17	_pTree = &inTree;
	18	}
	19
	20	suffStatGammaMixture::~suffStatGammaMixture()
	21	{
	22	}
	23
	24
	25	void suffStatGammaMixture::allocatePlaceForSuffStat() {
	26	mixtureDistribution* pMixture = static_cast<mixtureDistribution*>(_pSp->distr());
	27	int componentNum = pMixture->getComponentsNum();
	28	_MkVec.clear();
	29	_MkVec.resize(componentNum, 0);
	30	_AkVec.clear();
	31	_AkVec.resize(componentNum, 0);
	32	_BkVec.clear();
	33	_BkVec.resize(componentNum, 0);
	34	}
	35
	36	void suffStatGammaMixture::computePijForEachComponent(vector<computePijGam>& cpgVec,
	37	vector<stochasticProcess>& spVec) {
	38	mixtureDistribution* pMixture = static_cast<mixtureDistribution*>(_pSp->distr());
	39	int componentNum = pMixture->getComponentsNum();
	40	for (int comp = 0; comp < componentNum; ++comp) {
	41	//create a local sp so to compute likelihoods of this component only
	42	stochasticProcess compSp(pMixture->getComponent(comp), _pSp->getPijAccelerator());
	43	cpgVec[comp].fillPij(*_pTree, compSp);
	44	spVec.push_back(compSp);
	45	}
	46	}
	47
	48	void suffStatGammaMixture::computeStatistics()
	49	{
	50	///////////////as in getTreeLikelihoodAllPosAlphTheSame
	51	//computePijGam pi;
	52	//pi.fillPij(_pTree, _pSp);
	53	//MDOUBLE res =0;
	54	//doubleRep LofPos;
	55	//int k;
	56	//for (k=0; k < _pSc->seqLen(); ++k)
	57	//{
	58	// doubleRep tmp=0;
	59	// for (int i=0; i < _pSp->categories();++i)
	60	// {
	61	// tmp += getLofPos(k, _pTree, _pSc, pi[i], _pSp) _pSp->ratesProb(i);
	62	// }
	63	// LofPos = tmp;
	64	// res += log(LofPos);
	65	//}
	66	//////////////////////////////////////////////
	67
	68	//mixtureDistribution* pMixture = static_cast<mixtureDistribution*>(_pSp->distr());
	69	//int componentNum = pMixture->getComponentsNum();
	70	//MDOUBLE res2 = 0.0;
	71	//vector<computePijGam> cpgVec(componentNum);
	72	//vector<stochasticProcess> spVec;
	73	//
	74	//for (int comp = 0; comp < componentNum; ++comp) {
	75	// //create a local sp so to compute likelihoods of this component only
	76	// stochasticProcess compSp(pMixture->getComponent(comp), _pSp->getPijAccelerator());
	77	// cpgVec[comp].fillPij(*_pTree, compSp);
	78	// spVec.push_back(compSp);
	79	//}
	80	//
	81	//for (int pos = 0; pos < _pSc->seqLen(); ++pos)
	82	//{
	83	// int comp;
	84	// for (comp = 0; comp < componentNum; ++comp)
	85	// {
	86	// const generalGammaDistribution* pDist = pMixture->getComponent(comp);
	87	// for (int cat=0; cat < pDist->categories(); ++cat)
	88	// {
	89	// MDOUBLE tmp = pDist->ratesProb(cat) * getLofPos(pos, _pTree, _pSc, cpgVec[comp][cat], *_pSp);
	90	// res2 += log(tmp);
	91	// }
	92	// }
	93	//}
	94	//////////////////////////////////////////////
	95	allocatePlaceForSuffStat();
	96	mixtureDistribution* pMixture = static_cast<mixtureDistribution*>(_pSp->distr());
	97	int componentNum = pMixture->getComponentsNum();
	98
	99	//compute Pij for each component
	100	vector<computePijGam> cpgVec(componentNum);
	101	vector<stochasticProcess> spVec;
	102	computePijForEachComponent(cpgVec,spVec);
	103
	104
	105	//compute statistics: M_k, A_k, B_k
	106	//Here we sum over all positions.
	107	//go over all positions [pos] and compute for each component [k]: M_k(pos), E[R]_k(pos), E[logR]_k(pos)
	108	//Then compute A_k and B_k for that position.
	109	for (int pos = 0; pos < _pSc->seqLen(); ++pos)
	110	{
	111	MDOUBLE sumAllComponents = 0.0;
	112	Vdouble MkPosVec(componentNum, 0.0); //the contribution of position pos to the M_K statistic
	113	Vdouble Exp_RkVec(componentNum, 0.0);
	114	Vdouble Exp_LogRkVec(componentNum, 0.0);
	115	int comp;
	116	for (comp = 0; comp < componentNum; ++comp)
	117	{
	118	// here we compute P(H[i]=k, data\| cur_mixtureDistribution)
	119	//P(H[i]=k, data\| teta) = P(H[i]=k)* (sum_over_all_categories{P(data\|r)P(r))
	120	///////////////////////////
	121	const generalGammaDistribution* pDist = pMixture->getComponent(comp);
	122	MDOUBLE Exp_Rk, Exp_LogRk, sum;
	123	Exp_Rk = Exp_LogRk = sum = 0.0;
	124	for (int cat=0; cat < pDist->categories(); ++cat)
	125	{
	126	MDOUBLE LofP = convert(likelihoodComputation::getLofPos(pos, _pTree, _pSc, cpgVec[comp][cat], spVec[comp]));
	127	MDOUBLE Pr = pDist->ratesProb(cat) * LofP;
	128	sum += Pr;
	129	Exp_RkVec[comp] += Pr * pDist->rates(cat);
	130	Exp_LogRkVec[comp] += Pr * log(pDist->rates(cat));
	131	}
	132	MkPosVec[comp] = sum;
	133	sumAllComponents += MkPosVec[comp] * pMixture->getComponentProb(comp);;
	134	}
	135
	136	for (comp = 0; comp < componentNum; ++comp)
	137	{
	138	MDOUBLE factor = pMixture->getComponentProb(comp)/ sumAllComponents;
	139	_MkVec[comp] += factor* MkPosVec[comp] ;
	140	_AkVec[comp] += factor * Exp_RkVec[comp];
	141	_BkVec[comp] += factor * Exp_LogRkVec[comp];
	142	}
	143	}// end of loop over positions
	144	spVec.clear();
	145	cpgVec.clear();
	146	}
	147
	148
	149	#include "uniformDistribution.h"
	150	void suffStatGammaMixture::plotStatistics(ofstream& outFile)
	151	{
	152	mixtureDistribution* pMixture = static_cast<mixtureDistribution*>(_pSp->distr());
	153	if (pMixture->getComponentsNum() != 1)
	154	errorMsg::reportError("Sorry, I plot only 1 component");
	155
	156	outFile <<"R"<<"\t"<<"Postr"<<"\t"<<"Er"<<"\t"<<"Elog_r"<<endl;
	157	const generalGammaDistribution* pDist = pMixture->getComponent(0);
	158	int numCat = 200, maxR = 10;
	159	uniformDistribution uniDist(numCat, 0, maxR);
	160	/////////calc the prior of each interval
	161	Vdouble priorProbs(uniDist.categories());
	162	MDOUBLE upperP, lowerP = 0;
	163	for (int i = 0; i<uniDist.categories();++i)
	164	{
	165	upperP = pDist->getCumulativeProb(uniDist.getBorder(i+1));
	166	priorProbs[i] = upperP - lowerP;
	167	lowerP = upperP;
	168	}
	169
	170	distribution * pUni = new uniDistribution;
	171
	172	stochasticProcess uniSp(pUni, _pSp->getPijAccelerator());
	173	//loop over all r
	174	for (int ri=0; ri < uniDist.categories(); ++ri)
	175	{
	176	MDOUBLE Exp_R = 0.0;
	177	MDOUBLE Exp_LogR = 0.0;
	178	MDOUBLE PosteriorR = 0.0;
	179	MDOUBLE rate = uniDist.rates(ri);
	180	if (rate == 0.0)
	181	rate = 0.000001;
	182
	183	//Here we sum over all positions.
	184	//go over all positions [pos] and compute: PosrteriorR(=P(D\|r)*P(r)), E[R]_k(pos), E[logR]_k(pos)
	185	for (int pos = 0; pos < _pSc->seqLen(); ++pos)
	186	{
	187	MDOUBLE PrPos = priorProbs[ri] * convert(likelihoodComputation::getLofPos(pos, _pTree, _pSc, uniSp, rate));
	188	PosteriorR += PrPos;
	189	Exp_R += PrPos * rate;
	190	Exp_LogR += PrPos * log(rate);
	191
	192	}
	193
	194	outFile <<rate<<"\t"<<PosteriorR<<"\t"<<Exp_R<<"\t"<<Exp_LogR<<endl;
	195	}
	196
	197	delete pUni;
	198	}
	199
	200
	201	MDOUBLE suffStatGammaMixture::computeQ2()
	202	{
	203	MDOUBLE res=0;
	204
	205	return res;
	206	}
	207
	208
	209
	210	MDOUBLE suffStatGammaMixture::computeQ()
	211	{
	212	mixtureDistribution* pMixture = static_cast<mixtureDistribution*>(_pSp->distr());
	213	MDOUBLE res = 0.0;
	214	//////////////////////////////////
	215	MDOUBLE res2 = 0.0;
	216	int compNum = pMixture->getComponentsNum();
	217	///////////////////////////////////
	218	for (int comp = 0;comp < compNum ; ++comp)
	219	{
	220	MDOUBLE P_k = pMixture->getComponentProb(comp);
	221	MDOUBLE alpha_k = pMixture->getAlpha(comp);
	222	MDOUBLE beta_k = pMixture->getBeta(comp);
	223	MDOUBLE first = _MkVec[comp] * log(P_k);
	224	MDOUBLE second = _MkVec[comp] * alpha_k*log(beta_k);
	225	MDOUBLE third = -_MkVec[comp] * gammln(alpha_k);
	226	MDOUBLE fourth = -_AkVec[comp]*beta_k;
	227	MDOUBLE fifth = _BkVec[comp]*(alpha_k-1.0);
	228	res += _MkVec[comp] * (log(P_k) + alpha_k*log(beta_k) - gammln(alpha_k))
	229	- (_AkVec[comp]*beta_k)
	230	+ _BkVec[comp]*(alpha_k-1);
	231	////////////////////////////////////
	232	}
	233	res2 = computeQ2();
	234	return res;
	235	}

+58

-0

libs/phylogeny/suffStatGammaMixture.h less more

	0	#ifndef ___SUFF_STAT_GAMMA_MIXTURE
	1	#define ___SUFF_STAT_GAMMA_MIXTURE
	2	/************************************************************
	3	The suffStatGammaMixture class is used to obtain the sufficient statistics
	4	that are necessary for the EM algorithm to compute the mixture distribution parameters.
	5	The following notations are used below:
	6	P(h[i]=k): the probability that position i belongs to the Kth Gamma component.
	7	teta_t: the current mixture distribution parameters (the alpha, beta, and the probability of each component).
	8
	9	There are 3 sufficient statistics:
	10	M_k: the expected number of positions belonging to the Kth component.
	11	sigma(i = 1 to seqLen){P(h[i] = k|data, cur_mixtureDistribution)}
	12	A_k: sigma(i = 1 to seqLen){P(h[i] = k|data, cur_mixtureDistribution) * E[r|h[i] = k, data, cur_mixtureDistribution]}
	13	B_k: sigma(i = 1 to seqLen){P(h[i] = k|data, cur_mixtureDistribution) * E[log(r)|h[i] = k, data, cur_mixtureDistribution]}
	14	************************************************************/
	15	#include "definitions.h"
	16	#include "stochasticProcess.h"
	17	#include "sequenceContainer.h"
	18	#include "tree.h"
	19	#include "mixtureDistribution.h"
	20	#include "computePijComponent.h"
	21
	22	class suffStatGammaMixture{
	23
	24	public:
	25
	26	explicit suffStatGammaMixture(const stochasticProcess& cur_sp, const sequenceContainer& sc, const tree& inTree);
	27	virtual ~suffStatGammaMixture();
	28
	29	void computeStatistics();
	30
	31	void plotStatistics(ofstream & outF);
	32	MDOUBLE getMk(int comp) const {return _MkVec[comp];}
	33	MDOUBLE getAk(int comp) const {return _AkVec[comp];}
	34	MDOUBLE getBk(int comp) const {return _BkVec[comp];}
	35	MDOUBLE computeQ();
	36	MDOUBLE computeQ2();
	37
	38
	39	private:
	40	MDOUBLE computeStatisticsForComponent(int pos, int componentNum, const computePijGam& cpg);
	41	void allocatePlaceForSuffStat();
	42	void computePijForEachComponent(vector<computePijGam>& cpgVec,vector<stochasticProcess>& spVec);
	43
	44	private:
	45	Vdouble _MkVec;
	46	Vdouble _AkVec;
	47	Vdouble _BkVec;
	48
	49	const stochasticProcess* _pSp;
	50	const sequenceContainer* _pSc;
	51	const tree* _pTree;
	52	};
	53
	54
	55
	56	#endif
	57

+73

-0

libs/phylogeny/talRandom.cpp less more

	0	// $Id: talRandom.cpp 962 2006-11-07 15:13:34Z privmane $
	1
	2	#include "talRandom.h"
	3
	4	RandintTal talRandom::r = static_cast<long>(time(0)) ;
	5
	6	MDOUBLE talRandom::DblGammaGreaterThanOne(MDOUBLE dblAlpha) {
	7	// Code adopted from David Heckerman
	8	//-----------------------------------------------------------
	9	// DblGammaGreaterThanOne(dblAlpha)
	10	//
	11	// routine to generate a gamma random variable with unit scale and
	12	// alpha > 1
	13	// reference: Ripley, Stochastic Simulation, p.90
	14	// Chang and Feast, Appl.Stat. (28) p.290
	15	//-----------------------------------------------------------
	16	MDOUBLE rgdbl[6];
	17
	18	rgdbl[1] = dblAlpha - 1.0;
	19	rgdbl[2] = (dblAlpha - (1.0 / (6.0 * dblAlpha))) / rgdbl[1];
	20	rgdbl[3] = 2.0 / rgdbl[1];
	21	rgdbl[4] = rgdbl[3] + 2.0;
	22	rgdbl[5] = 1.0 / sqrt(dblAlpha);
	23
	24	for (;;)
	25	{
	26	MDOUBLE dblRand1;
	27	MDOUBLE dblRand2;
	28	do
	29	{
	30	dblRand1 = giveRandomNumberBetweenZeroAndEntry(1.0);
	31	dblRand2 = giveRandomNumberBetweenZeroAndEntry(1.0);
	32
	33	if (dblAlpha > 2.5)
	34	dblRand1 = dblRand2 + rgdbl[5] * (1.0 - 1.86 * dblRand1);
	35
	36	} while (!(0.0 < dblRand1 && dblRand1 < 1.0));
	37
	38	MDOUBLE dblTemp = rgdbl[2] * dblRand2 / dblRand1;
	39
	40	if (rgdbl[3] * dblRand1 + dblTemp + 1.0 / dblTemp <= rgdbl[4] \|\|
	41	rgdbl[3] * log(dblRand1) + dblTemp - log(dblTemp) < 1.0)
	42	{
	43	return dblTemp * rgdbl[1];
	44	}
	45	}
	46	assert(false);
	47	return 0.0;
	48	}
	49
	50	MDOUBLE talRandom::DblGammaLessThanOne(MDOUBLE dblAlpha){
	51	//routine to generate a gamma random variable with
	52	//unit scale and alpha < 1
	53	//reference: Ripley, Stochastic Simulation, p.88
	54	MDOUBLE dblTemp;
	55	const MDOUBLE dblexp = exp(1.0);
	56	for (;;){
	57	MDOUBLE dblRand0 = giveRandomNumberBetweenZeroAndEntry(1.0);
	58	MDOUBLE dblRand1 = giveRandomNumberBetweenZeroAndEntry(1.0);
	59	if (dblRand0 <= (dblexp / (dblAlpha + dblexp))){
	60	dblTemp = pow(((dblAlpha + dblexp) * dblRand0) /
	61	dblexp, 1.0 / dblAlpha);
	62	if (dblRand1 <= exp(-1.0 * dblTemp)) return dblTemp;
	63	} else {
	64	dblTemp = -1.0 * log((dblAlpha + dblexp) * (1.0 - dblRand0) /
	65	(dblAlpha * dblexp));
	66	if (dblRand1 <= pow(dblTemp,dblAlpha - 1.0)) return dblTemp;
	67	}
	68	}
	69	assert(false);
	70	return 0.0;
	71	} // DblGammaLessThanOne
	72

+98

-0

libs/phylogeny/talRandom.h less more

	0	// $Id: talRandom.h 962 2006-11-07 15:13:34Z privmane $
	1
	2	#ifndef ___TAL_RANDOM
	3	#define ___TAL_RANDOM
	4
	5	#include "definitions.h"
	6	#include "logFile.h"
	7	#include <cmath>
	8	#include <cassert>
	9	#include <ctime>
	10
	11	class RandintTal {
	12	unsigned long randx;
	13	public:
	14	RandintTal(long s=0) {randx=s;}
	15	void seedTal(long s) {randx=s;}
	16	int absTal(int x) {return x&0x7fffffff;}
	17	static MDOUBLE maxTal() {return 2147483648.0;}
	18	int drawTal() {return randx = randx*1103515245+12345;}
	19	MDOUBLE fdrawTal() {return absTal(drawTal())/maxTal();} //random number between zero and 1
	20	};
	21
	22	class talRandom {
	23	public:
	24	// note the number you get is between 0 and entry not including entry!
	25	static MDOUBLE giveRandomNumberBetweenZeroAndEntry(MDOUBLE entry) {
	26	MDOUBLE tm=r.fdrawTal();
	27	return (tm * entry);
	28	}
	29
	30	static bool flipCoin() {
	31	return ((talRandom::giveRandomNumberBetweenZeroAndEntry(1.0)-0.5)>0);
	32	}
	33
	34	// note the number you get is between 0 and entry not including entry!
	35	static int giveIntRandomNumberBetweenZeroAndEntry(int entry) {
	36	return (int)(giveRandomNumberBetweenZeroAndEntry(entry));
	37	}
	38
	39	static void setSeed(const unsigned long seed) {
	40	r.seedTal(seed);
	41	}
	42
	43	static const MDOUBLE rand_gaussian(const MDOUBLE mean, const MDOUBLE variance) {
	44	const int N=100;
	45	static MDOUBLE X;
	46	X=0.0-N/2; /* set mean to 0 */
	47	for (int ri = 0;ri< N;ri++){
	48	// X += 1.0*rand()/RAND_MAX;
	49	X += giveRandomNumberBetweenZeroAndEntry(1.0);
	50	}
	51
	52	/* for uniform randoms in [0,1], mu = 0.5 and var = 1/12 */
	53	/* adjust X so mu = 0 and var = 1 */
	54
	55	// X = X * sqrt(12 / N); /* adjust variance to 1 */
	56	// cout <<X * sqrt(variance*12.0/N) + mean<<" ";
	57	MDOUBLE g = X * sqrt(variance*12.0/N) + mean;
	58	return (g);
	59	}
	60
	61	static MDOUBLE SampleGamma(MDOUBLE Alpha, MDOUBLE Beta) {
	62	MDOUBLE x= SampleGammaNorm(Alpha)/Beta;
	63	//LOG(700, << "SampleGamma(" << Alpha << " " << Beta << ") = " << x << "\n");
	64	return x;
	65	}
	66	static MDOUBLE SampleGamma(MDOUBLE Alpha) {
	67	MDOUBLE x= SampleGamma(Alpha, Alpha);
	68	//LOG(700, << "SampleGamma(" << Alpha << ") = " << x << "\n");
	69	return x;
	70	}
	71	static MDOUBLE rand_exp(const MDOUBLE mean) {
	72	return - mean * log(giveRandomNumberBetweenZeroAndEntry(1.0));//pg 64: Ross, Simulation 2nd.
	73	}
	74
	75	static MDOUBLE giveRandomNumberBetweenTwoPoints(const MDOUBLE lower_point, const MDOUBLE upper_point) {
	76	MDOUBLE u = giveRandomNumberBetweenZeroAndEntry(upper_point - lower_point);
	77	return (u + lower_point);
	78	}
	79
	80
	81	private:
	82	static RandintTal r;
	83
	84	// Routine to generate a gamma random variable with unit scale (beta = 1)
	85	static MDOUBLE SampleGammaNorm(MDOUBLE dblAlpha) {
	86	assert(dblAlpha > 0.0);
	87	if( dblAlpha < 1.0 ) return DblGammaLessThanOne(dblAlpha);
	88	else if( dblAlpha > 1.0 ) return DblGammaGreaterThanOne(dblAlpha);
	89	return -log(giveRandomNumberBetweenZeroAndEntry(1.0));
	90	}
	91	static MDOUBLE DblGammaGreaterThanOne(MDOUBLE dblAlpha);
	92	static MDOUBLE DblGammaLessThanOne(MDOUBLE dblAlpha);
	93
	94
	95	};
	96	#endif
	97

+167

-0

libs/phylogeny/tamura92.cpp less more

	0	// $Id: tamura92.cpp 962 2006-11-07 15:13:34Z privmane $
	1
	2	#include "tamura92.h"
	3	#include "errorMsg.h"
	4
	5	// This implementation was copied from the Bio++ Phyl library (by Julien Dutheil) - file T92.cpp
	6
	7	tamura92::tamura92(const MDOUBLE theta,
	8	const MDOUBLE TrTv)
	9	: _theta(theta), _TrTv(TrTv) {
	10
	11	_freq.resize(4);
	12	changeTheta(theta);
	13	}
	14
	15	void tamura92::changeTheta(const MDOUBLE theta) {
	16	_theta = theta;
	17	_freq[0] = _freq[3] = (1.0 - theta) / 2.0;
	18	_freq[1] = _freq[2] = theta / 2.0;
	19	}
	20
	21	const MDOUBLE tamura92::Pij_t(const int i, const int j, const MDOUBLE t) const {
	22	double k = (_TrTv + 1.0) / 2.0;
	23	double r = 2.0 / (1.0 + 2.0 * _theta * _TrTv - 2.0 * _theta * _theta * _TrTv);
	24	double l = r * t;
	25	double exp1 = exp(-l);
	26	double exp2 = exp(-k * l);
	27
	28	switch(i) {
	29	//A
	30	case 0 : {
	31	switch(j) {
	32	case 0 : return _freq[0] * (1.0 + exp1) + _theta * exp2; //A
	33	case 1 : return _freq[1] * (1.0 - exp1); //C
	34	case 2 : return _freq[2] * (1.0 + exp1) - _theta * exp2; //G
	35	case 3 : return _freq[3] * (1.0 - exp1); //T, U
	36	}
	37	}
	38	//C
	39	case 1 : {
	40	switch(j) {
	41	case 0 : return _freq[0] * (1.0 - exp1); //A
	42	case 1 : return _freq[1] * (1.0 + exp1) + (1. - _theta) * exp2; //C
	43	case 2 : return _freq[2] * (1.0 - exp1); //G
	44	case 3 : return _freq[3] * (1.0 + exp1) - (1. - _theta) * exp2; //T, U
	45	}
	46	}
	47	//G
	48	case 2 : {
	49	switch(j) {
	50	case 0 : return _freq[0] * (1.0 + exp1) - (1. - _theta) * exp2; //A
	51	case 1 : return _freq[1] * (1.0 - exp1); //C
	52	case 2 : return _freq[2] * (1.0 + exp1) + (1. - _theta) * exp2; //G
	53	case 3 : return _freq[3] * (1.0 - exp1); //T, U
	54	}
	55	}
	56	//T, U
	57	case 3 : {
	58	switch(j) {
	59	case 0 : return _freq[0] * (1.0 - exp1); //A
	60	case 1 : return _freq[1] * (1.0 + exp1) - _theta * exp2; //C
	61	case 2 : return _freq[2] * (1.0 - exp1); //G
	62	case 3 : return _freq[3] * (1.0 + exp1) + _theta * exp2; //T, U
	63	}
	64	}
	65	}
	66	return -1;
	67	}
	68
	69	const MDOUBLE tamura92::dPij_dt(const int i,const int j, const MDOUBLE t) const {
	70	double k = (_TrTv + 1.0) / 2.0;
	71	double r = 2.0 / (1.0 + 2.0 * _theta * _TrTv - 2.0 * _theta * _theta * _TrTv);
	72	double l = r * t;
	73	double exp1 = exp(-l);
	74	double exp2 = exp(-k * l);
	75
	76	switch(i) {
	77	//A
	78	case 0 : {
	79	switch(j) {
	80	case 0 : return r * (_freq[0] * - exp1 + _theta * -k * exp2); //A
	81	case 1 : return r * (_freq[1] * exp1); //C
	82	case 2 : return r * (_freq[2] * - exp1 - _theta * -k * exp2); //G
	83	case 3 : return r * (_freq[3] * exp1); //T, U
	84	}
	85	}
	86	//C
	87	case 1 : {
	88	switch(j) {
	89	case 0 : return r * (_freq[0] * exp1); //A
	90	case 1 : return r * (_freq[1] * - exp1 + (1.0 - _theta) * -k * exp2); //C
	91	case 2 : return r * (_freq[2] * exp1); //G
	92	case 3 : return r * (_freq[3] * - exp1 - (1.0 - _theta) * -k * exp2); //T, U
	93	}
	94	}
	95	//G
	96	case 2 : {
	97	switch(j) {
	98	case 0 : return r * (_freq[0] * - exp1 - (1.0 - _theta) * -k * exp2); //A
	99	case 1 : return r * (_freq[1] * exp1); //C
	100	case 2 : return r * (_freq[2] * - exp1 + (1.0 - _theta) * -k * exp2); //G
	101	case 3 : return r * (_freq[3] * exp1); //T, U
	102	}
	103	}
	104	//T, U
	105	case 3 : {
	106	switch(j) {
	107	case 0 : return r * (_freq[0] * exp1); //A
	108	case 1 : return r * (_freq[1] * - exp1 - _theta * -k * exp2); //C
	109	case 2 : return r * (_freq[2] * exp1); //G
	110	case 3 : return r * (_freq[3] * - exp1 + _theta * -k * exp2); //T, U
	111	}
	112	}
	113	}
	114	return -1;
	115	}
	116
	117	const MDOUBLE tamura92::d2Pij_dt2(const int i,const int j, const MDOUBLE t) const {
	118	double k = (_TrTv + 1.0) / 2.;
	119	double k2 = k * k;
	120	double r = 2.0 / (1.0 + 2.0 * _theta * _TrTv - 2.0 * _theta * _theta * _TrTv);
	121	double l = r * t;
	122	double r2 = r * r;
	123	double exp1 = exp(-l);
	124	double exp2 = exp(-k * l);
	125
	126	switch(i) {
	127	//A
	128	case 0 : {
	129	switch(j) {
	130	case 0 : return r2 * (_freq[0] * exp1 + _theta * k2 * exp2); //A
	131	case 1 : return r2 * (_freq[1] * - exp1); //C
	132	case 2 : return r2 * (_freq[2] * exp1 - _theta * k2 * exp2); //G
	133	case 3 : return r2 * (_freq[3] * - exp1); //T, U
	134	}
	135	}
	136	//C
	137	case 1 : {
	138	switch(j) {
	139	case 0 : return r2 * (_freq[0] * - exp1); //A
	140	case 1 : return r2 * (_freq[1] * exp1 + (1.0 - _theta) * k2 * exp2); //C
	141	case 2 : return r2 * (_freq[2] * - exp1); //G
	142	case 3 : return r2 * (_freq[3] * exp1 - (1.0 - _theta) * k2 * exp2); //T, U
	143	}
	144	}
	145	//G
	146	case 2 : {
	147	switch(j) {
	148	case 0 : return r2 * (_freq[0] * exp1 - (1.0 - _theta) * k2 * exp2); //A
	149	case 1 : return r2 * (_freq[1] * - exp1); //C
	150	case 2 : return r2 * (_freq[2] * exp1 + (1.0 - _theta) * k2 * exp2); //G
	151	case 3 : return r2 * (_freq[3] * - exp1); //T, U
	152	}
	153	}
	154	//T, U
	155	case 3 : {
	156	switch(j) {
	157	case 0 : return r2 * (_freq[0] * - exp1); //A
	158	case 1 : return r2 * (_freq[1] * exp1 - _theta * k2 * exp2); //C
	159	case 2 : return r2 * (_freq[2] * - exp1); //G
	160	case 3 : return r2 * (_freq[3] * exp1 + _theta * k2 * exp2); //T, U
	161	}
	162	}
	163	}
	164	return -1;
	165	}
	166

+36

-0

libs/phylogeny/tamura92.h less more

	0	// $Id: tamura92.h 962 2006-11-07 15:13:34Z privmane $
	1
	2	#ifndef ___TAMURA92
	3	#define ___TAMURA92
	4
	5	#include "replacementModel.h"
	6	#include <cmath>
	7
	8	class tamura92 : public replacementModel {
	9	public:
	10	explicit tamura92(const MDOUBLE theta,
	11	const MDOUBLE TrTv);
	12
	13	virtual replacementModel* clone() const { return new tamura92 (*this); }
	14
	15	const int alphabetSize() const {return 4;}
	16	inline void changeTrTv(const MDOUBLE TrTv) { _TrTv = TrTv; }
	17	void changeTheta(const MDOUBLE theta);
	18	MDOUBLE getTrTv() const {return _TrTv;}
	19	MDOUBLE getTheta() const {return _theta;}
	20
	21	const MDOUBLE Pij_t(const int i,const int j, const MDOUBLE d) const;
	22	const MDOUBLE dPij_dt(const int i,const int j, const MDOUBLE d) const;
	23	const MDOUBLE freq(const int i) const {return _freq[i];};
	24	const MDOUBLE d2Pij_dt2(const int i,const int j, const MDOUBLE d) const;
	25
	26	const MDOUBLE dPij_tdBeta(const int i, const int j, const MDOUBLE t) const;
	27
	28	private:
	29	Vdouble _freq;
	30	MDOUBLE _theta;
	31	MDOUBLE _TrTv;
	32	};
	33
	34	#endif
	35

+34

-0

libs/phylogeny/tests/.cvsignore less more

	0	bootstrap_test
	1	bootstrap_test.debug
	2	bootstrap_test.out.tmp
	3	checkTreeLikelihoodGivenBranches
	4	checkTreeLikelihoodGivenBranches.out.tmp
	5	computeNJtreeJCmodel
	6	computeNJtreeJCmodel.out.tmp
	7	doubleRep
	8	doubleRep.out.tmp
	9	given2seqEstimateTheDistBetweenThem
	10	given2seqEstimateTheDistBetweenThem.out.tmp
	11	given2seqEstimateTheDistBetweenThemGamma
	12	given2seqEstimateTheDistBetweenThemGamma.out.tmp
	13	given2seqEstimateTheDistanceK2P
	14	given2seqEstimateTheDistanceK2P.out.tmp
	15	given2seqTheDistBetweenThem-3
	16	given2seqTheDistBetweenThem-3.out.tmp
	17	optimizeBranchesJC_EM
	18	optimizeBranchesJC_EM.out.tmp
	19	optimizeBranchesJC_EM_gam_estimate_alp
	20	optimizeBranchesJC_EM_gam_estimate_alp.out.tmp
	21	optimizeBranchesJC_EM_gamma
	22	optimizeBranchesJC_EM_gamma.out.tmp
	23	optimize_HKY_param
	24	optimize_HKY_param.out.tmp
	25	readTreeWithComments
	26	readTreeWithComments.debug
	27	readTreeWithComments.out.tmp
	28	splitMap_test
	29	splitMap_test.debug
	30	splitMap_test.out.tmp
	31	split_test
	32	split_test.debug
	33	split_test.out.tmp

+61

-0

libs/phylogeny/tests/Makefile less more

	0	CPPFLAGS= -g -Wall -Wno-sign-compare -I.. -DLOG -ftemplate-depth-25 -O0
	1	CPPFLAGSDEBUG= -g -Wall -Wno-sign-compare -I.. -DLOG -ftemplate-depth-32
	2
	3	# -O3
	4	LDFLAGS= -L..
	5	LDLIBS= -lEvolTree
	6
	7	CC=g++
	8	CXX=g++
	9
	10	.PHONY: test tests run all clean
	11
	12	TESTS= split_test splitMap_test bootstrap_test \
	13	given2seqTheDistBetweenThem-3 \
	14	given2seqEstimateTheDistBetweenThem computeNJtreeJCmodel \
	15	checkTreeLikelihoodGivenBranches optimizeBranchesJC_EM \
	16	given2seqEstimateTheDistBetweenThemGamma \
	17	optimizeBranchesJC_EM_gamma \
	18	optimizeBranchesJC_EM_gam_estimate_alp \
	19	given2seqEstimateTheDistanceK2P optimize_HKY_param \
	20	doubleRep readTreeWithComments
	21	#ludouble_test DistanceTableFromTree
	22	OTHER_TESTS= exhaustiveSearch
	23
	24	all: test
	25	run: $(addsuffix .out.tmp,$(TESTS))
	26
	27	libEvolDebug=../libEvolTreeDebug.a
	28
	29	DEBUGEXEC = $(addsuffix .debug,$(TESTS))
	30	$(DEBUGEXEC): $(libEvolDebug)
	31
	32	$(TESTS): ../libEvolTree.a
	33
	34	test: $(addsuffix .test,$(TESTS))
	35
	36	alltest: $(addsuffix .test,$(TESTS) $(OTHER_TESTS))
	37
	38	%.test: %.out.tmp %.out.standard
	39	diff $^
	40
	41	readTreeWithComments.out.tmp: readTreeWithComments treeWithComments.tree
	42	./$^>$@
	43
	44	%.out.tmp: %
	45	$(*) > $@
	46
	47	tests: $(TESTS)
	48
	49
	50	%.debug.o: %.c
	51	$(CC) -c $(CPPFLAGSDEBUG) $(CFLAGS) $< -o $@
	52
	53	%.debug.o: %.cpp
	54	$(CXX) -c $(CPPFLAGSDEBUG) $(CXXFLAGS) $< -o $@
	55
	56	debug: $(DEBUGEXEC)
	57
	58
	59	clean:
	60	-rm -f $(TESTS) .out.tmp .o

+97

-0

libs/phylogeny/tests/bootstrap_test.cpp less more

	0	#include "bootstrap.h"
	1	#include "treeUtil.h"
	2	#include "someUtil.h"
	3	using namespace std;
	4
	5	int main()
	6	{
	7	cout << "creating a bootstrap object from a file"<<endl;
	8
	9	string filename("bootstrap_test.txt");
	10
	11	vector<tree> tv(getStartingTreeVecFromFile(filename));
	12
	13	// first constractor
	14	cout << " first constractor"<<endl;
	15	bootstrap b1(filename);
	16	b1.print();
	17	cout <<endl;
	18
	19	// secound constractor
	20	cout << " secound constractor" <<endl;
	21	bootstrap b2(tv);
	22	b2.print();
	23	cout <<endl;
	24
	25	cout << "getting weights from a tree" << endl;
	26	map<int,MDOUBLE> v1(b1.getWeightsForTree(tv[0])) ;
	27	for (map<int,MDOUBLE>::iterator i = v1.begin();i!=v1.end();++i)
	28	cout << " "<<i->second;
	29	cout << endl;
	30
	31	cout << "print the support of a tree" <<endl;
	32	b1.printTreeWithBPvalues(cout, tv[0], v1);
	33	cout <<endl <<endl;
	34
	35
	36	cout<< "remove the first tree from the list, and use is as bases for additional computation"<<endl;
	37	tree t(tv[0]);
	38	cout<< "use the secound tree twice"<<endl;
	39	tv[0]=tv[1];
	40
	41	// secound constractor
	42	bootstrap b3(tv);
	43	b3.print_names(cout);
	44	b3.print();
	45	map<int,MDOUBLE> v3(b3.getWeightsForTree(t)) ;
	46	// for (map<int,MDOUBLE>::iterator i = v3.begin();i!=v3.end();++i)
	47	// cout << " "<<i->second;
	48	//cout << endl;
	49
	50	cout << "print the support of the removed tree"<<endl;
	51	b3.printTreeWithBPvalues(cout, t, v3);
	52	cout <<endl;
	53
	54	cout <<endl<<endl<<endl<<"compatability"<<endl;
	55	tree t2(b3.consensusTree());
	56	// cout << t2<<endl;
	57	map<int,MDOUBLE> support(b3.getWeightsForTree(t2));
	58
	59
	60	b3.printTreeWithBPvalues(cout, t2, support);
	61	cout <<endl;
	62
	63	// for (map<int,MDOUBLE>::const_iterator ii= support.begin(); ii != support.end();++ii)
	64	// cout << ii->second <<" ";
	65	// cout << endl;
	66
	67	cout <<"compatability 0.0"<<endl;
	68	t=b3.consensusTree(0.);
	69	support=b3.getWeightsForTree(t);
	70	b3.printTreeWithBPvalues(cout, t, support);
	71	cout <<endl;
	72
	73	// for (map<int,MDOUBLE>::iterator i=support.begin();i!=support.end();++i)
	74	// {
	75	// cout << "<"<<i->first<<","<<i->second <<">:"<<support[i->first]<<endl;
	76	// }
	77
	78	double c=0.8;
	79	cout <<"compatability "<<c<<endl;
	80	t=b3.consensusTree(c);
	81	support=b3.getWeightsForTree(t);
	82	b3.printTreeWithBPvalues(cout, t, support);
	83	cout <<endl;
	84	// for (map<int,MDOUBLE>::iterator i=support.begin();i!=support.end();++i)
	85	// {
	86	// cout << "<"<<i->first<<","<<i->second <<">:"<<support[i->first]<<endl;
	87	// }
	88
	89	// for (map<int,MDOUBLE>::const_iterator i=support.begin();i!=support.end();++i)
	90	// {
	91	// cout << "<"<<i->first<<","<<">:"<<support[i->first]<<endl;
	92	// }
	93
	94
	95	return (0);
	96	}

+53

-0

libs/phylogeny/tests/bootstrap_test.out.standard less more

	0	creating a bootstrap object from a file
	1	first constractor
	2	5 size =2 0 1 \| 2 3 4 5
	3	1 size =3 0 1 2 \| 3 4 5
	4	1 size =2 0 1 2 3 \| 4 5
	5	2 size =2 0 1 2 5 \| 3 4
	6	1 size =2 0 1 3 5 \| 2 4
	7	1 size =2 0 1 4 5 \| 2 3
	8	4 size =3 0 1 5 \| 2 3 4
	9
	10
	11	secound constractor
	12	5 size =2 0 1 \| 2 3 4 5
	13	1 size =3 0 1 2 \| 3 4 5
	14	1 size =2 0 1 2 3 \| 4 5
	15	2 size =2 0 1 2 5 \| 3 4
	16	1 size =2 0 1 3 5 \| 2 4
	17	1 size =2 0 1 4 5 \| 2 3
	18	4 size =3 0 1 5 \| 2 3 4
	19
	20
	21	getting weights from a tree
	22	0 1 0 0 0.8 0 0.4 0 0 0
	23	print the support of a tree
	24	((Baboon:1e-06,Human:1e-06):1e-06[1],(Rat:1e-06,(Langur:1e-06,Cow:1e-06):1e-06[0.4]):1e-06[0.8],Horse:1e-06);
	25
	26	remove the first tree from the list, and use is as bases for additional computation
	27	use the secound tree twice
	28	{Baboon = 0}
	29	{Cow = 4}
	30	{Horse = 5}
	31	{Human = 1}
	32	{Langur = 2}
	33	{Rat = 3}
	34	5 size =2 0 1 \| 2 3 4 5
	35	1 size =2 0 1 2 3 \| 4 5
	36	1 size =2 0 1 2 5 \| 3 4
	37	1 size =3 0 1 3 \| 2 4 5
	38	1 size =2 0 1 3 5 \| 2 4
	39	2 size =2 0 1 4 5 \| 2 3
	40	4 size =3 0 1 5 \| 2 3 4
	41
	42	print the support of the removed tree
	43	((Baboon:1e-06,Human:1e-06):1e-06[1],(Rat:1e-06,(Langur:1e-06,Cow:1e-06):1e-06[0.2]):1e-06[0.8],Horse:1e-06);
	44
	45
	46
	47	compatability
	48	(Cow:0.3,Langur:0.3,Rat:0.3,(Horse:0.3,(Baboon:0.3,Human:0.3):1e-06[1]):1e-06[0.8]);
	49	compatability 0.0
	50	(Langur:0.3,Rat:0.3,(Cow:0.3,(Horse:0.3,(Baboon:0.3,Human:0.3):1e-06[1]):1e-06[0.8]):1e-06[0.4]);
	51	compatability 0.8
	52	(Cow:0.3,Langur:0.3,Rat:0.3,(Horse:0.3,(Baboon:0.3,Human:0.3):1e-06[1]):1e-06[0.8]);

+5

-0

libs/phylogeny/tests/bootstrap_test.txt less more

	0	((Baboon,Human),(Rat,(Langur,Cow)),Horse);
	1	((Baboon,Human),((Langur,Rat),Cow),Horse);
	2	((Baboon,Human),((Rat,Cow),Langur),Horse);
	3	((Baboon,Human),(Rat,(Langur,Cow)),Horse);
	4	((Baboon,Human),(Langur,(Cow,Horse)),Rat);

+51

-0

libs/phylogeny/tests/checkTreeLikelihoodGivenBranches.cpp less more

	0	#include <fstream>
	1	#include <iostream>
	2	#include <string>
	3	using namespace std;
	4
	5	#include "nucJC.h"
	6	#include "sequence.h"
	7	#include "distribution.h"
	8	#include "stochasticProcess.h"
	9	#include "uniDistribution.h"
	10	#include "trivialAccelerator.h"
	11	#include "sequenceContainer.h"
	12	#include "nucleotide.h"
	13	#include "phylipFormat.h"
	14	#include "likelihoodComputation.h"
	15
	16	// NOTE: YOU MUST CHANGE THE NAME OF THE string seqFile TO MATCH YOUR OWN LOCATION OF THE SEQUENCE FILE NAME!
	17
	18	int main(int argc,char*argv[]) {
	19	cout<<"This program computes for the JC model, the likelihood of a given tree (when the branch lengths are given)."<<endl;
	20	string seqFile = "nuc7.phylip.txt";
	21	distribution *dist = new uniDistribution;
	22	replacementModel *probMod=new nucJC;
	23	pijAccelerator * pijAcc = new trivialAccelerator(probMod);
	24	stochasticProcess sp(dist, pijAcc);
	25	ifstream in(seqFile.c_str());
	26	if (!in) {errorMsg::reportError("unable to open input sequence file");}
	27	nucleotide myAlph;
	28	sequenceContainer original = phylipFormat::read(in,&myAlph);
	29	// const MDOUBLE myToll = 0.0001;
	30
	31	const string treeFileName = "sevenTaxaTree.txt";
	32	tree myT(treeFileName);
	33	cout<<"computing the log likelihood of the tree..."<<endl;
	34
	35	MDOUBLE resL = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(myT,original,sp);
	36	//tree njTree = nj1.computeNJtree(disTab,vNames);
	37	//ofstream out("njTreeRes.txt");
	38	//njTree.output(out);
	39
	40
	41
	42	//MDOUBLE resL = 0;
	43	//MDOUBLE resD = likeDist1.giveDistance(s1,s2,NULL,&resL);
	44	cout<<" the likelihood of the tree is:"<<resL<<endl;
	45	//cout<<" the ML distance between these 2 sequences is:"<<resD<<endl;
	46
	47	delete dist;
	48	delete probMod;
	49	return 0;
	50	}

+3

-0

libs/phylogeny/tests/checkTreeLikelihoodGivenBranches.out.standard less more

	0	This program computes for the JC model, the likelihood of a given tree (when the branch lengths are given).
	1	computing the log likelihood of the tree...
	2	the likelihood of the tree is:-2228.09

+60

-0

libs/phylogeny/tests/computeNJtreeJCmodel.cpp less more

	0	#include <fstream>
	1	#include <iostream>
	2	#include <string>
	3	using namespace std;
	4
	5	#include "nucJC.h"
	6	#include "sequence.h"
	7	#include "distribution.h"
	8	#include "stochasticProcess.h"
	9	#include "uniDistribution.h"
	10	#include "trivialAccelerator.h"
	11	#include "sequenceContainer.h"
	12	#include "nucleotide.h"
	13	#include "phylipFormat.h"
	14	#include "jcDistance.h"
	15	#include "distanceTable.h"
	16	#include "nj.h"
	17	// NOTE: YOU MUST CHANGE THE NAME OF THE string seqFile TO MATCH YOUR OWN LOCATION OF THE SEQUENCE FILE NAME!
	18
	19	int main(int argc,char*argv[]) {
	20	cout<<"This program computes for the JC model, the NJ tree."<<endl;
	21	string seqFile = "nuc7.phylip.txt";
	22	if (argc>1)
	23	seqFile=argv[1];
	24	distribution *dist = new uniDistribution;
	25	replacementModel *probMod=new nucJC;
	26	pijAccelerator * pijAcc = new trivialAccelerator(probMod);
	27	stochasticProcess sp(dist, pijAcc);
	28	ifstream in(seqFile.c_str());
	29	if (!in) {errorMsg::reportError("unable to open input sequence file");}
	30	nucleotide myAlph;
	31	sequenceContainer original = phylipFormat::read(in,&myAlph);
	32
	33
	34	//const MDOUBLE myToll = 0.0001;
	35
	36	cout<<"computing the NJ tree..."<<endl;
	37	jcDistance likeDist1(myAlph.size());
	38	VVdouble disTab;
	39	vector<string> vNames;
	40	giveDistanceTable(&likeDist1,
	41	original,
	42	disTab,
	43	vNames);
	44	NJalg nj1;
	45	tree njTree = nj1.computeTree(disTab,vNames);
	46	// ofstream out("njTreeRes.txt");
	47	njTree.output(cout);
	48
	49
	50
	51	//MDOUBLE resL = 0;
	52	//MDOUBLE resD = likeDist1.giveDistance(s1,s2,NULL,&resL);
	53	//cout<<" the likelihood of these 2 sequences is:"<<resL<<endl;
	54	//cout<<" the ML distance between these 2 sequences is:"<<resD<<endl;
	55
	56	delete dist;
	57	delete probMod;
	58	return 0;
	59	}

+3

-0

libs/phylogeny/tests/computeNJtreeJCmodel.out.standard less more

	0	This program computes for the JC model, the NJ tree.
	1	computing the NJ tree...
	2	(Aal:0.062679,(Ese:0.063602,(Ttt:0.024273,Mtr:0.041375):0.055160):0.015349,(Eco:0.092963,(Dvi:0.093969,Meu:0.052839):0.075812):0.014925);

+17

-0

libs/phylogeny/tests/doubleRep.cpp less more

	0	// we want to make sure that we are using the doubleRep class with DOUBLEREP enabled.
	1	#define DOUBLEREP t
	2	#include "../doubleRep.cpp"
	3
	4
	5	int main()
	6	{
	7	double d=5.352e-30;
	8	doubleRep k;
	9	k= d;
	10	k.output(cout);cout<<endl;
	11	cout << k.mantissa() <<" "<< k.expon() <<endl<<endl;
	12
	13	cout <<"as double "<<d<<endl;
	14	cout <<"as doubleRep "<<convert(k)<<endl;
	15	return(0);
	16	}

+5

-0

libs/phylogeny/tests/doubleRep.out.standard less more

	0	0.848058 * 2^-97
	1	0.848058 -97
	2
	3	as double 5.352e-30
	4	as doubleRep 5.352e-30

+48

-0

libs/phylogeny/tests/exhaustiveSearch.cpp less more

	0	#include <fstream>
	1	#include <iostream>
	2	#include <string>
	3	#include <iomanip>
	4	using namespace std;
	5
	6	#include "nucJC.h"
	7	#include "sequence.h"
	8	#include "distribution.h"
	9	#include "stochasticProcess.h"
	10	#include "gammaDistribution.h"
	11	#include "uniDistribution.h"
	12	#include "trivialAccelerator.h"
	13	#include "sequenceContainer.h"
	14	#include "nucleotide.h"
	15	#include "phylipFormat.h"
	16	#include "likelihoodComputation.h"
	17	#include "bestHKYparam.h"
	18	#include "evaluateCharacterFreq.h"
	19	#include "bblEM.h"
	20	#include "allTrees.h"
	21
	22
	23	int main(int argc,char*argv []) {
	24	cout<<"exhaustive search"<<endl;
	25
	26	// getting the data.
	27	string seqFile1 = "nuc7.phylip.txt";
	28	ifstream in1(seqFile1.c_str());
	29	if (!in1) {errorMsg::reportError("unable to open input sequence file");}
	30	nucleotide myAlph;
	31	sequenceContainer original1 = phylipFormat::read(in1,&myAlph);
	32	in1.close();
	33
	34	distribution *dist = new uniDistribution;
	35	replacementModel *probMod=new nucJC;
	36	pijAccelerator * pijAcc = new trivialAccelerator(probMod);
	37	stochasticProcess sp(dist, pijAcc);
	38
	39	allTrees allTrees1(false);
	40	allTrees1.recursiveFind(&original1,&sp);
	41	cout<<" Log likelihood for best tree = "<<allTrees1.getBestScore()<<endl;
	42	allTrees1.getBestTree().output(cout);
	43
	44	delete dist;
	45	delete probMod;
	46	return 0;
	47	}

+3

-0

libs/phylogeny/tests/exhaustiveSearch.out.standard less more

	0	exhaustive search
	1	................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. Log likelihood for best tree = -2219.51
	2	(Aal:0.054390,((Mtr:0.048174,Ttt:0.018190):0.062200,Ese:0.061401):0.037929,(Eco:0.091967,(Meu:0.042443,Dvi:0.107768):0.089076):0.034799);

+44

-0

libs/phylogeny/tests/given2seqEstimateTheDistBetweenThem.cpp less more

	0	#include <fstream>
	1	#include <iostream>
	2	#include <string>
	3	using namespace std;
	4
	5	#include "sequence.h"
	6	#include "distribution.h"
	7	#include "stochasticProcess.h"
	8	#include "uniDistribution.h"
	9	#include "nucJC.h"
	10	#include "trivialAccelerator.h"
	11	#include "sequenceContainer.h"
	12	#include "nucleotide.h"
	13	#include "phylipFormat.h"
	14	#include "likeDist.h"
	15	// NOTE: YOU MUST CHANGE THE NAME OF THE string seqFile TO MATCH YOUR OWN LOCATION OF THE SEQUENCE FILE NAME!
	16
	17	int main(int argc,char*argv[]) {
	18	cout<<"This program computes for the JC model, when two sequences are given, and the distance between these two sequences is known, the likelihood."<<endl;
	19	string seqFile = "s2l4_DNA.txt";
	20	distribution *dist = new uniDistribution;
	21	replacementModel *probMod=new nucJC;
	22	pijAccelerator * pijAcc = new trivialAccelerator(probMod);
	23	stochasticProcess sp(dist, pijAcc);
	24	ifstream in(seqFile.c_str());
	25	if (!in) {errorMsg::reportError("unable to open input sequence file");}
	26	nucleotide myAlph;
	27	sequenceContainer original = phylipFormat::read(in,&myAlph);
	28	const MDOUBLE myToll = 0.0001;
	29	if (original.numberOfSeqs() != 2) {
	30	errorMsg::reportError("for this check, there suppose to be only 2 sequences",1);
	31	}
	32	sequence s1 = original[0];
	33	sequence s2 = original[1];
	34	likeDist likeDist1(sp,myToll);
	35	MDOUBLE resL = 0;
	36	MDOUBLE resD = likeDist1.giveDistance(s1,s2,NULL,&resL);
	37	cout<<" the likelihood of these 2 sequences is:"<<resL<<endl;
	38	cout<<" the ML distance between these 2 sequences is:"<<resD<<endl;
	39
	40	delete dist;
	41	delete probMod;
	42	return 0;
	43	}

+3

-0

libs/phylogeny/tests/given2seqEstimateTheDistBetweenThem.out.standard less more

	0	This program computes for the JC model, when two sequences are given, and the distance between these two sequences is known, the likelihood.
	1	the likelihood of these 2 sequences is:-10.515
	2	the ML distance between these 2 sequences is:0.823959

+45

-0

libs/phylogeny/tests/given2seqEstimateTheDistBetweenThemGamma.cpp less more

	0	#include <fstream>
	1	#include <iostream>
	2	#include <string>
	3	using namespace std;
	4
	5	#include "sequence.h"
	6	#include "distribution.h"
	7	#include "stochasticProcess.h"
	8	#include "gammaDistribution.h"
	9	#include "nucJC.h"
	10	#include "trivialAccelerator.h"
	11	#include "sequenceContainer.h"
	12	#include "nucleotide.h"
	13	#include "phylipFormat.h"
	14	#include "likeDist.h"
	15	// NOTE: YOU MUST CHANGE THE NAME OF THE string seqFile TO MATCH YOUR OWN LOCATION OF THE SEQUENCE FILE NAME!
	16
	17	int main(int argc,char*argv[]) {
	18	cout<<"This program computes for the HKY model, when two sequences are given, and the distance between these two sequences is known, the likelihood."<<endl;
	19	string seqFile = "s2l4_DNA.txt";
	20	distribution *dist = new gammaDistribution(4.0,8);
	21	replacementModel *probMod=new nucJC;
	22	pijAccelerator * pijAcc = new trivialAccelerator(probMod);
	23	stochasticProcess sp(dist, pijAcc);
	24	ifstream in(seqFile.c_str());
	25	if (!in) {errorMsg::reportError("unable to open input sequence file");}
	26	nucleotide myAlph;
	27	sequenceContainer original = phylipFormat::read(in,&myAlph);
	28	const MDOUBLE myToll = 0.0001;
	29	if (original.numberOfSeqs() != 2) {
	30	errorMsg::reportError("for this check, there suppose to be only 2 sequences",1);
	31	}
	32	sequence s1 = original[0];
	33	sequence s2 = original[1];
	34	likeDist likeDist1(sp,myToll);
	35	MDOUBLE resL = 0;
	36	MDOUBLE resD = likeDist1.giveDistance(s1,s2,NULL,&resL);
	37	cout<<" the likelihood of these 2 sequences is:"<<resL<<endl;
	38	cout<<" the ML distance between these 2 sequences is:"<<resD<<endl;
	39
	40	delete dist;
	41	delete probMod;
	42	// system("PAUSE");
	43	return 0;
	44	}

+3

-0

libs/phylogeny/tests/given2seqEstimateTheDistBetweenThemGamma.out.standard less more

	0	This program computes for the HKY model, when two sequences are given, and the distance between these two sequences is known, the likelihood.
	1	the likelihood of these 2 sequences is:-10.515
	2	the ML distance between these 2 sequences is:0.94261

+57

-0

libs/phylogeny/tests/given2seqEstimateTheDistanceK2P.cpp less more

	0	#include <fstream>
	1	#include <iostream>
	2	#include <string>
	3	using namespace std;
	4
	5	#include "hky.h"
	6	#include "sequence.h"
	7	#include "distribution.h"
	8	#include "stochasticProcess.h"
	9	#include "uniDistribution.h"
	10	#include "trivialAccelerator.h"
	11	#include "sequenceContainer.h"
	12	#include "nucleotide.h"
	13	#include "phylipFormat.h"
	14	#include "likeDist.h"
	15	// NOTE: YOU MUST CHANGE THE NAME OF THE string seqFile TO MATCH YOUR OWN LOCATION OF THE SEQUENCE FILE NAME!
	16
	17	int main(int argc,char*argv[]) {
	18	cout<<"This program computes for the K2P model, when two sequences are given, the ML distance and its likelihood."<<endl;
	19	string seqFile = "s2l4_DNA.txt";
	20	distribution *dist = new uniDistribution;
	21	replacementModel *probMod1=new hky(0.25,0.25,0.25,0.25,0.5);
	22	pijAccelerator * pijAcc1 = new trivialAccelerator(probMod1);
	23	replacementModel *probMod2=new hky(0.25,0.25,0.25,0.25,10);
	24	pijAccelerator * pijAcc2 = new trivialAccelerator(probMod2);
	25	stochasticProcess sp1(dist, pijAcc1);
	26	stochasticProcess sp2(dist, pijAcc2);
	27	ifstream in(seqFile.c_str());
	28	if (!in) {errorMsg::reportError("unable to open input sequence file");}
	29	nucleotide myAlph;
	30	sequenceContainer original = phylipFormat::read(in,&myAlph);
	31	const MDOUBLE myToll = 0.0001;
	32	if (original.numberOfSeqs() != 2) {
	33	errorMsg::reportError("for this check, there suppose to be only 2 sequences",1);
	34	}
	35	sequence s1 = original[0];
	36	sequence s2 = original[1];
	37
	38	MDOUBLE resL1 = 0;
	39	likeDist likeDist1(sp1,myToll);
	40	MDOUBLE resD1 = likeDist1.giveDistance(s1,s2,NULL,&resL1);
	41	cout<<endl<<"For Tr/Tv = 0.5" <<endl;
	42	cout<<" the likelihood of these 2 sequences is:"<<resL1<<endl;
	43	cout<<" the ML distance between these 2 sequences is:"<<resD1<<endl;
	44
	45	MDOUBLE resL2 = 0;
	46	likeDist likeDist2(sp2,myToll);
	47	MDOUBLE resD2 = likeDist2.giveDistance(s1,s2,NULL,&resL2);
	48	cout<<endl<<"For Tr/Tv = 10" <<endl;
	49	cout<<" the likelihood of these 2 sequences is:"<<resL2<<endl;
	50	cout<<" the ML distance between these 2 sequences is:"<<resD2<<endl;
	51
	52	delete dist;
	53	delete probMod1;
	54	delete probMod2;
	55	return 0;
	56	}

+9

-0

libs/phylogeny/tests/given2seqEstimateTheDistanceK2P.out.standard less more

	0	This program computes for the K2P model, when two sequences are given, the ML distance and its likelihood.
	1
	2	For Tr/Tv = 0.5
	3	the likelihood of these 2 sequences is:-10.515
	4	the ML distance between these 2 sequences is:0.823959
	5
	6	For Tr/Tv = 10
	7	the likelihood of these 2 sequences is:-8.70737
	8	the ML distance between these 2 sequences is:0.857854

+46

-0

libs/phylogeny/tests/given2seqTheDistBetweenThem-3.cpp less more

	0	#include <fstream>
	1	#include <iostream>
	2	#include <string>
	3	using namespace std;
	4
	5	#include "sequence.h"
	6	#include "distribution.h"
	7	#include "stochasticProcess.h"
	8	#include "uniDistribution.h"
	9	#include "nucJC.h"
	10	#include "trivialAccelerator.h"
	11	#include "sequenceContainer.h"
	12	#include "nucleotide.h"
	13	#include "phylipFormat.h"
	14	#include "likeDist.h"
	15	// NOTE: YOU MUST CHANGE THE NAME OF THE string seqFile TO MATCH YOUR OWN LOCATION OF THE SEQUENCE FILE NAME!
	16
	17	int main(int argc,char*argv[]) {
	18	cout<<"This program computes for the JC model, when two sequences are given, and the distance between these two sequences is known, the likelihood."<<endl;
	19	string seqFile = "s2l4_DNA.txt";
	20	distribution *dist = new uniDistribution;
	21	replacementModel *probMod=new nucJC;
	22	pijAccelerator * pijAcc = new trivialAccelerator(probMod);
	23	stochasticProcess sp(dist, pijAcc);
	24	ifstream in(seqFile.c_str());
	25	if (!in) {errorMsg::reportError("unable to open input sequence file");}
	26	nucleotide myAlph;
	27	sequenceContainer original = phylipFormat::read(in,&myAlph);
	28	const MDOUBLE myToll = 0.0001;
	29	if (original.numberOfSeqs() != 2) {
	30	errorMsg::reportError("for this check, there suppose to be only 2 sequences",1);
	31	}
	32	sequence s1 = original[0];
	33	sequence s2 = original[1];
	34	likeDist likeDist1(sp,myToll);
	35	MDOUBLE resL = likeDist1.giveLikelihood(s1,s2,0.01);
	36	cout<<" the likelihood of these 2 sequences is:"<<resL<<endl;
	37	MDOUBLE dis = likeDist1.giveDistance(s1,s2,NULL,&resL);
	38	cout<<" the optimal distance between the distances is:"<<dis<<endl;
	39	cout<<" the optimal likelihood of these 2 sequences is:"<<resL<<endl;
	40
	41
	42	delete dist;
	43	delete probMod;
	44	return 0;
	45	}

+4

-0

libs/phylogeny/tests/given2seqTheDistBetweenThem-3.out.standard less more

	0	This program computes for the JC model, when two sequences are given, and the distance between these two sequences is known, the likelihood.
	1	the likelihood of these 2 sequences is:-16.986
	2	the optimal distance between the distances is:0.823959
	3	the optimal likelihood of these 2 sequences is:-10.515

+1

-0

libs/phylogeny/tests/njTreeRes.txt less more

0

(Aal:0.062679,(Ese:0.063602,(Ttt:0.024273,Mtr:0.041375):0.055160):0.015349,(Eco:0.092963,(Dvi:0.093969,Meu:0.052839):0.075812):0.014925);

+88

-0

libs/phylogeny/tests/nuc7.phylip.txt less more

	0	7 525
	1	Dvi ACAGCCCAAC CATTTGTATA GGTGATAGAA AAGGCTATAG TTATAGTACC
	2	Meu CAAGCCCAAT CATTTGTATA GGCGATAGAA AAGGCAATAA CGCAAGTACC
	3	Ese CTAGCCCAAA CATTTGTATA GGAGATAGAA ATTGCAATAG AAATAGTACC
	4	Ttt CTAGCCCAAA CATTCGTATC AGAGACAGAA ATTGCTATAG AGATAGTACC
	5	Mtr CTAGCCCAAA GATCAGTATC AGCGACAGAA ATTGCTATAG AGATAGTACC
	6	Eco CTAGCCTATT CATTTGTATA GGAGATAGAA AATGCAATAG AAAAAGTACC
	7	Aal CTAGCCCAAA CATTTGTATA GGAGATAGAA ATAGCTATAG AGAAAGTACC
	8
	9	GCAAGGGAAA AATGAAAGTA ATTAAAAGCA AAGATTAACT CTTGTACCTT
	10	GTAAGGGAAA CATGAAAGCA CTAAAAAGCA AAGATTAGAC CTTCTACCTT
	11	GCAAGGGAAA GATGAAAGTA CAAAACAGCA AAGACTAACA CTTTTACCTT
	12	GTAAGGGAAA GATGAAAGTA CAAGACAGCA AAGATCAGCC CTTTTACCTT
	13	GCAAGGGAAA GATGAAAGTA TAGAATAGCA AAGATCAACC ATTCTACCTT
	14	GTAAGGGAAC GATTGAAGTA AGAAATAGTA AAGATTAAAC CTTGTACCTT
	15	GTAAGGGAAT GATGAAAGTA ACAAATAGCA AAGCTTACCC CTTTTACCTT
	16
	17	TTGCATAATG ATTTAGCCAG TGCAAAAAGA TTATGCCCGA CATCCCGAAA
	18	TTGCATAATG GTTTAGCCAG TACAAAAAGA TTATGCCCGC CTTCCCGAAA
	19	TTGCATAATG AGTTAGCTAG AGCAAAGAGA TTCAGTCAAG CACCCCGAAA
	20	TTGCATAATG AGTCAACTAG ATCAAAGAGA TTAAGTAAGA TACCCCGAAT
	21	TCGCATAATG AATCAACCAG ATCAAAGAGA TTAAGTAAAA AACCCCGAAT
	22	TTGCATAATG ATTTAGCCAG AACAAAGAGA TAAAGCTAAT TACCCCGAAA
	23	TTGCATAATG AGTTAACTAG AACAAAGAGA TTAAGTTAAG TACCCCGAAA
	24
	25	TTAAGTGAGC TACTATAAGA CAGACTCATC TATGTAGCAA AATAGTGAGA
	26	TCAAGTGAGC TACTATAAAA CAGACTCGTC TATGTAGCAA AATAGTGAGA
	27	CCAGACGAGC TACTTATGGA CAGACTCGTC TATGTAGCAA AATAGTGAGA
	28	CCAGACGATC TACCTATAGA CAGATTCATC TATGTGGCAA AATAGTGAAA
	29	CCAGACGATC TACCTGCTGA CAGATTCATC TATGTGGCAA AATAGTGAAA
	30	CCAGACGAGC TATCTAATAA CAAACTCATC TATGTGGCAA AATAGTGAGA
	31	CCAGACGAGC TACCTGCGAG CAGACTCGTC TATGTGGCAA AATAGTGAGA
	32
	33	AGATTTTATA GTAGAAGTGA AAAACCTATC GAACTTAATG ATAGCTGGTT
	34	AGATTTTATA GTAGAGGTGA AAAGCCTACC GAACTTGATG ATAGCTGGTT
	35	AGATCTATAA GTAGAGGTGA AAAGCCAACC GAGCCTGGTG ATAGCTGGTT
	36	TGATCGATAG GTAGCGGTGA AAAGCCAATC GAGCCTGGTG ATAGCTGGTT
	37	CGATCAACAG GTAGCGGTGA AAAGCCAATC GAGCCTGGTG ATAGCTGGTT
	38	AGATTGCTAG ATAGAGGTGA AACGCCTATC GAGCCTGGTG ATAGCTGGTT
	39	AGACTCTCAG GTAGAGGTGA AAAGCCTACC GAGCCTGGTG ATAGCTGGTT
	40
	41	ATCCAGAATT TAAGTTCAAC TTTAAGAATA AGCTTAAAAG TTAGTCAAAG
	42	GTCCAGAATT TTAGTTCAAC TTTAAACTTA AATTTAAAAG CTAATCAAAA
	43	GTCCAGAATA TTAGTTCGAC TTAAAATGTA AATTTTAAAT ATACTCAATA
	44	GTCCAGAATA TAAGTTCAAC TTAAAATGTA AGTTTTAGAT ATAGTCAATA
	45	GTCCAGAATA TAAGTTCAGC TTAAAATGTA AGTTTTAAAC ATAGTCAATA
	46	GTCCAGAATT TTAGTTCAAC TTTAAATGTA AATTTAAAAG ATAATTCTAA
	47	GTCCAGAATA TAAGTTCAAC TTTAATTGTA AGATTAAATG ATAATCTAAA
	48
	49	AAGGGACAAC TTCTTTGCAA ACTACATTAG AGGGTAAATC GTTGGCTTAA
	50	GGGGGTCAGC TCTTTTGCAA ACTTTATTAG AGGATAAATT GTAGGCCTAA
	51	GGGGTACAGC CCTATTGAAA CCTAATTTGG AGAGTAAATT GTTGGCCTAA
	52	GGGGTACAGC CCTATTGAAA CCTTAATTAG AGAGTTCATA GTAGGCTTAA
	53	GGGGTACAGC CCTATTGAAA CCTTAATTAG AGAGTACATA GTAGGCTTAA
	54	GGGGGACAGC TCTTAGACAA CCCTAATCAG AGAGTACATA GTGGGCCTAA
	55	GAGGTACAGC CCTTTAGCAA CCTTAAACGG AGAGTAAATA GTTGGCCTAA
	56
	57	AAGCAGCCAT CAACTAAGAA AGCGTTAAAG CTCAAACCCC TTGGATGATT
	58	AAGCAGCCAC CAATTAAGAA AGCGTTAAAG CTCAAACTCC TTGGACGATT
	59	AAGCAGCCAC CAATAAAGAT AGCGTTCAAG CTCAAACTCC TTGGACCAAT
	60	AAGCAGCCAT CAATAAAGAA AGCGTTCAAG CTCAACCTCC CTGGACCAAT
	61	GAGCAGCCAC CAATAAAGAA AGCGTTTAAG CTCAACCTCC CTGGACTAAT
	62	AAGCAGCCAT CAATTAAGAA AGCGTTAAAG CTCAAACTCC TTGGACTAAT
	63	AAGCAGCCAC CAATTAAGAA AGCGTTCAAG CTCAACCTCC CTGGACTAAT
	64
	65	CTATTAGAAT ACATAATGCT AAAATTAGTA ATAAGACTCC GCACAAGCCT
	66	CTATTAGAAG ACATAATGCT AAAATGAGTA ACAAGACTCC GCACAAGCCT
	67	CTATTAGAAG AAATAATGCT AACATAAGTA ACAAGACTCC GCATAAGCTT
	68	CTATTAGAAG AGATAATGTT GATATGAGTA ACAAGACTCC GCACAAGTTT
	69	CTATTAGAAG AAACAATGTT GATATGAGTA ACAAGACTCC GCACAAGTTT
	70	CTATTAGAAG AAATACTGCT AATATGAGTA ACAAGACTCC GCATGATTCT
	71	CTATTAGAAG AGATACTGTT AATATGAGTA ACAAGACTCC GCACAAACCT
	72
	73	ATAGATAATA GTTAACATTG TTAATCCAAC ACAGGTGTGC GGAAAGATAT
	74	AAATTTAACA ATTAACATTG TTAACCCAAC ACAGGAGTGC GGAAAGATTA
	75	ACATCTGATA ATTAACATTG TTAACCCGAC ACAGGCATGC GGAAAGATTT
	76	ATATCTGATA GTTAACATTG TTAGCCCAAC ACAGGAGTGC GGAAAGATTA
	77	ATATCTGATA GTTAACATTG TTAGCCCAAC ACAGGAGTGC GGAAAGATTA
	78	ATATCTGATA GTTAACATTG TTAGCCCGAC ACAGGTATGC GGAAAGATTA
	79	ATATCTGATA ATTAACATTG TTAACCCAAC ACAGGCGTGC GGAAAGATTA
	80
	81	AAAAGAACAA AAGGAACTCG GCAAA
	82	AAAGGAATAA AAGGAACTCG GCAAA
	83	AAAGAAATAG AAGGAACTCG GCAAA
	84	AAAGAAACAA AAGGAACTCG GCAAA
	85	AAAGAAACAA AAGGAACTCG GCAAA
	86	AATAGGTCAG AAGGAACTCG GCAAA
	87	AAAGAAGTAA AAGGAACTCG GCAAA

+53

-0

libs/phylogeny/tests/optimizeBranchesJC_EM.cpp less more

	0	// $Id: optimizeBranchesJC_EM.cpp 370 2005-05-25 20:46:34Z ninio $
	1
	2	#include <fstream>
	3	#include <iostream>
	4	#include <string>
	5	#include <iomanip>
	6	using namespace std;
	7
	8	#include "nucJC.h"
	9	#include "sequence.h"
	10	#include "distribution.h"
	11	#include "stochasticProcess.h"
	12	#include "uniDistribution.h"
	13	#include "trivialAccelerator.h"
	14	#include "sequenceContainer.h"
	15	#include "nucleotide.h"
	16	#include "phylipFormat.h"
	17	#include "likelihoodComputation.h"
	18	#include "bblEM.h"
	19
	20	// NOTE: YOU MUST CHANGE THE NAME OF THE string seqFile TO MATCH YOUR OWN LOCATION OF THE SEQUENCE FILE NAME!
	21
	22	int main(int argc,char*argv[]) {
	23	cout<<"This program computes for the JC model, the likelihood of a given tree (when the branch lengths are given)."<<endl;
	24	string seqFile = "nuc7.phylip.txt";
	25	distribution *dist = new uniDistribution;
	26	replacementModel *probMod=new nucJC;
	27	pijAccelerator * pijAcc = new trivialAccelerator(probMod);
	28	stochasticProcess sp(dist, pijAcc);
	29	ifstream in(seqFile.c_str());
	30	if (!in) {errorMsg::reportError("unable to open input sequence file");}
	31	nucleotide myAlph;
	32	sequenceContainer original = phylipFormat::read(in,&myAlph);
	33
	34	const string treeFileName = "startTree.txt";
	35	tree myT(treeFileName);
	36	cout<<"computing the log likelihood of the tree..."<<endl;
	37
	38	MDOUBLE resL = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(myT,original,sp);
	39	cout<<" starting L = "<<resL<<endl;
	40
	41	bblEM bblEM1(myT,original,sp,NULL,1000,0.05);
	42	resL = bblEM1.getTreeLikelihood();
	43
	44	cout<<" end L, after bbl = "<<setprecision(12)<<resL<<endl;
	45
	46	// ofstream out("tAfterBBL.txt");
	47	myT.output(cout);
	48
	49	delete dist;
	50	delete probMod;
	51	return 0;
	52	}

+5

-0

libs/phylogeny/tests/optimizeBranchesJC_EM.out.standard less more

	0	This program computes for the JC model, the likelihood of a given tree (when the branch lengths are given).
	1	computing the log likelihood of the tree...
	2	starting L = -2228.09
	3	end L, after bbl = -2219.50591327
	4	(Aal:0.055486320897,(Ese:0.060836383576,(Ttt:0.018202550909,Mtr:0.048144922650):0.062910979704):0.037581468483,(Eco:0.092932108224,(Dvi:0.108160657949,Meu:0.042026386196):0.089296640074):0.033019500657);

+51

-0

libs/phylogeny/tests/optimizeBranchesJC_EM_gam_estimate_alp.cpp less more

	0	#include <fstream>
	1	#include <iostream>
	2	#include <string>
	3	#include <iomanip>
	4	using namespace std;
	5
	6	#include "nucJC.h"
	7	#include "sequence.h"
	8	#include "distribution.h"
	9	#include "stochasticProcess.h"
	10	#include "gammaDistribution.h"
	11	#include "trivialAccelerator.h"
	12	#include "sequenceContainer.h"
	13	#include "nucleotide.h"
	14	#include "phylipFormat.h"
	15	#include "likelihoodComputation.h"
	16	#include "bestAlpha.h"
	17
	18	// NOTE: YOU MUST CHANGE THE NAME OF THE string seqFile TO MATCH YOUR OWN LOCATION OF THE SEQUENCE FILE NAME!
	19
	20	int main(int argc,char*argv[]) {
	21	cout<<"This program computes for the JC model, the likelihood of a given tree (when the branch lengths are given)."<<endl;
	22	string seqFile = "nuc7.phylip.txt";
	23	distribution *dist = new gammaDistribution(1,8);
	24	replacementModel *probMod=new nucJC;
	25	pijAccelerator * pijAcc = new trivialAccelerator(probMod);
	26	stochasticProcess sp(dist, pijAcc);
	27	ifstream in(seqFile.c_str());
	28	if (!in) {errorMsg::reportError("unable to open input sequence file");}
	29	nucleotide myAlph;
	30	sequenceContainer original = phylipFormat::read(in,&myAlph);
	31
	32	const string treeFileName = "startTree.txt";
	33	tree myT(treeFileName);
	34	cout<<"computing the log likelihood of the tree..."<<endl;
	35
	36	MDOUBLE resL = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(myT,original,sp);
	37	cout<<" starting L = "<<resL<<endl;
	38	bestAlphaAndBBL bestAlphaAndBBL1(myT,original,sp);
	39
	40	resL = bestAlphaAndBBL1.getBestL();
	41	MDOUBLE resAlpha = bestAlphaAndBBL1.getBestAlpha();
	42	cout<<"final likelihood: "<<resL<<endl;
	43	cout<<"best Alpha: "<<resAlpha<<endl;
	44	// ofstream out("tAfterBBL.txt");
	45	myT.output(cout);
	46
	47	delete dist;
	48	delete probMod;
	49	return 0;
	50	}

+6

-0

libs/phylogeny/tests/optimizeBranchesJC_EM_gam_estimate_alp.out.standard less more

	0	This program computes for the JC model, the likelihood of a given tree (when the branch lengths are given).
	1	computing the log likelihood of the tree...
	2	starting L = -2189.77
	3	final likelihood: -2172.99
	4	best Alpha: 0.501635
	5	(Aal:0.059683,(Ese:0.068312,(Ttt:0.017214,Mtr:0.052743):0.072642):0.043677,(Eco:0.114730,(Dvi:0.133475,Meu:0.039589):0.117996):0.033172);

+53

-0

libs/phylogeny/tests/optimizeBranchesJC_EM_gamma.cpp less more

	0	#include <fstream>
	1	#include <iostream>
	2	#include <string>
	3	#include <iomanip>
	4	using namespace std;
	5
	6	#include "nucJC.h"
	7	#include "sequence.h"
	8	#include "distribution.h"
	9	#include "stochasticProcess.h"
	10	#include "gammaDistribution.h"
	11	#include "trivialAccelerator.h"
	12	#include "sequenceContainer.h"
	13	#include "nucleotide.h"
	14	#include "phylipFormat.h"
	15	#include "likelihoodComputation.h"
	16	#include "bblEM.h"
	17
	18	// NOTE: YOU MUST CHANGE THE NAME OF THE string seqFile TO MATCH YOUR OWN LOCATION OF THE SEQUENCE FILE NAME!
	19
	20	int main(int argc,char*argv[]) {
	21	cout<<"This program computes for the JC model, the likelihood of a given tree (when the branch lengths are given)."<<endl;
	22	string seqFile = "nuc7.phylip.txt";
	23	distribution *dist = new gammaDistribution(1,8);
	24	replacementModel *probMod=new nucJC;
	25	pijAccelerator * pijAcc = new trivialAccelerator(probMod);
	26	stochasticProcess sp(dist, pijAcc);
	27	ifstream in(seqFile.c_str());
	28	if (!in) {errorMsg::reportError("unable to open input sequence file");}
	29	nucleotide myAlph;
	30	sequenceContainer original = phylipFormat::read(in,&myAlph);
	31
	32	const string treeFileName = "startTree.txt";
	33	tree myT(treeFileName);
	34	cout<<"computing the log likelihood of the tree..."<<endl;
	35
	36	MDOUBLE resL = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(myT,original,sp);
	37	cout<<" starting L = "<<resL<<endl;
	38
	39	bblEM bblEM1(myT,original,sp,NULL,1000,0.05);
	40	resL = bblEM1.getTreeLikelihood();
	41
	42
	43	cout<<" end L, after bbl = "<<setprecision(12)<<resL<<endl;
	44
	45	//ofstream out("tAfterBBL.txt");
	46	myT.output(cout);
	47
	48	delete dist;
	49	delete probMod;
	50	//system("PAUSE");
	51	return 0;
	52	}

+5

-0

libs/phylogeny/tests/optimizeBranchesJC_EM_gamma.out.standard less more

	0	This program computes for the JC model, the likelihood of a given tree (when the branch lengths are given).
	1	computing the log likelihood of the tree...
	2	starting L = -2189.77
	3	end L, after bbl = -2179.45590941
	4	(Aal:0.059126005932,(Ese:0.066030633739,(Ttt:0.017928742924,Mtr:0.051557833141):0.069475141702):0.041343973825,(Eco:0.106662142475,(Dvi:0.124009000428,Meu:0.040988634764):0.105976705171):0.033194854881);

+69

-0

libs/phylogeny/tests/optimize_HKY_param.cpp less more

	0	#include <fstream>
	1	#include <iostream>
	2	#include <string>
	3	#include <iomanip>
	4	using namespace std;
	5
	6	#include "nucJC.h"
	7	#include "sequence.h"
	8	#include "distribution.h"
	9	#include "stochasticProcess.h"
	10	#include "gammaDistribution.h"
	11	#include "uniDistribution.h"
	12	#include "trivialAccelerator.h"
	13	#include "sequenceContainer.h"
	14	#include "nucleotide.h"
	15	#include "phylipFormat.h"
	16	#include "likelihoodComputation.h"
	17	#include "bestHKYparam.h"
	18	#include "evaluateCharacterFreq.h"
	19	// NOTE: YOU MUST CHANGE THE NAME OF THE string seqFile TO MATCH YOUR OWN LOCATION OF THE SEQUENCE FILE NAME!
	20
	21	int main(int argc,char*argv[]) {
	22
	23
	24	cout<<"This program computes for the HKY model, the ML estimate of a given tree (when the branch lengths are given)."<<endl;
	25	string seqFile = "nuc7.phylip.txt";
	26	// distribution *dist = new gammaDistribution(1,8);
	27	ifstream in(seqFile.c_str());
	28	if (!in) {errorMsg::reportError("unable to open input sequence file");}
	29	nucleotide myAlph;
	30	sequenceContainer original = phylipFormat::read(in,&myAlph);
	31
	32	// check
	33
	34	vector<MDOUBLE> myFreq = evaluateCharacterFreq(original);
	35	for (int j=0; j < myFreq.size(); ++j) {
	36	cout<<" the freq of nuc "<<j<<" is: "<<myFreq[j]<<endl;
	37	}
	38
	39	distribution *dist = new uniDistribution;
	40	replacementModel *probMod=new hky(myFreq[0],myFreq[1],myFreq[2],myFreq[3],2);
	41	pijAccelerator * pijAcc = new trivialAccelerator(probMod);
	42	stochasticProcess sp(dist, pijAcc);
	43
	44
	45
	46	// end check
	47	const string treeFileName = "startTree.txt";
	48	tree myT(treeFileName);
	49	cout<<"computing the log likelihood of the tree..."<<endl;
	50
	51	MDOUBLE resL = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(myT,original,sp);
	52	cout<<" starting L = "<<resL<<endl;
	53	bestHkyParamAndBBL bestHkyParamAndBBL1(myT,original,sp);
	54
	55	resL = bestHkyParamAndBBL1.getBestL();
	56	MDOUBLE resAlpha = bestHkyParamAndBBL1.getBestHkyParam();
	57	cout<<"final likelihood: "<<resL<<endl;
	58	cout<<"best HKY parameter: "<<resAlpha<<endl;
	59	// ofstream out("tAfterBBL.txt");
	60	myT.output(cout);
	61
	62	delete dist;
	63	delete probMod;
	64	return 0;
	65	}
	66
	67	//C:\tal\semphyCheck\nuc7.phylip.txt
	68	//C:\tal\semphyCheck\startTree.txt

+10

-0

libs/phylogeny/tests/optimize_HKY_param.out.standard less more

	0	This program computes for the HKY model, the ML estimate of a given tree (when the branch lengths are given).
	1	the freq of nuc 0 is: 0.388844
	2	the freq of nuc 1 is: 0.176599
	3	the freq of nuc 2 is: 0.197551
	4	the freq of nuc 3 is: 0.237007
	5	computing the log likelihood of the tree...
	6	starting L = -2151.38
	7	final likelihood: -2138.61
	8	best HKY parameter: 1.5181
	9	(Aal:0.052945,(Ese:0.061027,(Ttt:0.015761,Mtr:0.050395):0.065493):0.039148,(Eco:0.095915,(Dvi:0.107929,Meu:0.044734):0.093162):0.034430);

+27

-0

libs/phylogeny/tests/readTreeWithComments.cpp less more

	0	// $Id: readTreeWithComments.cpp 753 2006-06-29 14:41:56Z ninio $
	1
	2
	3	#include <fstream>
	4	#include <iostream>
	5	#include <string>
	6	#include <iomanip>
	7	using namespace std;
	8
	9	#include "logFile.h"
	10	#include "tree.h"
	11	//#include "readTree.h"
	12
	13	int main(int argc,char*argv[]) {
	14	if(argc<2) exit(1);
	15	if(argc>2) myLog::setLog("-",atoi(argv[2]));
	16	string treeName(argv[1]);
	17	tree t(treeName);
	18	t.output(cout);
	19
	20	vector<tree::nodeP> nv;
	21	t.getAllNodes(nv, t.getRoot());
	22	cout <<"got "<<nv.size()<<" noded"<<endl;
	23	for (vector<tree::nodeP>::iterator i=nv.begin();i!=nv.end();++i)
	24	cout << (*i)->getComment()<<endl;
	25	exit(0);
	26	}

+103

-0

libs/phylogeny/tests/readTreeWithComments.out.standard less more

	0	(((((lip\|YALI0C06281g:19232.000000[&&NHX:D=N],((alb\|orf19.7378:33419.500000[&&NHX:D=N],han\|DEHA0D17479g:33419.500000[&&NHX:D=N]):58468.500000[&&NHX:D=N],(((cas\|Scas648.21:44761.000000[&&NHX:D=N],(gla\|CAGL0G01969g:33855.000000[&&NHX:D=N],(bay\|YKR072C-14957:6103.500000[&&NHX:D=N],(mik\|YKR072C-13959:5420.500000[&&NHX:D=N],(par\|YKR072C-13221:4495.000000[&&NHX:D=N],cer\|YKR072C:4495.000000[&&NHX:D=N]):5420.500000[&&NHX:D=N]):6103.500000[&&NHX:D=N]):33855.000000[&&NHX:D=N]):44761.000000[&&NHX:D=N]):2.000000[&&NHX:D=N],(cas\|Scas698.42:13667.000000[&&NHX:D=N],(gla\|CAGL0L10208g:35357.000000[&&NHX:D=N],(bay\|YOR054C-23024:9194.000000[&&NHX:D=N],(mik\|YOR054C-19996:6207.000000[&&NHX:D=N],((par\|YOR054C-20516:1368.000000[&&NHX:D=N],par\|YOR054C-20510:1.000000[&&NHX:D=N]):5566.250000[&&NHX:D=Y],cer\|YOR054C:5566.250000[&&NHX:D=N]):6207.000000[&&NHX:D=N]):9194.000000[&&NHX:D=N]):35357.000000[&&NHX:D=N]):13667.000000[&&NHX:D=N]):3317.500000[&&NHX:D=N]):4078.750000[&&NHX:D=Y],((gos\|AAR140W:34023.500000[&&NHX:D=N],lac\|KLLA0F16258g:34023.500000[&&NHX:D=N]):13983.000000[&&NHX:D=N],wal\|Kwal21677:13983.000000[&&NHX:D=N]):4078.750000[&&NHX:D=N]):58468.500000[&&NHX:D=N]):19232.000000[&&NHX:D=N]):227256.000000[&&NHX:D=N],(lip\|YALI0E06413g:38516.500000[&&NHX:D=N],((alb\|orf19.3260:33688.500000[&&NHX:D=N],han\|DEHA0E18040g:33688.500000[&&NHX:D=N]):15355.000000[&&NHX:D=N],(((gos\|ADR156C:45619.500000[&&NHX:D=N],lac\|KLLA0E18414g:45619.500000[&&NHX:D=N]):15530.500000[&&NHX:D=N],wal\|Kwal955:15530.500000[&&NHX:D=N]):11413.000000[&&NHX:D=N],(cas\|Scas649.3:18712.000000[&&NHX:D=N],(gla\|CAGL0L05302g:39440.000000[&&NHX:D=N],(bay\|YKL088W-14427:5889.000000[&&NHX:D=N],(mik\|YKL088W-13337:4161.000000[&&NHX:D=N],(par\|YKL088W-13810:4634.000000[&&NHX:D=N],cer\|YKL088W:4634.000000[&&NHX:D=N]):4161.000000[&&NHX:D=N]):5889.000000[&&NHX:D=N]):39440.000000[&&NHX:D=N]):18712.000000[&&NHX:D=N]):11413.000000[&&NHX:D=N]):15355.000000[&&NHX:D=N]):38516.500000[&&NHX:D=N]):1.000000[&&NHX:D=N]):102
226.500000[&&NHX:D=Y],(((gra\|FG04568.1:41707.000000[&&NHX:D=N],cra\|NCU04237.2:41707.000000[&&NHX:D=N]):10636.500000[&&NHX:D=N],gri\|MG09544.4:10636.500000[&&NHX:D=N]):21219.000000[&&NHX:D=N],nid\|AN4305.2:21219.000000[&&NHX:D=N]):102226.500000[&&NHX:D=N]):9920238.000000[&&NHX:D=N],(((gra\|FG01063.1:14116.500000[&&NHX:D=N],cra\|NCU10053.2:14116.500000[&&NHX:D=N]):8249.000000[&&NHX:D=N],gri\|MG04458.4:8249.000000[&&NHX:D=N]):8645.000000[&&NHX:D=N],(lip\|YALI0C10901g:12509.000000[&&NHX:D=N],((alb\|orf19.3549:7192.000000[&&NHX:D=N],han\|DEHA0C06853g:7192.000000[&&NHX:D=N]):5174.500000[&&NHX:D=N],(((gos\|ADR219C:11666.000000[&&NHX:D=N],lac\|KLLA0A07843g:11666.000000[&&NHX:D=N]):5651.500000[&&NHX:D=N],wal\|Kwal21783:5651.500000[&&NHX:D=N]):552.500000[&&NHX:D=N],(cas\|Scas698.28:6090.500000[&&NHX:D=N],(gla\|CAGL0K05467g:7192.500000[&&NHX:D=N],(bay\|YOR074C-22960:3184.500000[&&NHX:D=N],(mik\|YOR074C-19931:2064.500000[&&NHX:D=N],(par\|YOR074C-20605:2032.000000[&&NHX:D=N],cer\|YOR074C:2032.000000[&&NHX:D=N]):2064.500000[&&NHX:D=N]):3184.500000[&&NHX:D=N]):7192.500000[&&NHX:D=N]):6090.500000[&&NHX:D=N]):552.500000[&&NHX:D=N]):5174.500000[&&NHX:D=N]):12509.000000[&&NHX:D=N]):8645.000000[&&NHX:D=N]):9842163.500000[&&NHX:D=N]):0.500000[&&NHX:D=Y],pom\|SPAC15E1.04:0.500000[&&NHX:D=N])[&&NHX:D=N];
	1	got 101 noded
	2	:D=N
	3	:D=Y
	4	:D=N
	5	:D=Y
	6	:D=N
	7	:D=N
	8	:D=N
	9	:D=N
	10	:D=N
	11	:D=N
	12	:D=N
	13	:D=Y
	14	:D=N
	15	:D=N
	16	:D=N
	17	:D=N
	18	:D=N
	19	:D=N
	20	:D=N
	21	:D=N
	22	:D=N
	23	:D=N
	24	:D=N
	25	:D=N
	26	:D=N
	27	:D=N
	28	:D=N
	29	:D=N
	30	:D=N
	31	:D=N
	32	:D=N
	33	:D=N
	34	:D=Y
	35	:D=N
	36	:D=N
	37	:D=N
	38	:D=N
	39	:D=N
	40	:D=N
	41	:D=N
	42	:D=N
	43	:D=N
	44	:D=N
	45	:D=N
	46	:D=N
	47	:D=N
	48	:D=N
	49	:D=N
	50	:D=N
	51	:D=N
	52	:D=N
	53	:D=N
	54	:D=N
	55	:D=N
	56	:D=N
	57	:D=N
	58	:D=N
	59	:D=N
	60	:D=N
	61	:D=N
	62	:D=N
	63	:D=N
	64	:D=N
	65	:D=N
	66	:D=N
	67	:D=N
	68	:D=N
	69	:D=N
	70	:D=N
	71	:D=N
	72	:D=N
	73	:D=N
	74	:D=N
	75	:D=N
	76	:D=N
	77	:D=N
	78	:D=N
	79	:D=N
	80	:D=N
	81	:D=N
	82	:D=N
	83	:D=N
	84	:D=N
	85	:D=N
	86	:D=N
	87	:D=N
	88	:D=N
	89	:D=N
	90	:D=N
	91	:D=N
	92	:D=N
	93	:D=N
	94	:D=N
	95	:D=N
	96	:D=N
	97	:D=N
	98	:D=N
	99	:D=N
	100	:D=N
	101	:D=N
	102	:D=N

+4

-0

libs/phylogeny/tests/s2l4_DNA.txt less more

	0	2 4
	1	Sequence1 ACAC
	2	Sequence2 GCGC
	3

+1

-0

libs/phylogeny/tests/sevenTaxaTree.txt less more

0

(Aal:0.062679,(Ese:0.063602,(Ttt:0.024273,Mtr:0.041375):0.055160):0.015349,(Eco:0.092963,(Dvi:0.093969,Meu:0.052839):0.075812):0.014925);

+69

-0

libs/phylogeny/tests/splitMap_test.cpp less more

	0	using namespace std;
	1	//#include "bootstrap.h"
	2	#include "splitMap.h"
	3
	4	int main()
	5	{
	6
	7	// create a split one way
	8	split s1(5);
	9	s1.reverseMembership(0);
	10	s1.reverseMembership(1);
	11	s1.reverseMembership(4);
	12
	13
	14	// and an other split the other way
	15
	16	vector<int> v(3,0);
	17	v[0]=2; v[1]=0; v[2]=3;
	18	vector<int>::const_iterator vbeg = v.begin();
	19	vector<int>::const_iterator vend = v.end();
	20	split s2(vbeg,vend,5);
	21
	22	cout << endl << "Test the splitMap" << endl;
	23
	24	splitMap sm1;
	25
	26	cout <<"s1: ";
	27	s1.print();
	28	cout <<"s2: ";
	29	s2.print();
	30	cout << endl;
	31
	32	cout <<"add s1"<<endl;
	33	sm1.add(s1);
	34	sm1.print();
	35
	36	cout <<"add s2"<<endl;
	37	sm1.add(s2);
	38	sm1.print();
	39
	40	cout <<"add s1"<<endl;
	41	sm1.add(s1);
	42	sm1.print();
	43
	44	cout <<"add s1"<<endl;
	45	sm1.add(s1);
	46	sm1.print();
	47
	48	cout <<"add s1"<<endl;
	49	sm1.add(s1);
	50	sm1.print();
	51	cout << endl;
	52
	53	// print test
	54	cout << "print test"<<endl;
	55	cout << sm1;
	56	cout << endl;
	57
	58	// reverse
	59
	60	cout << "reverse the map"<<endl;
	61
	62	vector<pair<split,int> > rmap = sm1.sortSplits();
	63	for (vector<pair<split,int> >::const_iterator i=rmap.begin();i!=rmap.end();++i)
	64	cout <<i->second<<" "<<i->first<<endl;
	65
	66
	67	return (0);
	68	}

+35

-0

libs/phylogeny/tests/splitMap_test.out.standard less more

	0
	1	Test the splitMap
	2	s1: size =2 0 1 4 \| 2 3
	3	s2: size =2 0 2 3 \| 1 4
	4
	5	add s1
	6	1 size =2 0 1 4 \| 2 3
	7
	8	add s2
	9	1 size =2 0 1 4 \| 2 3
	10	1 size =2 0 2 3 \| 1 4
	11
	12	add s1
	13	2 size =2 0 1 4 \| 2 3
	14	1 size =2 0 2 3 \| 1 4
	15
	16	add s1
	17	3 size =2 0 1 4 \| 2 3
	18	1 size =2 0 2 3 \| 1 4
	19
	20	add s1
	21	4 size =2 0 1 4 \| 2 3
	22	1 size =2 0 2 3 \| 1 4
	23
	24
	25	print test
	26	4 size =2 0 1 4 \| 2 3
	27	1 size =2 0 2 3 \| 1 4
	28
	29
	30	reverse the map
	31	4 size =2 0 1 4 \| 2 3
	32
	33	1 size =2 0 2 3 \| 1 4
	34

+104

-0

libs/phylogeny/tests/split_test.cpp less more

	0	using namespace std;
	1	#include "split.h"
	2
	3	int main()
	4	{
	5	cout << " testing the \"split\" class" <<endl;
	6
	7	cout << "make set - max=5" <<endl;
	8	split s1(5);
	9	s1.print();
	10
	11	cout << "toggle(4)" <<endl;
	12	{
	13	s1.reverseMembership(4);
	14	}
	15	s1.print();
	16
	17	cout << "toggle(4)" <<endl;
	18	{
	19	s1.reverseMembership(4);
	20	}
	21	s1.print();
	22
	23	cout << "toggle(4)" <<endl;
	24	{
	25	s1.reverseMembership(4);
	26	}
	27	s1.print();
	28
	29	cout << "toggle(3)" <<endl;
	30	{
	31	s1.reverseMembership(3);
	32	}
	33	s1.print();
	34
	35	cout << "toggle(3)" <<endl;
	36	{
	37	s1.reverseMembership(3);
	38	}
	39	s1.print();
	40
	41	cout << "toggle(0)" <<endl;
	42	{
	43	s1.reverseMembership(0);
	44	}
	45	s1.print();
	46
	47	cout << "toggle(1);" <<endl;
	48	{
	49	s1.reverseMembership(1);
	50	}
	51	s1.print();
	52
	53	cout << "toggle(1);" <<endl;
	54	{
	55	s1.reverseMembership(1);
	56	}
	57	s1.print();
	58
	59	cout << "toggle(1);" <<endl;
	60	{
	61	s1.reverseMembership(1);
	62	}
	63	s1.print();
	64
	65	cout << "toggle(0)" <<endl;
	66	{
	67	s1.reverseMembership(0);
	68	}
	69	s1.print();
	70
	71	cout << "toggle(0)" <<endl;
	72	{
	73	s1.reverseMembership(0);
	74	}
	75	s1.print();
	76
	77
	78
	79	// part II - from iterator
	80
	81	cout <<endl << "test split constractor from iterator"<<endl;
	82	vector<int> v(3,0);
	83	v[0]=2; v[1]=0; v[2]=4;
	84	vector<int>::const_iterator vbeg = v.begin();
	85	vector<int>::const_iterator vend = v.end();
	86	split s2(vbeg,vend,5);
	87	s2.print();
	88
	89	v[0]=2; v[1]=3; v[2]=4;
	90	vbeg = v.begin();
	91	vend = v.end();
	92	split s3(vbeg,vend,5);
	93
	94
	95	cout << s3 <<endl;
	96
	97	cout <<endl<<"Testing competability"<<endl;
	98
	99	cout << s1<<" and "<<s2<<"\t:"<<s1.compatible(s2)<<endl;
	100	cout << s1<<" and "<<s3<<"\t:"<<s1.compatible(s3)<<endl;
	101
	102	return (0);
	103	}

+38

-0

libs/phylogeny/tests/split_test.out.standard less more

	0	testing the "split" class
	1	make set - max=5
	2	size =0 0 1 2 3 4 \|
	3	toggle(4)
	4	size =1 0 1 2 3 \| 4
	5	toggle(4)
	6	size =0 0 1 2 3 4 \|
	7	toggle(4)
	8	size =1 0 1 2 3 \| 4
	9	toggle(3)
	10	size =2 0 1 2 \| 3 4
	11	toggle(3)
	12	size =1 0 1 2 3 \| 4
	13	toggle(0)
	14	size =2 0 4 \| 1 2 3
	15	toggle(1);
	16	size =2 0 1 4 \| 2 3
	17	toggle(1);
	18	size =2 0 4 \| 1 2 3
	19	toggle(1);
	20	size =2 0 1 4 \| 2 3
	21	toggle(0)
	22	size =2 0 2 3 \| 1 4
	23	toggle(0)
	24	size =2 0 1 4 \| 2 3
	25
	26	test split constractor from iterator
	27	size =2 0 2 4 \| 1 3
	28	size =2 0 1 \| 2 3 4
	29
	30
	31	Testing competability
	32	size =2 0 1 4 \| 2 3
	33	and size =2 0 2 4 \| 1 3
	34	:0
	35	size =2 0 1 4 \| 2 3
	36	and size =2 0 1 \| 2 3 4
	37	:1

+1

-0

libs/phylogeny/tests/startTree.txt less more

0

(Aal:0.062679,(Ese:0.063602,(Ttt:0.024273,Mtr:0.041375):0.055160):0.015349,(Eco:0.092963,(Dvi:0.093969,Meu:0.052839):0.075812):0.014925);

+1

-0

libs/phylogeny/tests/treeWithComments.tree less more

0

(((((lip|YALI0C06281g:19232.0[&&NHX:D=N],((alb|orf19.7378:33419.5[&&NHX:D=N],han|DEHA0D17479g:33419.5[&&NHX:D=N]):58468.5[&&NHX:D=N],(((cas|Scas648.21:44761.0[&&NHX:D=N],(gla|CAGL0G01969g:33855.0[&&NHX:D=N],(bay|YKR072C-14957:6103.5[&&NHX:D=N],(mik|YKR072C-13959:5420.5[&&NHX:D=N],(par|YKR072C-13221:4495.0[&&NHX:D=N],cer|YKR072C:4495.0[&&NHX:D=N]):5420.5[&&NHX:D=N]):6103.5[&&NHX:D=N]):33855.0[&&NHX:D=N]):44761.0[&&NHX:D=N]):2.0[&&NHX:D=N],(cas|Scas698.42:13667.0[&&NHX:D=N],(gla|CAGL0L10208g:35357.0[&&NHX:D=N],(bay|YOR054C-23024:9194.0[&&NHX:D=N],(mik|YOR054C-19996:6207.0[&&NHX:D=N],((par|YOR054C-20516:1368.0[&&NHX:D=N],par|YOR054C-20510:1.0[&&NHX:D=N]):5566.25[&&NHX:D=Y],cer|YOR054C:5566.25[&&NHX:D=N]):6207.0[&&NHX:D=N]):9194.0[&&NHX:D=N]):35357.0[&&NHX:D=N]):13667.0[&&NHX:D=N]):3317.5[&&NHX:D=N]):4078.75[&&NHX:D=Y],((gos|AAR140W:34023.5[&&NHX:D=N],lac|KLLA0F16258g:34023.5[&&NHX:D=N]):13983.0[&&NHX:D=N],wal|Kwal21677:13983.0[&&NHX:D=N]):4078.75[&&NHX:D=N]):58468.5[&&NHX:D=N]):19232.0[&&NHX:D=N]):227256.0[&&NHX:D=N],(lip|YALI0E06413g:38516.5[&&NHX:D=N],((alb|orf19.3260:33688.5[&&NHX:D=N],han|DEHA0E18040g:33688.5[&&NHX:D=N]):15355.0[&&NHX:D=N],(((gos|ADR156C:45619.5[&&NHX:D=N],lac|KLLA0E18414g:45619.5[&&NHX:D=N]):15530.5[&&NHX:D=N],wal|Kwal955:15530.5[&&NHX:D=N]):11413.0[&&NHX:D=N],(cas|Scas649.3:18712.0[&&NHX:D=N],(gla|CAGL0L05302g:39440.0[&&NHX:D=N],(bay|YKL088W-14427:5889.0[&&NHX:D=N],(mik|YKL088W-13337:4161.0[&&NHX:D=N],(par|YKL088W-13810:4634.0[&&NHX:D=N],cer|YKL088W:4634.0[&&NHX:D=N]):4161.0[&&NHX:D=N]):5889.0[&&NHX:D=N]):39440.0[&&NHX:D=N]):18712.0[&&NHX:D=N]):11413.0[&&NHX:D=N]):15355.0[&&NHX:D=N]):38516.5[&&NHX:D=N]):1.0[&&NHX:D=N]):102226.5[&&NHX:D=Y],(((gra|FG04568.1:41707.0[&&NHX:D=N],cra|NCU04237.2:41707.0[&&NHX:D=N]):10636.5[&&NHX:D=N],gri|MG09544.4:10636.5[&&NHX:D=N]):21219.0[&&NHX:D=N],nid|AN4305.2:21219.0[&&NHX:D=N]):102226.5[&&NHX:D=N]):9920238.0[&&NHX:D=N],(((gra|FG01063.1:14116.5[&&NHX:D=N],cra|NCU10053.2:14116.5[&&NHX:D=N]):8249.0[&&NHX:D=N],gri|MG
04458.4:8249.0[&&NHX:D=N]):8645.0[&&NHX:D=N],(lip|YALI0C10901g:12509.0[&&NHX:D=N],((alb|orf19.3549:7192.0[&&NHX:D=N],han|DEHA0C06853g:7192.0[&&NHX:D=N]):5174.5[&&NHX:D=N],(((gos|ADR219C:11666.0[&&NHX:D=N],lac|KLLA0A07843g:11666.0[&&NHX:D=N]):5651.5[&&NHX:D=N],wal|Kwal21783:5651.5[&&NHX:D=N]):552.5[&&NHX:D=N],(cas|Scas698.28:6090.5[&&NHX:D=N],(gla|CAGL0K05467g:7192.5[&&NHX:D=N],(bay|YOR074C-22960:3184.5[&&NHX:D=N],(mik|YOR074C-19931:2064.5[&&NHX:D=N],(par|YOR074C-20605:2032.0[&&NHX:D=N],cer|YOR074C:2032.0[&&NHX:D=N]):2064.5[&&NHX:D=N]):3184.5[&&NHX:D=N]):7192.5[&&NHX:D=N]):6090.5[&&NHX:D=N]):552.5[&&NHX:D=N]):5174.5[&&NHX:D=N]):12509.0[&&NHX:D=N]):8645.0[&&NHX:D=N]):9842163.5[&&NHX:D=N]):0.5[&&NHX:D=Y],pom|SPAC15E1.04:0.5[&&NHX:D=N])[&&NHX:D=N];

+58

-0

libs/phylogeny/threeStateAlphabet.cpp less more

	0	#include "threeStateAlphabet.h"
	1
	2	threeStateAlphabet::threeStateAlphabet() {}
	3
	4	int threeStateAlphabet::fromChar(const char s) const{
	5	switch (s) {
	6	case '0': return 0; break;
	7	case '1': return 1; break;
	8	case '2': return 2; break;
	9	default:
	10	vector<string> err;
	11	err.push_back(" The threeStateAlphabet sequences contained the character: ");
	12	err[0]+=s;
	13	err.push_back(" threeStateAlphabet was not one of the following: ");
	14	err.push_back(" 0, 1, 2");
	15	errorMsg::reportError(err);
	16	}// end of switch
	17	return -99; // never suppose to be here.
	18	}// end of function
	19
	20	vector<int> threeStateAlphabet::fromString(const string &str) const {
	21	vector<int> vec;
	22	for (int i=0;i<str.size();i++)
	23	vec.push_back(fromChar(str[i]));
	24	return vec;
	25	}
	26
	27	string threeStateAlphabet::fromInt(const int in_id) const{
	28	char res = 0;
	29	switch (in_id) {
	30	case 0 : res = '0' ; break;
	31	case 1 : res = '1' ; break;
	32	case 2 : res = '2' ; break;
	33	default:
	34	vector<string> err;
	35	err.push_back("unable to print threeState_id. threeState_id was not one of the following: ");
	36	err.push_back("0,1,2");
	37	errorMsg::reportError(err);
	38	}//end of switch
	39	string vRes;
	40	vRes.append(1,res);
	41	return vRes;
	42	}// end of function
	43
	44	// There are no relations here.
	45	int threeStateAlphabet::relations(const int charInSeq, const int charToCheck) const{
	46	if (charInSeq == charToCheck)
	47	return 1;
	48	return 0;
	49	}
	50
	51	int threeStateAlphabet::fromChar(const string& str, const int pos) const{
	52	return fromChar(str[pos]);
	53	}
	54
	55
	56
	57

+26

-0

libs/phylogeny/threeStateAlphabet.h less more

	0	#ifndef ___3STATE_ALPH
	1	#define ___3STATE_ALPH
	2
	3	#include "alphabet.h"
	4	#include "errorMsg.h"
	5
	6
	7	class threeStateAlphabet : public alphabet {
	8	public:
	9	explicit threeStateAlphabet();
	10	virtual ~threeStateAlphabet() {}
	11	virtual alphabet* clone() const { return new threeStateAlphabet(*this); }
	12	int unknown() const {return -2;}
	13	int gap() const {errorMsg::reportError("The method indel::gap() is used"); return -1;} // What is it for ? I don't need this !!!
	14	int size() const {return 3;}
	15	int stringSize() const {return 1;} // one letter code.
	16	int relations(const int charInSeq, const int charToCheck) const;
	17	int fromChar(const string& str, const int pos) const;
	18	int fromChar(const char s) const;
	19	string fromInt(const int in_id) const;
	20	vector<int> fromString(const string& str) const;
	21	bool isSpecific(const int id) const {return (id>=0 && id < size());}
	22
	23	};
	24
	25	#endif

+254

-0

libs/phylogeny/threeStateModel.cpp less more

	0	#include "threeStateModel.h"
	1	#include "matrixUtils.h"
	2	#include "someUtil.h"
	3
	4	///////////////////////////////////////////////////////////
	5	//non reversible model
	6	///////////////////////////////////////////////////////////
	7
	8	const MDOUBLE EPSILON_3STATEMODEL = 1e-04;
	9
	10
	11	threeStateModel::threeStateModel(const MDOUBLE m1, const MDOUBLE m2,
	12	const MDOUBLE m3, const MDOUBLE m4,const Vdouble &freq, bool useMarkovLimiting)
	13	:_gainState1(m1),_gainState0(m2), _lossState1(m3),_lossState0(m4),_freq(freq),_useMarkovLimiting(useMarkovLimiting){
	14	resizeMatrix(_Q,alphabetSize(),alphabetSize());
	15	resizeMatrix(_lastPtCalculated, alphabetSize(), alphabetSize());
	16	updateQ();
	17	}
	18
	19	threeStateModel& threeStateModel::operator=(const threeStateModel &other){
	20	_gainState1 = other._gainState1;
	21	_gainState0 = other._gainState0;
	22	_lossState1 = other._lossState1;
	23	_lossState0 = other._lossState0;
	24	_freq = other._freq;
	25	_useMarkovLimiting = other._useMarkovLimiting;
	26	_Q = other._Q;
	27	_bQchanged = other._bQchanged;
	28	_lastPtCalculated = other._lastPtCalculated;
	29	_lastTcalculated = other._lastTcalculated;
	30
	31	return *this;
	32	}
	33
	34	void threeStateModel::updateQ(){
	35	setEpsilonForZeroParams();
	36	_Q[0][0] = -_gainState1;
	37	_Q[0][1] = 0;
	38	_Q[0][2] = _gainState1;
	39	_Q[1][0] = 0;
	40	_Q[1][1] = -_gainState0;
	41	_Q[1][2] = _gainState0;
	42	_Q[2][0] = _lossState1;
	43	_Q[2][1] = _lossState0;
	44	_Q[2][2] = - _Q[2][0] - _Q[2][1];
	45	for (int i=0; i<_Q.size();i++) {
	46	MDOUBLE sum = _Q[i][0]+_Q[i][1]+_Q[i][2];
	47	if ((abs(sum)>err_allow_for_pijt_function()))
	48	errorMsg::reportError("Error in threeStateModel::updateQ, sum of row is not 0");
	49	}
	50	if ((!checkIsNullModel()) && (_useMarkovLimiting))
	51	computeMarkovLimitingDistribution();
	52	_bQchanged = true;
	53	}
	54
	55	// when Q matrix parameters are zero the lib code underflows and the likelihood is set to EPSILON
	56	void threeStateModel::setEpsilonForZeroParams(){
	57	if (DEQUAL(_gainState0,0.0,EPSILON_3STATEMODEL))
	58	_gainState0 = EPSILON_3STATEMODEL;
	59	if (DEQUAL(_gainState1,0.0,EPSILON_3STATEMODEL))
	60	_gainState1 = EPSILON_3STATEMODEL;
	61	if (DEQUAL(_lossState0,0.0,EPSILON_3STATEMODEL))
	62	_lossState0 = EPSILON_3STATEMODEL;
	63	if (DEQUAL(_lossState1,0.0,EPSILON_3STATEMODEL))
	64	_lossState1 = EPSILON_3STATEMODEL;
	65	}
	66
	67	void threeStateModel::setMu1(const MDOUBLE val) {
	68	_gainState1 = val;
	69	updateQ();
	70	}
	71
	72	void threeStateModel::setMu2(const MDOUBLE val) {
	73	_gainState0 = val;
	74	updateQ();
	75	}
	76
	77	void threeStateModel::setMu3(const MDOUBLE val) {
	78	_lossState1 = val;
	79	updateQ();
	80	}
	81
	82	void threeStateModel::setMu4(const MDOUBLE val) {
	83	_lossState0 = val;
	84	updateQ();
	85	}
	86
	87
	88
	89	bool threeStateModel::pijt_is_prob_value(MDOUBLE val) const {
	90	if ((abs(val)+err_allow_for_pijt_function()<0) \|\| (val>1+err_allow_for_pijt_function()))
	91	return false;
	92	else
	93	return true;
	94	}
	95
	96	bool threeStateModel::areFreqsValid(Vdouble freq) const{
	97	MDOUBLE sum=0.0;
	98	for (int i=0; i<freq.size(); ++i){
	99	if (freq[i]<0.0)
	100	return false;
	101	sum+=freq[i];
	102	}
	103	if (!DEQUAL(sum,1.0)) {
	104	return false;
	105	}
	106	return true;
	107	}
	108
	109	bool threeStateModel::checkIsNullModel(){
	110	if (_gainState0!=EPSILON_3STATEMODEL)
	111	return false;
	112	if (_gainState0!=EPSILON_3STATEMODEL)
	113	return false;
	114	if (!(DEQUAL(_freq[2],1.0,EPSILON_3STATEMODEL)))
	115	return false;
	116	return true;
	117	}
	118
	119	void threeStateModel::setFreq(const Vdouble &freq){
	120	if (freq.size()!=_freq.size()) {
	121	errorMsg::reportError("Error in threeStateModel::setFreq, size of freq is different than member");
	122	}
	123
	124	if (!areFreqsValid(freq)) {
	125	string strErr = "Error in threeStateModel::setFreq, sum of freq is different than 1 or negative freq value";
	126	errorMsg::reportError(strErr);
	127	}
	128	for (int i=0; i<freq.size(); ++i){
	129	_freq[i] = freq[i];
	130	}
	131	}
	132
	133
	134
	135
	136
	137
	138	void threeStateModel::computeMarkovLimitingDistribution(){
	139
	140	VVdouble P;
	141	int as = alphabetSize();
	142	resizeMatrix(P,as, as);
	143	// initializing P with P at time 1
	144	for (int i=0; i< as; ++i) {
	145	for (int j=0; j< as; ++j) {
	146	P[i][j]=Pij_t(i,j,1.0);
	147	}
	148	}
	149	VVdouble previous_P = P;
	150	int numIterations = 0;
	151	Vdouble freqs(3,-1.0);
	152	bool converged = false;
	153	MDOUBLE epsilon=0.000001;
	154	int row, col;
	155
	156	while ( converged==false ) {
	157	previous_P = P;
	158	P = multiplyMatrixes(P,P);
	159	// due to rounding errors, we set the diagonal to be 1-(the rest)
	160	P[0][0]=1.0-P[0][1]-P[0][2];
	161	P[1][1]=1.0-P[1][0]-P[1][2];
	162	P[2][2]=1.0-P[2][0]-P[2][1];
	163	for (int d=0; d<as;++d){
	164	freqs[d] = P[0][d];// ** taking the freqs as the first row; this is not necessarily correct if 3 rows are different
	165	}
	166	converged = true;
	167	for (row = 0; row < P.size(); ++row) {
	168	for (col = 0; col < P.size(); ++col)
	169	{
	170	MDOUBLE diff = abs(convert(previous_P[row][col] - P[row][col]));
	171	if ( ( ( ( !DEQUAL(diff,0.0,epsilon) ) \|\| (!areFreqsValid(freqs) ) ) )){
	172	converged = false;
	173	}
	174	}
	175	}
	176	numIterations++;
	177	if (numIterations>100) {
	178	string err = "Error in threeStateModel::computeMarkovLimitingDistribution, too many iterations =" + double2string(numIterations);
	179	errorMsg::reportError(err);
	180	}
	181
	182	}
	183	//making sure that the three rows are the same
	184	for (row =1; row < P.size(); ++row) {
	185	for (col = 0; col < P.size(); ++col)
	186	{
	187	if (!(DEQUAL(P[row][col],P[row-1][col],epsilon))) {
	188	errorMsg::reportError("Error in threeStateModel::computeMarkovLimitingDistribution, rows are not equal" );
	189
	190	}
	191
	192	}
	193
	194	}
	195
	196	setFreq(freqs);
	197	}
	198
	199	// new implementation copied from Itay Mayrose which saves the last values of t computed
	200	const MDOUBLE threeStateModel::Pij_t(const int i,const int j, const MDOUBLE d) const
	201	{
	202	if (!_bQchanged && DEQUAL(d, _lastTcalculated))
	203	return convert(_lastPtCalculated[i][j]);
	204	// converting Q into doubleRep format
	205	VVdoubleRep QdblRep;
	206	resizeMatrix(QdblRep,_Q.size(),_Q.size());
	207	for (int row=0;row<_Q.size();row++){
	208	for (int col=0;col<_Q[row].size();col++)
	209	QdblRep[row][col]=convert(_Q[row][col]);
	210	}
	211
	212	VVdoubleRep Qt = multiplyMatrixByScalar(QdblRep, d);
	213	VVdoubleRep unit;
	214	unitMatrix(unit,_Q.size());
	215	_lastPtCalculated = add(unit,Qt) ; // I + Qt
	216	VVdoubleRep Qt_power = Qt;
	217	VVdoubleRep prevIter_matrix = _lastPtCalculated;
	218	VVdoubleRep diffM = _lastPtCalculated; //init to whatever
	219	int n=2;
	220	bool bConverged = false;
	221	while (bConverged == false)
	222	{
	223	prevIter_matrix = _lastPtCalculated;
	224	VVdoubleRep tempQ = multiplyMatrixByScalar(Qt,1.0/n);
	225	Qt_power = multiplyMatrixes(Qt_power,tempQ);
	226	_lastPtCalculated = add(_lastPtCalculated,Qt_power); // I + Qt + Qt^2/2! + .... + Qt^n/n!
	227	//check if the difference between the cur and prev iteration is smaller than the allowed error of all matrix entries
	228	bConverged = true;
	229	for (int row = 0; row < _lastPtCalculated.size(); ++row) {
	230	for (int col = 0; col < _lastPtCalculated.size(); ++col)
	231	{
	232	MDOUBLE diff = abs(convert(_lastPtCalculated[row][col] - prevIter_matrix[row][col]));
	233	if ((diff > err_allow_for_pijt_function()) \|\| (!pijt_is_prob_value(convert(_lastPtCalculated[i][j]))))
	234	bConverged = false;
	235	}
	236	}
	237	n++;
	238	if (n>150) {
	239	string err = "Error in threeStateModel::Pij_t, too many iterations for t = " + double2string(d);
	240	//cerr<<diff<<endl;
	241	errorMsg::reportError(err);
	242	}
	243	}
	244	MDOUBLE val = convert(_lastPtCalculated[i][j]);
	245	if (!pijt_is_prob_value(val))
	246	errorMsg::reportError("Error in threeStateModel::Pij_t, pijt <0 or >1");
	247	if (val<0.0)
	248	val = EPSILON; // absolute zero creates a problem later on in computations
	249	if (val>1.0)
	250	val = 1.0;
	251	_bQchanged = false;
	252	return val;
	253	}

+131

-0

libs/phylogeny/threeStateModel.h less more

	0	#ifndef ___3STATE_MODEL
	1	#define ___3STATE_MODEL
	2
	3	#include "definitions.h"
	4	#include "replacementModel.h"
	5	#include "fromQtoPt.h"
	6	#include "errorMsg.h"
	7	#include "matrixUtils.h"
	8
	9	class threeStateModel : public replacementModel {
	10	public:
	11	explicit threeStateModel(const MDOUBLE m1, const MDOUBLE m2,
	12	const MDOUBLE m3, const MDOUBLE m4,const Vdouble &freq, bool useMarkovLimiting = true);
	13	threeStateModel(const threeStateModel& other) {*this = other;}
	14	virtual threeStateModel& operator=(const threeStateModel &other);
	15	virtual threeStateModel* clone() const { return new threeStateModel(*this); }
	16	virtual ~threeStateModel() {}
	17	const int alphabetSize() const {return 3;} // two states and an intermediate (both states at once)
	18	const MDOUBLE err_allow_for_pijt_function() const {return 1e-4;} // same as q2p definitions
	19	const MDOUBLE Pij_t(const int i,const int j, const MDOUBLE d) const ;
	20	const MDOUBLE dPij_dt(const int i,const int j, const MDOUBLE d) const{
	21	if (d==0.0)
	22	return _Q[i][j];
	23	errorMsg::reportError("Error in threeStateModel, dPij_dt called");
	24	return 0.0; // not supposed to be here
	25	}
	26	const MDOUBLE d2Pij_dt2(const int i,const int j, const MDOUBLE d) const{
	27	errorMsg::reportError("Error in threeStateModel, d2Pij_dt2 called");
	28	return 0.0; // not supposed to be here
	29	}
	30	const MDOUBLE freq(const int i) const {
	31	if (i >= _freq.size())
	32	errorMsg::reportError("Error in threeStateModel::freq, i > size of frequency vector");
	33	return _freq[i];
	34	}
	35	const Vdouble getFreqs() const {return _freq;}
	36	void setFreq(const Vdouble &freq);
	37	void setMu1(const MDOUBLE val) ;
	38	void setMu2(const MDOUBLE val) ;
	39	void setMu3(const MDOUBLE val) ;
	40	void setMu4(const MDOUBLE val) ;
	41	const MDOUBLE getMu1() const {return _gainState1;}
	42	const MDOUBLE getMu2() const {return _gainState0;}
	43	const MDOUBLE getMu3() const {return _lossState1;}
	44	const MDOUBLE getMu4() const {return _lossState0;}
	45	void computeMarkovLimitingDistribution(); // compute P(infinity), which specifies the stationary distribution
	46
	47	private:
	48	virtual void updateQ();
	49	void setEpsilonForZeroParams();
	50	bool checkIsNullModel();
	51	bool pijt_is_prob_value(MDOUBLE val) const;
	52	bool areFreqsValid(Vdouble freq) const; // tests if frequencies are valid (>0, sum=1)
	53
	54	private:
	55
	56	MDOUBLE _gainState1; // _Q[0][2]
	57	MDOUBLE _gainState0; // _Q[1][2]
	58	MDOUBLE _lossState1; // _Q[2][0]
	59	MDOUBLE _lossState0; // _Q[2][1]
	60	VVdouble _Q;
	61	Vdouble _freq;
	62	bool _useMarkovLimiting; // should the markov limiting distribution be used to estimate the root frequencies
	63	mutable bool _bQchanged; //indicates whether the Q matrix was changed after the last Pij_t call
	64	mutable MDOUBLE _lastTcalculated;
	65	mutable VVdoubleRep _lastPtCalculated;
	66
	67
	68
	69	};
	70
	71	/*class gainLossModel : public replacementModel {
	72	public:
	73	explicit gainLossModel(const MDOUBLE m1, const MDOUBLE m2, const Vdouble freq);
	74	virtual replacementModel* clone() const { return new gainLossModel(*this); }
	75	gainLossModel(const gainLossModel& other): _q2pt(NULL) {*this = other;}
	76	virtual gainLossModel& operator=(const gainLossModel &other);
	77
	78	virtual ~gainLossModel() {if (_q2pt) delete _q2pt;}
	79	const int alphabetSize() const {return 3;} // two states and an intermediate (both states at once)
	80	const MDOUBLE err_allow_for_pijt_function() const {return 1e-4;} // same as q2p definitions
	81	const MDOUBLE Pij_t(const int i,const int j, const MDOUBLE d) const {
	82	return _q2pt->Pij_t(i,j,d);
	83	}
	84	const MDOUBLE dPij_dt(const int i,const int j, const MDOUBLE d) const{
	85	return _q2pt->dPij_dt(i,j,d);
	86	}
	87	const MDOUBLE d2Pij_dt2(const int i,const int j, const MDOUBLE d) const{
	88	return _q2pt->d2Pij_dt2(i,j,d);
	89	}
	90	const MDOUBLE freq(const int i) const {
	91	if (i >= _freq.size())
	92	errorMsg::reportError("Error in gainLossModel::freq, i > size of frequency vector");
	93	return _freq[i];
	94	}
	95	void setMu1(const MDOUBLE val, bool isReversible=true) { _gainState1 = val; updateQ(isReversible);}
	96	void setMu2(const MDOUBLE val,bool isReversible=true) { _gainState0 = val; updateQ(isReversible);}
	97	const MDOUBLE getMu1() const {return _gainState1;}
	98	const MDOUBLE getMu2() const {return _gainState0;}
	99
	100
	101	protected:
	102	virtual void updateQ(bool isReversible=true);
	103	virtual void normalizeQ();
	104
	105
	106	protected:
	107	Vdouble _freq;
	108	MDOUBLE _gainState1;
	109	MDOUBLE _gainState0;
	110	VVdouble _Q;
	111	q2pt *_q2pt;
	112
	113
	114
	115	};
	116	*/
/*
Q is a matrix of the following form:

        0       1       01
0       1-m1    0       m1
1       0       1-m2    m2
01      (filled in assuming reversibility)

i.e. no direct change from state 0 to state 1 is allowed
*/
	127
	128	#endif // ___3STATE_MODEL
	129
	130

+1364

-0

libs/phylogeny/tree.cpp less more

	0	// $Id: tree.cpp 10611 2012-05-13 19:56:18Z cohenofi $
	1	#include "definitions.h"
	2	#include "tree.h"
	3	#include "treeUtil.h"
	4	#include "logFile.h"
	5	#include "someUtil.h"
	6	#include <cassert>
	7	#include <algorithm>
	8	#include <iostream>
	9	#include <iomanip>
	10	#include <fstream>
	11	#include <ctime>
	12
	13	using namespace std;
	14
	15	const MDOUBLE tree::FLAT_LENGTH_VALUE = 0.3f;
	16	const int tree::TREE_NULL = -1;
	17	const MDOUBLE tree::SHORT_LENGTH_VALUE = 0.000001f;
	18
	19
	20	//removeSon: remove pSon from sons list.
	21	//does not delete pSon
	22	void tree::TreeNode::removeSon(TreeNode* pSon) {
	23	vector<nodeP>::iterator vec_iter = remove(_sons.begin(), _sons.end(), pSon);
	24	_sons.erase(vec_iter,_sons.end()); // pg 1170, primer.
	25	}
	26
	27	void tree::TreeNode::claimSons(){
	28	for(int i=0;i<getNumberOfSons();i++) {
	29	getSon(i)->setFather(this);
	30	}
	31	}
	32
	33	/********************************************************************************************
	34	getDistance2ROOT()
	35	*********************************************************************************************/
	36	MDOUBLE tree::TreeNode::getDistance2ROOT(){
	37	if(this->isRoot())
	38	return 0.0;
	39	else
	40	return ( this->dis2father() + this->father()->getDistance2ROOT() );
	41	}
	42	/********************************************************************************************
	43	getMinimalDistance2OTU()
	44	*********************************************************************************************/
	45	MDOUBLE tree::TreeNode::getMinimalDistance2OTU(){
	46	if(this->isLeaf())
	47	return 0.0;
	48	else{
	49	int numberOfSons = this->getNumberOfSons();
	50	switch (numberOfSons)
	51	{
	52	case 0:
	53	LOGnOUT(3, <<"ERROR: number of sons for node is zero, but not return leaf\n");
	54	return -1;
	55	break;
	56	case 1:
	57	return ( this->getSon(0)->dis2father() + this->getSon(0)->getMinimalDistance2OTU() );
	58	break;
	59	case 2:
	60	return ( min(
	61	this->getSon(0)->dis2father() + this->getSon(0)->getMinimalDistance2OTU(),
	62	this->getSon(1)->dis2father() + this->getSon(1)->getMinimalDistance2OTU()
	63	) );
	64	break;
	65	case 3:
	66	return ( min(min(
	67	this->getSon(0)->dis2father() + this->getSon(0)->getMinimalDistance2OTU(),
	68	this->getSon(1)->dis2father() + this->getSon(1)->getMinimalDistance2OTU()),
	69	this->getSon(2)->dis2father() + this->getSon(2)->getMinimalDistance2OTU() )
	70	);
	71	break;
	72	case 4:
	73	return ( min(min(min(
	74	this->getSon(0)->dis2father() + this->getSon(0)->getMinimalDistance2OTU(),
	75	this->getSon(1)->dis2father() + this->getSon(1)->getMinimalDistance2OTU()),
	76	this->getSon(2)->dis2father() + this->getSon(2)->getMinimalDistance2OTU()),
	77	this->getSon(3)->dis2father() + this->getSon(3)->getMinimalDistance2OTU() )
	78	);
	79	break;
	80	default:
	81	LOGnOUT(3, <<"ERROR: number of sons for node "<< numberOfSons<<" is not implemented in getMinimalDistance2OTU\n");
	82	return -1;
	83	}
	84
	85
	86	}
	87	}
	88	/********************************************************************************************
	89	getMinimalDistance2OTU()
	90	This implementation is only for binary trees and tr-furcating.
	91	Can easily be generalized to arbitrary number of sons.
	92	*********************************************************************************************/
	93	int tree::TreeNode::getMinimalNumOfNodes2OTU(){
	94	//int minimalNumOfNodes2OTU = 1;
	95	if(this->isLeaf())
	96	return 0;
	97	else{
	98	int numberOfSons = this->getNumberOfSons();
	99	switch (numberOfSons)
	100	{
	101	case 0:
	102	LOGnOUT(3, <<"ERROR: number of sons for node is zero, but not return leaf\n");
	103	return -1;
	104	break;
	105	case 1:
	106	return ( 1 + this->getSon(0)->getMinimalNumOfNodes2OTU());
	107	break;
	108	case 2:
	109	return ( min(
	110	1 + this->getSon(0)->getMinimalNumOfNodes2OTU(),
	111	1 + this->getSon(1)->getMinimalNumOfNodes2OTU()
	112	) );
	113	break;
	114	case 3:
	115	return ( min(min(
	116	1 + this->getSon(0)->getMinimalNumOfNodes2OTU(),
	117	1 + this->getSon(1)->getMinimalNumOfNodes2OTU() ),
	118	1 + this->getSon(2)->getMinimalNumOfNodes2OTU() )
	119	);
	120	break;
	121	case 4:
	122	return ( min(min(min(
	123	1 + this->getSon(0)->getMinimalNumOfNodes2OTU(),
	124	1 + this->getSon(1)->getMinimalNumOfNodes2OTU()),
	125	1 + this->getSon(2)->getMinimalNumOfNodes2OTU()),
	126	1 + this->getSon(3)->getMinimalNumOfNodes2OTU() )
	127	);
	128	break;
	129	default:
	130	LOGnOUT(3, <<"ERROR: number of sons for node "<< numberOfSons<<" is not implemented in getMinimalNumOfNodes2OTU\n");
	131	return -1;
	132	}
	133	}
	134	}
	135
	136
	137
	138
	139	//*******************************************************************************
	140	// Constructors Destructors
	141	//*******************************************************************************
	142	tree::tree() {
	143	_root=NULL;
	144	}
	145
	146	// this function will accept "-" for cases where the input in from the standard input (cin)
	147	tree::tree(const string& treeFileName, vector<char>& isFixed) {
	148	ifstream in;
	149	istream* inPtr = &cin; // default
	150	if (treeFileName != "-"){
	151	in.open(treeFileName.c_str());
	152	if (! in.is_open())
	153	errorMsg::reportError(string("Error - unable to open tree file ")+treeFileName,1);
	154	inPtr = &in;
	155	}
	156	if (readPhylipTreeTopology(*inPtr,isFixed)) {
	157	if (in.is_open())
	158	in.close();
	159	create_names_to_internal_nodes();
	160	makeSureAllBranchesArePositive();
	161	return;
	162	}
	163	if (in.is_open())
	164	in.close();
	165	errorMsg::reportError(string("Unable to read tree from the file ")+treeFileName,1);
	166	}
	167
	168	// this function will accept "-" for cases where the input in from the standard input (cin)
	169	tree::tree(const string& treeFileName) {
	170	ifstream in;
	171	istream* inPtr = &cin; // default
	172	if (treeFileName != "-"){
	173	in.open(treeFileName.c_str());
	174	if (! in.is_open())
	175	errorMsg::reportError(string("Error - unable to open tree file ")+treeFileName,1);
	176	inPtr = &in;
	177	}
	178	if (readPhylipTreeTopology(*inPtr)) {
	179	if (in.is_open())
	180	in.close();
	181	create_names_to_internal_nodes();
	182	makeSureAllBranchesArePositive();
	183	return;
	184	}
	185	if (in.is_open())
	186	in.close();
	187	errorMsg::reportError(string("Unable to read tree from the file ")+treeFileName,1);
	188	}
	189
	190	tree::tree(istream &in) {
	191	if (readPhylipTreeTopology(in)) {
	192	create_names_to_internal_nodes();
	193	makeSureAllBranchesArePositive();
	194	return;
	195	}
	196	errorMsg::reportError("Unable to read phylip tree file",1);// also quit the program
	197	}
	198
	199	tree::tree(istream &in,vector<char>& isFixed) {
	200	if (readPhylipTreeTopology(in,isFixed)) {
	201	create_names_to_internal_nodes();
	202	makeSureAllBranchesArePositive();
	203	return;
	204	}
	205	errorMsg::reportError("Unable to read phylip tree file",1);// also quit the program
	206	}
	207
	208	tree::tree(const vector<char>& tree_contents) {
	209	readPhylipTreeTopology(tree_contents);
	210	create_names_to_internal_nodes();
	211	makeSureAllBranchesArePositive();
	212	return;
	213	}
	214
	215	tree::tree(const vector<char>& tree_contents, vector<char>& isFixed) {
	216	readPhylipTreeTopology(tree_contents,isFixed);
	217	create_names_to_internal_nodes();
	218	makeSureAllBranchesArePositive();
	219	return;
	220	}
	221
	222	tree::tree(const tree &otherTree) {
	223	_root = NULL;
	224	if (otherTree._root == NULL)
	225	return; // if tree to copy is empty.
	226	createRootNode();
	227	_root->setName(otherTree._root->name());
	228	_root->setID(otherTree._root->id());
	229	_root->setComment(otherTree._root->getComment());
	230	for (int i=0; i <otherTree._root->getNumberOfSons(); ++i) {
	231	recursiveBuildTree( _root, otherTree.getRoot()->getSon(i));
	232	}
	233	}
	234
	235
	236	tree& tree::operator=(const tree &otherTree) {
	237	if (this == &otherTree)
	238	return *this;
	239	if (otherTree._root == NULL) {
	240	clear();
	241	return *this; // if tree to copy is empty.
	242	}
	243	createRootNode();
	244	_root->setName(otherTree._root->name());
	245	_root->setComment(otherTree._root->getComment());
	246	for (int i=0; i <otherTree._root->getNumberOfSons(); ++i) {
	247	recursiveBuildTree( _root, otherTree.getRoot()->getSon(i));
	248	}
	249	return *this;
	250	}
	251
	252	void tree::clear() {
	253	vector<nodeP> vec;
	254	getAllNodes(vec, _root);
	255
	256	for (int k=0; k < vec.size(); k++) {
	257	delete(vec[k]);
	258	}
	259
	260	_nodes = 0;
	261	_leaves =0;
	262	_root = NULL;
	263
	264	}
	265
	266	//*******************************************************************************
	267	// questions on the tree topology
	268	//*******************************************************************************
	269
	270	//stores the father and sons of node inNodeP in vNeighbourVector
	271	void tree::getNeigboursOfNode(vector<nodeP> &vNeighbourVector, const nodeP inNodeP) const {
	272	vNeighbourVector.clear();
	273	for (int i=0; i < inNodeP->getNumberOfSons();++i) {
	274	vNeighbourVector.push_back(inNodeP->getSon(i));
	275	}
	276	if (getRoot() != inNodeP)
	277	vNeighbourVector.push_back(inNodeP->father());
	278	}
	279
	280
	281	// get nodePTR from name
	282	// "myNode" is a pointer to the root of the subtree in which we want to find the node "inName"
	283	tree::nodeP tree::findNodeByName(const string inName, nodeP myNode) const{
	284	if (myNode==NULL) myNode=_root;
	285	if (myNode->name() == inName) return myNode;
	286	for (int i=0 ; i < myNode->getNumberOfSons(); i++ ) {
	287	nodeP answer = findNodeByName(inName, myNode->getSon(i));
	288	if (answer!=NULL) return answer;
	289	}
	290	return NULL;
	291	}
	292
	293
	294	// get nodePTR from id
	295	// similar to tree::findNodeByName
	296	// "myNode" is a pointer to the root of the subtree in which we want to find the node "inId"
	297	tree::nodeP tree::findNodeById(const int inId, nodeP myNode) const{
	298	if (myNode==NULL) myNode=_root;
	299	if (myNode->id() == inId) return myNode;
	300	for (int i=0 ; i < myNode->getNumberOfSons(); i++ ) {
	301	nodeP answer = findNodeById(inId, myNode->getSon(i));
	302	if (answer!=NULL) return answer;
	303	}
	304	return NULL;
	305	}
	306
	307	//getPathBetweenAnyTwoNodes: store all nodes on the path from node1 to node2 in path
	308	//the first node in path is node1. the last node is node2
	309	//1. store all nodes from node1 to the root and node2 to the root
	310	//2. starting from the root - finds the first node (common_father) which is father to both node1 and node2
	311	//3. store in <path> all nodes in the path from node1 to common_father, from node2 to common_father and common_father itself
	312	void tree::getPathBetweenAnyTwoNodes(vector<nodeP> &path, const nodeP node1, const nodeP node2) const {
	313
	314	path.clear();
	315	vector<nodeP> pathMatrix1;
	316	vector<nodeP> pathMatrix2;
	317
	318	nodeP nodeup = node1;
	319	while (nodeup != _root) {
	320	pathMatrix1.push_back(nodeup);
	321	nodeup = nodeup->father();
	322	}
	323	pathMatrix1.push_back(_root);
	324
	325	nodeup = node2;
	326	while (nodeup != _root) {
	327	pathMatrix2.push_back(nodeup);
	328	nodeup = nodeup->father();
	329	}
	330	pathMatrix2.push_back(_root);
	331
	332	int tmp1 = pathMatrix1.size()-1;
	333	int tmp2 = pathMatrix2.size()-1;
	334
	335	while ((tmp1 >= 0) && (tmp2 >= 0)) {
	336	if (pathMatrix1[tmp1] != pathMatrix2[tmp2])
	337	break;
	338	tmp1--;
	339	tmp2--;
	340	}
	341
	342	for (int y=0; y <= tmp1; ++y)
	343	path.push_back(pathMatrix1[y]);
	344	path.push_back(pathMatrix1[tmp1+1]); // pushing once, the TreeNode that was common father to both.
	345	for (int j=tmp2; j >= 0; --j) {
	346	path.push_back(pathMatrix2[j]);
	347	}
	348	return;
	349	}
	350
	351
	352	void tree::getFromLeavesToRoot(vector<nodeP> &vNeighbourVector) const {
	353	getFromRootToLeaves(vNeighbourVector);
	354	reverse(vNeighbourVector.begin(),vNeighbourVector.end());
	355	}
	356
	357
	358	void tree::getFromRootToLeaves(vector<nodeP> &vec) const {
	359	getFromNodeToLeaves(vec,_root);
	360	}
	361
	362
	363	void tree::getFromNodeToLeaves(vector<nodeP> &vec, const nodeP fromHereDown) const {
	364	vec.push_back(fromHereDown);
	365	for (int k=0; k < fromHereDown->getNumberOfSons(); k++) {
	366	getFromNodeToLeaves(vec, fromHereDown->getSon(k));
	367	}
	368	return;
	369	}
	370
	371
	372	void tree::getAllHTUs(vector<nodeP> &vec, const nodeP fromHereDown ) const {
	373	vec.clear();
	374	getAllHTUsPrivate(vec,fromHereDown);
	375	}
	376
	377
	378	void tree::getAllHTUsPrivate(vector<nodeP> &vec, const nodeP fromHereDown ) const {
	379	if (fromHereDown == NULL) return;
	380	if (fromHereDown->isInternal()) vec.push_back(fromHereDown);
	381	for (int k=0; k < fromHereDown->getNumberOfSons(); k++) {
	382	getAllHTUsPrivate(vec,fromHereDown->getSon(k));
	383	}
	384	return;
	385	}
	386
	387
	388	void tree::getAllNodes(vector<nodeP> &vec, const nodeP fromHereDown ) const {
	389	vec.clear();
	390	getAllNodesPrivate(vec,fromHereDown);
	391	}
	392
	393
	394	void tree::getAllNodesPrivate(vector<nodeP> &vec, const nodeP fromHereDown ) const {
	395	//DFS: depth first search
	396	if (fromHereDown == NULL)
	397	return;
	398	vec.push_back(fromHereDown);
	399	for (int k=0; k < fromHereDown->getNumberOfSons(); k++) {
	400	getAllNodesPrivate(vec,fromHereDown->getSon(k));
	401	}
	402	return;
	403	}
	404
	405
	406	void tree::getAllLeaves(vector<nodeP> &vec, const nodeP fromHereDown ) const {
	407	vec.clear();
	408	getAllLeavesPrivate(vec,fromHereDown);
	409	}
	410
	411
	412	void tree::getAllLeavesPrivate(vector<nodeP> &vec, const nodeP fromHereDown ) const {
	413	if (fromHereDown == NULL) return;
	414	if (fromHereDown->isLeaf()) vec.push_back(fromHereDown);
	415	for (int k=0; k < fromHereDown->getNumberOfSons(); k++) {
	416	getAllLeavesPrivate(vec,fromHereDown->getSon(k));
	417	}
	418	return;
	419	}
	420
	421	MDOUBLE tree::findLengthBetweenAnyTwoNodes(const nodeP node1, const nodeP node2) const {
	422	vector<nodeP> pathMatrix;
	423	MDOUBLE sumOfDistances =0;
	424	getPathBetweenAnyTwoNodes(pathMatrix, node1, node2);
	425	for (int i=0; i < pathMatrix.size() ; i++) {
	426	// two cases: first, the previous node is closer to the root
	427	// than the current one. NOTE: this can not be the case for the
	428	// first node in the path
	429	if (i>0 && pathMatrix[i]->father() == pathMatrix[i-1])
	430	sumOfDistances += pathMatrix[i]->dis2father();
	431	else
	432	// else: the next node is closer to the root than this node
	433	// again, it can not be the last node in the path
	434	if (i<pathMatrix.size()-1 && pathMatrix[i]->father() == pathMatrix[i+1])
	435	sumOfDistances += pathMatrix[i]->dis2father();
	436	// if both cases are false, then the current node is the
	437	// closest to the root over the path, and therefor the
	438	// distance to its father is not in the path at all.
	439	}
	440	return sumOfDistances;
	441	}
	442
	443	// simular to above, but for all nodes at once. O(n^3) or so, but this should not be an issue
	444	// in any reasonable scenario
	445	// only disTab[i][j] is filled. disTab[j][i] remains zero.
	446	void tree::getTreeDistanceTableAndNames(VVdouble& disTab, vector <string>& vNames) const {
	447	vector<nodeP> nodepV;
	448	getAllLeaves(nodepV, _root);
	449	disTab.resize(nodepV.size());
	450	vNames.resize(nodepV.size());
	451	for (int i=0;i<nodepV.size();++i) {
	452	disTab[i].resize(nodepV.size());
	453	vNames[i]=nodepV[i]->name();
	454	for(int j=i+1;j<nodepV.size();++j){
	455	disTab[i][j]=findLengthBetweenAnyTwoNodes(nodepV[i],nodepV[j]);
	456	}
	457	}
	458	}
	459
	460
	461	// find length between two neighbouring nodes only
	462	MDOUBLE tree::lengthBetweenNodes(const nodeP i, const nodeP j) const {
	463	if (i->father() == j)
	464	return i->dis2father();
	465	assert (j->father() == i);
	466	return j->dis2father();
	467	}
	468
	469	//*******************************************************************************
	470	// change tree topoplogy parameters - should be applied carefully
	471	//*******************************************************************************
	472
	473	//set the new root at p_iNewRoot
	474	// The method doesn't convert an "unrooted tree" = "a tree in which the root has 3 sons"
	475	// to a rooted one = "a tree in which the root has <= 2 sons".
	476	// The new root will still have 3 sons.
	477	void tree::rootAt(const nodeP p_iNewRoot) {
	478	if (_root == p_iNewRoot)
	479	return;
	480	vector<nodeP> pathMatrix;
	481	getPathBetweenAnyTwoNodes(pathMatrix, _root, p_iNewRoot);
	482	//pathMatrix size is always bigger than 2.
	483
	484	for (int i = 0; i < pathMatrix.size() - 1 ; i++) {
	485	pathMatrix[i]->_father = pathMatrix[i+1];
	486	pathMatrix[i]->setDisToFather( pathMatrix[i+1]->dis2father() );
	487	pathMatrix[i]->removeSon(pathMatrix[i+1]);
	488	pathMatrix[i+1]->_sons.push_back(pathMatrix[i+1]->father());
	489	pathMatrix[i+1]->_father = NULL;
	490	}
	491	_root = p_iNewRoot;
	492	}
	493
	494
	495	void tree::makeSureAllBranchesArePositive() {
	496	if (!withBranchLength()) {
	497	LOGnOUT(3,<<"\n WARN: Tree with no branch length! Create Flat tree with all branches= "<<tree::FLAT_LENGTH_VALUE<<endl);
	498	createFlatLengthMatrix(tree::FLAT_LENGTH_VALUE);
	499	return;
	500	}
	501	vector<nodeP> _nodevec;
	502	getAllNodes(_nodevec,_root);
	503	for (int i=0; i < _nodevec.size(); ++i) {
	504	if (_nodevec[i]!=_root) {
	505	if (_nodevec[i]->dis2father()<=0) {
	506	_nodevec[i]->setDisToFather(tree::SHORT_LENGTH_VALUE);
	507	}
	508	}
	509	}
	510	}
	511
	512
	513
	514	void tree::makeSureAllBranchesAreLargerThanEpsilon(MDOUBLE epsilon) {
	515	vector<nodeP> _nodevec;
	516	getAllNodes(_nodevec,_root);
	517	for (int i=0; i < _nodevec.size(); ++i) {
	518	if (_nodevec[i]!=_root) {
	519	if (_nodevec[i]->dis2father()<epsilon) {
	520	LOGnOUT(4,<<" @@@ Warning: brachLength too short:"<<endl
	521	<<" - the node: "<<_nodevec[i]->name()<<", length: "<<_nodevec[i]->dis2father()<<" is changed to: "<<epsilon<<endl);
	522	_nodevec[i]->setDisToFather(epsilon);
	523	}
	524	}
	525	}
	526	}
	527
	528	MDOUBLE tree::getAllBranchesLengthSum() {
	529	MDOUBLE totalBranchLength = 0.0;
	530	vector<nodeP> _nodevec;
	531	getAllNodes(_nodevec,_root);
	532	for (int i=0; i < _nodevec.size(); ++i) {
	533	if (_nodevec[i]!=_root) {
	534	totalBranchLength += _nodevec[i]->dis2father();
	535	}
	536	}
	537	return totalBranchLength;
	538	}
	539
	540	//create new names to all internal nodes.
	541	//the new name will be NXX, where XX is htu number
	542	void tree::create_names_to_internal_nodes() {
	543	vector<nodeP> htuVec;
	544	getAllHTUs(htuVec,_root);
	545
	546	for (int i=0; i<htuVec.size(); ++i) {
	547	string name = int2string(i+1);
	548	htuVec[i]->setName((string)"N" + name);
	549	}
	550	}
	551
	552
	553	void tree::multipleAllBranchesByFactor(MDOUBLE InFactor) {
	554	vector<nodeP> vec;
	555	getAllNodes(vec,_root );
	556	for (int i = 0; i < vec.size(); ++i) {
	557	if (vec[i]->father() != NULL)
	558	vec[i]->setDisToFather(vec[i]->dis2father() * InFactor);
	559	}
	560	_root->setDisToFather(TREE_NULL);
	561	}
	562
	563
	564	void tree::createFlatLengthMatrix(const MDOUBLE newFlatDistance) {
	565	vector<nodeP> vec;
	566	getAllNodes(vec,_root );
	567	for (int i=0; i< vec.size(); ++i) {
	568	if (vec[i]->father() != NULL) vec[i]->setDisToFather(newFlatDistance);
	569	}
	570	}
	571
	572	/*
	573	void tree::set_length_to_father(nodeP iSon, MDOUBLE dLength) {
	574	iSon->setDisToFather(dLength);
	575	}
	576	*/
	577
	578	// helper function
	579	class eqNameVLOCAL {
	580	public:
	581	explicit eqNameVLOCAL(const string& x) : _x(x) {}
	582	const string& _x;
	583	bool operator() (const tree::nodeP y){
	584	return _x == y->name();
	585	}
	586	};
	587
	588	// removes sonNode from its father according to the name of sonNode
	589	// this function should ONLY be used when the node, sonNode, is to be recycled soon!
	590	// because this function does not change the number of leaves nor the number of nodes!
	591	// nor does it change the father of sonNode.
	592	void tree::removeNodeFromSonListOfItsFather(nodeP sonNode) {
	593	vector<tree::nodeP>::iterator vec_iter;
	594	vec_iter = remove_if(sonNode->_father->_sons.begin(), sonNode->_father->_sons.end(), eqNameVLOCAL(sonNode->name()));
	595	sonNode->father()->_sons.erase(vec_iter,sonNode->father()->_sons.end()); // pg 1170, primer.
	596	}
	597
	598
	599	//*******************************************************************************
	600	// Input-Output
	601	//*******************************************************************************
	602
	603
	604	void tree::output(string treeOutFile, TREEformats fmt, bool withHTU ) const {
	605	ofstream os(treeOutFile.c_str());
	606	output(os, fmt, withHTU);
	607	os.close();
	608	}
	609
	610	void tree::output(ostream& os, TREEformats fmt, bool withHTU) const {
	611	if (_root == NULL) {
	612	LOG(1,<<" empty tree ");
	613	return;
	614	}
	615	if (fmt == PHYLIP)
	616	outputInPhylipTreeFormat(os, withHTU);
	617	else if (fmt == PAML)
	618	outputInPamlTreeFormat(os, withHTU);
	619	else if (fmt == ANCESTOR)
	620	outputInAncestorTreeFormat(os,withHTU);
	621	else if (fmt == ANCESTORID)
	622	outputInAncestorIdTreeFormat(os,withHTU);
	623	os<<endl;
	624	//this returns the ostream properies to its previos ones (it was changed to ios::fixed in function outputInPhylipTreeFormat())
	625	os<<setiosflags(ios::scientific);
	626	}
	627
	628	void tree::outputInAncestorTreeFormat(ostream& treeOutStream, bool distances) const{
	629	time_t ltime;
	630	int i,k,spaces;
	631	vector<nodeP> vec;
	632	int maxNameLen = 0;
	633
	634	getAllLeaves(vec,_root);
	635	for (int w=0; w<vec.size();++w) {
	636	if (maxNameLen<vec[w]->name().size()) maxNameLen = vec[w]->name().size();
	637	}
	638	maxNameLen++; // this is just the longest name of taxa plus one
	639
	640
	641
	642	time( &ltime );
	643	treeOutStream<<"# created on "<< ctime( &ltime ) ;
	644
	645	treeOutStream<<"name";
	646	spaces = maxNameLen-4;
	647	for (k=0;k<spaces;++k) treeOutStream<<" ";
	648
	649	treeOutStream<<" parent";
	650	spaces = 7-6;
	651	for (k=0;k<spaces;++k) treeOutStream<<" ";
	652
	653	if (distances) {
	654	treeOutStream<<"disance to father";
	655	treeOutStream<<" ";
	656	}
	657
	658	treeOutStream<<" child";
	659	spaces = maxNameLen-4;
	660	for (k=0;k<spaces;++k) treeOutStream<<" ";
	661
	662	treeOutStream<<endl;
	663
	664
	665	for (i=0; i<vec.size();++i) {
	666	treeOutStream<<vec[i]->name();
	667	spaces = maxNameLen-vec[i]->name().size();
	668	for (k=0;k<spaces;++k) treeOutStream<<" ";
	669
	670	if (vec[i] != _root) {
	671	treeOutStream<<vec[i]->father()->name();
	672	spaces = 7-vec[i]->father()->name().size();
	673	for (k=0;k<spaces;++k) treeOutStream<<" ";
	674	}
	675	else {
	676	treeOutStream<<"root!";
	677	spaces = 7-5;
	678	for (k=0;k<spaces;++k) treeOutStream<<" ";
	679	}
	680
	681	if ((vec[i] != _root) && distances) {
	682	treeOutStream<<vec[i]->dis2father();
	683	}
	684
	685	for (int j=0; j < vec[i]->getNumberOfSons(); j++) {
	686	treeOutStream<<" "<<vec[i]->_sons[j]->name();
	687	}
	688	treeOutStream<<endl;
	689	}
	690
	691	vec.clear();
	692	getAllHTUs(vec,_root );
	693
	694	for (i=0; i<vec.size();++i) {
	695	treeOutStream<<vec[i]->name();
	696	spaces = maxNameLen-vec[i]->name().size();
	697	for (k=0;k<spaces;++k) treeOutStream<<" ";
	698
	699	if (vec[i] != _root) {
	700	treeOutStream<<vec[i]->father()->name();
	701	spaces = 7-vec[i]->father()->name().size();
	702	for (k=0;k<spaces;++k) treeOutStream<<" ";
	703	}
	704	else {
	705	treeOutStream<<"root!";
	706	spaces = maxNameLen-5;
	707	for (k=0;k<spaces;++k) treeOutStream<<" ";
	708	}
	709
	710	if (vec[i] != _root && distances) treeOutStream<<vec[i]->dis2father();
	711
	712	for (int j=0; j < vec[i]->getNumberOfSons(); j++) {
	713	treeOutStream<<" "<<vec[i]->_sons[j]->name();
	714	}
	715	treeOutStream<<endl;
	716	}
	717	}
	718
	719	void tree::outputInPhylipTreeFormat(ostream& os, bool withHTU ) const {
	720	// special case of a tree with 1 or 2 taxa.
	721	if (getLeavesNum() == 1) {
	722	os<<"("<<_root->name()<<")"<<endl;
	723	return;
	724	}
	725	else if ((getLeavesNum() == 2) && (_root->getNumberOfSons()==1)) { // very special case of a root with one son.
	726	os<<"("<<_root->name()<<":0.0";
	727	if (_root->getComment().length()) os << "[&&NHX" << _root->getComment() <<"]";
	728	os<<",";
	729	os<<_root->getSon(0)->name()<<":" <<setiosflags(ios::fixed) <<_root->getSon(0)->dis2father();
	730	if (_root->getSon(0)->getComment().length()) os << "[&&NHX" << _root->getSon(0)->getComment() <<"]";
	731	os <<")"<<endl;
	732	return;
	733	}
	734	// ========================================
	735	os<<"(";
	736	// going over all the son
	737	int i;
	738	for (i=0; i<_root->getNumberOfSons()-1; ++i)
	739	{
	740	print_from(_root->getSon(i),os, withHTU);
	741	os<<",";
	742	}
	743
	744	print_from(_root->getSon(i),os, withHTU);
	745	os<<")";
	746	if (withHTU==true) os<<_root->name();
	747	if (_root->getComment().length()) os << "[&&NHX" << _root->getComment() <<"]";
	748	char c=';';// 59 is dot-line
	749	os<<c;
	750	}
	751
	752	string tree::stringTreeInPhylipTreeFormat(bool withHTU ) const {
	753	string treeString = "";
	754	// special case of a tree with 1 or 2 taxa.
	755	if (getLeavesNum() == 1) {
	756	treeString += "(" + _root->name() + ")" + "\n";
	757	return (treeString);
	758	}
	759	else if ((getLeavesNum() == 2) && (_root->getNumberOfSons()==1)) { // very special case of a root with one son.
	760	treeString += "(" + _root->name() + ":0.0";
	761	if (_root->getComment().length()) treeString += "[&&NHX" + _root->getComment() + "]";
	762	treeString += ",";
	763	treeString +=_root->getSon(0)->name() + ":" + double2string(_root->getSon(0)->dis2father());
	764	if (_root->getSon(0)->getComment().length()) treeString += "[&&NHX" + _root->getSon(0)->getComment() + "]";
	765	treeString += ")\n";
	766	return (treeString);
	767	}
	768	// ========================================
	769	treeString += "(";
	770	// going over all the son
	771	int i;
	772	for (i=0; i<_root->getNumberOfSons()-1; ++i)
	773	{
	774	string_print_from(_root->getSon(i),treeString, withHTU);
	775	treeString += ",";
	776	}
	777
	778	string_print_from(_root->getSon(i),treeString, withHTU);
	779	treeString += ")";
	780	if (withHTU==true) treeString += _root->name();
	781	if (_root->getComment().length()) treeString += "[&&NHX" + _root->getComment() + "]";
	782	treeString += ";";
	783	return (treeString);
	784	}
	785
	786	//this format is like phylip format except first line is the number of leaves in the tree and the number of trees (1)
	787	void tree::outputInPamlTreeFormat(ostream& os, bool withHTU ) const {
	788	// special case of a tree with 1 or 2 taxa.
	789	if (getLeavesNum() == 1) {
	790	os<<"("<<_root->name()<<")"<<endl;
	791	return;
	792	}
	793	else if ((getLeavesNum() == 2) && (_root->getNumberOfSons()==1)) { // very special case of a root with one son.
	794	os<<"("<<_root->name()<<":0.0";
	795	if (_root->getComment().length()) os << "[&&NHX" << _root->getComment() <<"]";
	796	os<<",";
	797	os<<_root->getSon(0)->name()<<":" <<setiosflags(ios::fixed) <<_root->getSon(0)->dis2father();
	798	if (_root->getSon(0)->getComment().length()) os << "[&&NHX" << _root->getSon(0)->getComment() <<"]";
	799	os <<")"<<endl;
	800	return;
	801	}
	802	// ========================================
	803	vector<nodeP> vec;
	804	getAllLeaves(vec, _root);
	805	int num = vec.size();
	806	os<<num<<" 1"<<endl;
	807	os<<"(";
	808	// going over all the son
	809	int i;
	810	for (i=0; i<_root->getNumberOfSons()-1; ++i)
	811	{
	812	print_from(_root->getSon(i),os, withHTU);
	813	os<<",";
	814	}
	815
	816	print_from(_root->getSon(i),os, withHTU);
	817	os<<")";
	818	if (withHTU==true) os<<_root->name();
	819	if (_root->getComment().length()) os << "[&&NHX" << _root->getComment() <<"]";
	820	char c=';';// 59 is dot-line
	821	os<<c;
	822	}
	823
	824
	825	int tree::print_from(nodeP from_node, ostream& os, bool withHTU ) const {
	826	int i;
	827	if (from_node->isLeaf())
	828	os<<from_node->name();
	829	else {
	830	os<<"(";
	831	for (i=0; i<from_node->getNumberOfSons()-1; ++i) {
	832	print_from(from_node->getSon(i),os,withHTU);
	833	os<<",";
	834	}
	835	print_from(from_node->getSon(i),os,withHTU);
	836	os<<")";
	837	if (withHTU==true)
	838	os<<from_node->name();
	839	}
	840	os<<":"<<setiosflags(ios::fixed) <<from_node->dis2father();
	841	if (from_node->getComment().length()) os << "[&&NHX" << from_node->getComment() <<"]";
	842
	843	return 0;
	844	}
	845
	846	int tree::string_print_from(nodeP from_node, string& s, bool withHTU ) const {
	847	int i;
	848	if (from_node->isLeaf())
	849	s += from_node->name();
	850	else {
	851	s += "(";
	852	for (i=0; i<from_node->getNumberOfSons()-1; ++i) {
	853	string_print_from(from_node->getSon(i),s,withHTU);
	854	s += ",";
	855	}
	856	string_print_from(from_node->getSon(i),s,withHTU);
	857	s += ")";
	858	if (withHTU==true)
	859	s += from_node->name();
	860	}
	861	s += ":" + double2string(from_node->dis2father());
	862	if (from_node->getComment().length()) s += "[&&NHX" + from_node->getComment() + "]";
	863
	864	return 0;
	865	}
	866
	867	bool tree::readPhylipTreeTopology(istream &in) {
	868	const vector<char> tree_contents = PutTreeFileIntoVector(in);
	869	return readPhylipTreeTopology(tree_contents);
	870	}
	871
	872	bool tree::readPhylipTreeTopology(istream &in,vector<char>& isFixed) {
	873	const vector<char> tree_contents = PutTreeFileIntoVector(in);
	874	return readPhylipTreeTopology(tree_contents,isFixed);
	875	}
	876
	877
	878
	879	bool tree::readPhylipTreeTopology(const vector<char>& tree_contents) {
	880	vector<char> isFixed;
	881	return readPhylipTreeTopology(tree_contents,isFixed);
	882	}
	883
	884	string getName(vector<char>::const_iterator& p_itCurrent) {
	885	string tmpname;
	886	tmpname.erase();
	887	while (((*p_itCurrent)!=')') &&
	888	((*p_itCurrent)!='(') &&
	889	((*p_itCurrent)!=':') &&
	890	((*p_itCurrent)!=',') &&
	891	((*p_itCurrent)!='}') &&
	892	((*p_itCurrent)!='{')) {
	893	tmpname +=(*p_itCurrent);
	894	++p_itCurrent;
	895	}
	896	return tmpname;
	897	}
	898
	899	bool tree::readPhylipTreeTopology(const vector<char>& tree_contents,vector<char>& isFixed) {
	900
	901
	902	int nextFreeID =0; // to give id's for nodes.
	903	_leaves = GetNumberOfLeaves(tree_contents);
	904	_root = new TreeNode(nextFreeID);
	905	if (_leaves == 1) {// very special case of a tree that is only 1 leaf...
	906	vector<char>::const_iterator itCurrent = tree_contents.begin();
	907	itCurrent++;
	908	_root->setName(getName(itCurrent));
	909	return true;
	910	}
	911
	912	++nextFreeID;
	913	_nodes = GetNumberOfInternalNodes(tree_contents) + _leaves;
	914
	915	isFixed.resize(_nodes,0); // 0 = not fixed, 1 = fixed.
	916	nodeP conection2part=NULL;
	917	vector<char>::const_iterator itCurrent = tree_contents.begin();
	918
	919	if (verifyChar(itCurrent,OPENING_BRACE)\|\|verifyChar(itCurrent,OPENING_BRACE2)){
	920	do {
	921	itCurrent++;
	922	conection2part = readPart(itCurrent,nextFreeID,isFixed);
	923	// readPart returns a pointer to himself
	924	_root->_sons.push_back(conection2part);
	925	conection2part->_father = _root;
	926
	927	} while (verifyChar(itCurrent, COMMA));
	928	}
	929	if (!(verifyChar(itCurrent, CLOSING_BRACE)\|\|verifyChar(itCurrent, CLOSING_BRACE2))) {
	930	errorMsg::reportError("Bad format in tree file.",1); // also quit
	931	} else itCurrent++; // skip closing brace
	932	_root->setComment(readPosibleComment(itCurrent));
	933	if (verifyChar(itCurrent, SEMI_COLLON)) itCurrent++;
	934	// this part is for the cases where all the edges are fixed. In such case - this part changes
	935	// all the branches to not fixed.
	936	int z=0;
	937	bool allFixed = true;
	938	for (z=1; z< isFixed.size(); ++z) {
	939	if (isFixed[z] == 0) {
	940	allFixed = false;
	941	break;
	942	}
	943	}
	944	if (allFixed) {
	945	for (z=1; z< isFixed.size(); ++z) {
	946	isFixed[z] = 0;
	947	}
	948	}
	949
	950
	951	return true;
	952	}
	953
	954
	955
	956	// isFixed is actually a bool vector. Sometimes we want to fix a subtree of the tree, for example
	957	// "human and chimp" so we won't try any topologies that interrupt with this constraint.
	958	// When isFixed[i] == 1, it means that the branch above node i is fixed. This happens for every leaf,
	959	// and for nodes indicated by CLOSING_BRACE2 which is '}'.
	960	tree::nodeP tree::readPart( vector<char>::const_iterator& p_itCurrent,
	961	int& nextFreeID,
	962	vector<char> & isFixed) {
	963	if ( IsAtomicPart(p_itCurrent) ) {
	964	// read the name, i.e. - the content from the file
	965	nodeP newLeaf = new TreeNode(nextFreeID);
	966	isFixed[nextFreeID] = 1; // all edges to the leaves are fixed...
	967	++nextFreeID;
	968
	969	string tmpname = getName(p_itCurrent);
	970	newLeaf->setName(tmpname);
	971
	972	// if a number(==distance) exists on the right-hand, update the distance table
	973	if ( DistanceExists(p_itCurrent) )
	974	newLeaf->setDisToFather(getDistance(p_itCurrent));
	975	// clearPosibleComment(p_itCurrent);
	976	newLeaf->setComment(readPosibleComment(p_itCurrent));
	977	return newLeaf;
	978
	979	}
	980	else // this is a complex part
	981	{
	982	nodeP newHTU = new TreeNode(nextFreeID);
	983	++nextFreeID;
	984	nodeP conection2part=NULL;
	985
	986	do {
	987	++p_itCurrent;
	988	conection2part = readPart(p_itCurrent,nextFreeID,isFixed);
	989	conection2part->_father = newHTU;
	990	newHTU->_sons.push_back(conection2part);
	991	} while (verifyChar(p_itCurrent, COMMA));
	992	if (verifyChar(p_itCurrent, CLOSING_BRACE)) {
	993	isFixed[newHTU->id()] = 1;
	994	} else if (verifyChar(p_itCurrent, CLOSING_BRACE2)) {
	995	isFixed[newHTU->id()] = 0;
	996	} else {
	997	errorMsg::reportError("Bad format in tree file (2)");
	998	}
	999	++p_itCurrent;
	1000
	1001	// if a number(==distance) exists on the right-hand, update the distance table
	1002	if ( DistanceExists(p_itCurrent) )
	1003	newHTU->setDisToFather(getDistance(p_itCurrent));
	1004	// clearPosibleComment(p_itCurrent);
	1005	newHTU->setComment(readPosibleComment(p_itCurrent));
	1006	return newHTU;
	1007
	1008	}
	1009	}
	1010
	1011	//copy the information from other_nodePTR to a new node, and set the father to father_nodePTR
	1012	//does not update the number of nodes and leaves
	1013	tree::nodeP tree::recursiveBuildTree(tree::nodeP father_nodePTR, const tree::nodeP other_nodePTR) {
	1014
	1015	tree::nodeP childPTR = createNode(father_nodePTR, other_nodePTR->id());
	1016	childPTR->setName(other_nodePTR->name());
	1017	childPTR->setComment(other_nodePTR->getComment());
	1018	childPTR->setDisToFather(other_nodePTR->dis2father());
	1019	for (int k = 0 ; k < other_nodePTR->getNumberOfSons() ; ++k) {
	1020	recursiveBuildTree(childPTR, other_nodePTR->getSon(k));
	1021	}
	1022	return childPTR;
	1023	}
	1024
	1025
	1026
	1027	void tree::updateNumberofNodesANDleaves() {
	1028	vector<nodeP> vec;
	1029	getAllLeaves(vec,getRoot());
	1030	_leaves = vec.size();
	1031	vec.clear();
	1032	getAllNodes(vec,getRoot());
	1033	_nodes = vec.size();
	1034	}
	1035
	1036	//removeLeaf: removes nodePTR from tree. also deletes nodePTR
	1037	void tree::removeLeaf(nodeP nodePTR) {
	1038	if (!(nodePTR->isLeaf())) {
	1039	errorMsg::reportError("Error in function deleteLeaf - Unable to remove a node, which is not a leaf ");
	1040	}
	1041
	1042	if (getNodesNum() == 1) {
	1043	delete getRoot();
	1044	_root = NULL;
	1045	}
	1046
	1047	if (nodePTR->isRoot()) {
	1048	assert (nodePTR->getNumberOfSons() == 1);
	1049	nodeP sonOfRoot = nodePTR->getSon(0);
	1050	rootAt(sonOfRoot);
	1051	}
	1052
	1053	// leaf is not the root:
	1054	nodeP fatheOfLeafToRemove = nodePTR->father();
	1055	fatheOfLeafToRemove->removeSon(nodePTR);
	1056	delete nodePTR;
	1057
	1058	int tmpSons = fatheOfLeafToRemove->getNumberOfSons();
	1059
	1060	if ((_root == fatheOfLeafToRemove) && (tmpSons == 1)) {
	1061	//in case the tree was rooted and the removed leaf was one of the root' sons:
	1062	//we have to remove the root and reroot the tree at the second root son
	1063	nodeP newRoot = _root->getSon(0);
	1064	delete fatheOfLeafToRemove;
	1065	_root = NULL;
	1066	rootAt(newRoot);
	1067	}
	1068	else if (tmpSons == 1)
	1069	shrinkNode(fatheOfLeafToRemove);
	1070	else if ((_root == fatheOfLeafToRemove) && (tmpSons == 2)) {
	1071	nodeP tmp = _root;
	1072	rootAt(_root->getSon(0));
	1073	shrinkNode(tmp);
	1074	}
	1075	if (_root->isLeaf() && _root->getNumberOfSons() >0 )
	1076	rootAt(_root->getSon(0));
	1077	updateNumberofNodesANDleaves();
	1078	return;
	1079	}
	1080
	1081
	1082	//getAllBranches: returns two vectors such that nodesUp[i] is the father of nodesDown[i]
	1083	void tree::getAllBranches(vector<nodeP> &nodesUp, vector<nodeP> & nodesDown){
	1084	vector<nodeP> localVec;
	1085	getAllNodes(localVec, _root);
	1086	for (int i=0 ; i < localVec.size() ; i++) {
	1087	if (localVec[i]->father() != NULL) {
	1088	nodesUp.push_back(localVec[i]->father());
	1089	nodesDown.push_back(localVec[i]);
	1090	}
	1091	}
	1092	return;
	1093	}
	1094
	1095
	1096
	1097
	1098
	1099	// the idea is that if we have a node with only one son (a tree like: node1---node2---node3)
	1100	// we can eliminate node2 (which is nodePTR)
	1101	void tree::shrinkNode(nodeP nodePTR) {
	1102
	1103	if (nodePTR->getNumberOfSons() != 1) {
	1104	vector<string> err;
	1105	err.push_back("you requested to eliminate a node with more than 1 sons.");
	1106	err.push_back(" error in function shrink node");
	1107	errorMsg::reportError(err); // also quit the program.
	1108	}
	1109
	1110
	1111	nodeP fatherNode = nodePTR->father();
	1112	nodeP sonNode = nodePTR->getSon(0);
	1113
	1114	if( (nodePTR->isRoot())&&(nodePTR->getNumberOfSons() == 1) ) // refering the root to be sonNode.
	1115	{
	1116	MDOUBLE dis2root = sonNode->dis2father();
	1117	sonNode->setFather(NULL);
	1118	delete(_root);
	1119	_root = sonNode;
	1120
	1121	for (int i=0; i < sonNode->getNumberOfSons(); ++i)
	1122	{
	1123	MDOUBLE oldDis2Father = sonNode->getSon(i)->dis2father();
	1124	sonNode->getSon(i)->setDisToFather(oldDis2Father + dis2root);
	1125	}
	1126
	1127	_root->setDisToFather(TREE_NULL);
	1128
	1129	updateNumberofNodesANDleaves();
	1130	return;
	1131	}
	1132
	1133	// taking care of the son node:
	1134	sonNode->_father = fatherNode;
	1135	sonNode->setDisToFather(sonNode->dis2father() + nodePTR->dis2father());//if it is the root dont add the distance
	1136
	1137	// takind car of father node
	1138	fatherNode->removeSon(nodePTR);
	1139	fatherNode->_sons.push_back(sonNode);
	1140
	1141	// delete the nodePTR
	1142	delete nodePTR;
	1143	updateNumberofNodesANDleaves();
	1144	}
	1145
	1146
	1147	//createRootNode: erase the current tree and create a tree with one node.
	1148	void tree::createRootNode() {
	1149	clear();
	1150	_root = new TreeNode(0);
	1151	_leaves=1;
	1152	_nodes=1;
	1153	}
	1154
	1155
	1156	tree::nodeP tree::createNode(nodeP fatherNode, const int id) {
	1157	nodeP tmp = new TreeNode(id);
	1158	_nodes++;
	1159	if (!fatherNode->isLeaf()) {
	1160	// if fatherNode is a leaf then we remove one leaf and add one leaf, so no change.
	1161	++_leaves;
	1162	}
	1163	// there is one case when your father IS a leaf and yet you have to increase the number of leaves
	1164	// this is when you father is the root, and you add the first child
	1165	if (fatherNode->isRoot() && fatherNode->getNumberOfSons()==0) {
	1166	++_leaves;
	1167	}
	1168	tmp->_father = fatherNode;
	1169	fatherNode->setSon(tmp);
	1170	return tmp;
	1171	}
	1172
	1173	// check whether the tree contains information about branch length
	1174	bool tree::withBranchLength() const{
	1175	if (_root->_sons.empty()) return false;
	1176	else if (_root->getSon(0)->dis2father() != TREE_NULL) return true;
	1177	return false;
	1178	}
	1179
	1180	ostream &operator<<(ostream &out, const tree &tr){
	1181	tr.output(out,tree::ANCESTOR);
	1182	return out;
	1183	}
	1184
	1185	/*
	1186	void tree::fillNodesID() {
	1187	vector<nodeP> vec;
	1188	getAllNodes(vec,_root );
	1189	for (int i=0; i< vec.size(); ++i) {
	1190	vec[i]->setID( i);
	1191	}
	1192	}
	1193	*/
	1194
	1195
	1196
	1197	/*
	1198	void tree::cut_tree_in_two_leaving_interMediate_node(nodeP node2split,tree &small1,tree &small2) const {
	1199	tree tmpCopyOfThisTree = (*this);
	1200	nodeP node2splitOnNewTree = tmpCopyOfThisTree.getNodeByName(node2split->name());
	1201	string interNode = "interNode";
	1202	assert(node2split->father() != NULL);
	1203	nodeP tmp = tmpCopyOfThisTree.makeNodeBetweenTwoNodes(node2splitOnNewTree->father(),node2splitOnNewTree, interNode);
	1204	tmpCopyOfThisTree.rootAt(tmp);
	1205	tmpCopyOfThisTree.cut_tree_in_two_special(tmp, small1,small2);
	1206	nodeP toDel1 = small1.getNodeByName(interNode);
	1207	};
	1208	*/
	1209
	1210
	1211	void tree::outputInAncestorIdTreeFormat(
	1212	ostream& treeOutStream, bool distances) const{
	1213	time_t ltime;
	1214	int i,k,spaces;
	1215	vector<nodeP> vec;
	1216	int maxNameLen = 0;
	1217
	1218	getAllLeaves(vec,_root);
	1219	for (int w=0; w<vec.size();++w) {
	1220	if (maxNameLen<vec[w]->name().size()) maxNameLen = vec[w]->name().size();
	1221	}
	1222	maxNameLen++; // this is just the longest name of taxa plus one
	1223	maxNameLen+=5; // MN
	1224
	1225
	1226	time( &ltime );
	1227	treeOutStream<<"# created on "<< ctime( &ltime ) ;
	1228
	1229	treeOutStream<<"name";
	1230	spaces = maxNameLen-4;
	1231	for (k=0;k<spaces;++k) treeOutStream<<" ";
	1232
	1233	treeOutStream<<"father";
	1234	spaces = 7-6;
	1235	for (k=0;k<spaces;++k) treeOutStream<<" ";
	1236
	1237	if (distances) {
	1238	treeOutStream<<"disance to father";
	1239	treeOutStream<<" ";
	1240	}
	1241
	1242	treeOutStream<<" sons";
	1243	spaces = maxNameLen-4;
	1244	for (k=0;k<spaces;++k) treeOutStream<<" ";
	1245
	1246	treeOutStream<<endl;
	1247
	1248
	1249	for (i=0; i<vec.size();++i) {
	1250	treeOutStream<<vec[i]->name()<<"("<<vec[i]->id()<<")";
	1251	int len=3; if (vec[i]->id()>=10) len++;if (vec[i]->id()>=100) len++;
	1252	spaces = maxNameLen-vec[i]->name().size()-len;
	1253	for (k=0;k<spaces;++k) treeOutStream<<" ";
	1254
	1255	if (vec[i] != _root) {
	1256	treeOutStream<<vec[i]->father()->name();
	1257	spaces = 7-vec[i]->father()->name().size();
	1258	for (k=0;k<spaces;++k) treeOutStream<<" ";
	1259	}
	1260	else {
	1261	treeOutStream<<"root!";
	1262	spaces = 7-5;
	1263	for (k=0;k<spaces;++k) treeOutStream<<" ";
	1264	}
	1265
	1266	if ((vec[i] != _root) && distances) {
	1267	treeOutStream<<vec[i]->dis2father();
	1268	}
	1269	//else treeOutStream<<" ";
	1270
	1271	for (int j=0; j < vec[i]->getNumberOfSons(); j++) {
	1272	treeOutStream<<" "<<vec[i]->_sons[j]->name();
	1273	}
	1274	treeOutStream<<endl;
	1275	}
	1276
	1277	vec.clear();
	1278	getAllHTUs(vec,_root );
	1279
	1280	for (i=0; i<vec.size();++i) {
	1281	treeOutStream<<vec[i]->name()<<"("<<vec[i]->id()<<")";
	1282	int len=3; if (vec[i]->id()>=10) len++;if (vec[i]->id()>=100) len++;
	1283	spaces = maxNameLen-vec[i]->name().size()-len;
	1284	for (k=0;k<spaces;++k) treeOutStream<<" ";
	1285
	1286	if (vec[i] != _root) {
	1287	treeOutStream<<vec[i]->father()->name();
	1288	spaces = 7-vec[i]->father()->name().size();
	1289	for (k=0;k<spaces;++k) treeOutStream<<" ";
	1290	}
	1291	else {
	1292	treeOutStream<<"root!";
	1293	spaces = maxNameLen-5;
	1294	for (k=0;k<spaces;++k) treeOutStream<<" ";
	1295	}
	1296
	1297	if (vec[i] != _root && distances) treeOutStream<<vec[i]->dis2father();
	1298
	1299	for (int j=0; j < vec[i]->getNumberOfSons(); j++) {
	1300	treeOutStream<<" "<<vec[i]->_sons[j]->name();
	1301	}
	1302	treeOutStream<<endl;
	1303	}
	1304	}
	1305
	1306	//1. remove one of the root's sons. this node is called "toRemove"
	1307	//2. attach the sons of toRemove to the root.
	1308	//toRemove must have 2 sons so that the the root will have 3 sons.
	1309	//3. change the distToFather of the root's other son to be the sum of the distances of the root and its two sons
	1310	//in practice: this func erase the root and makes toRemove the new root
	1311	void tree::rootToUnrootedTree() {
	1312	if (getRoot()->getNumberOfSons() > 2) return; // tree is already unrooted!
	1313	if (getLeavesNum() <= 2) return; // Cannot be unrooted if the tree has less than 3 leaves.
	1314
	1315	if (getRoot()->getSon(0)->getNumberOfSons() == 0) {
	1316	tree::nodeP toRemove = getRoot()->getSon(1);
	1317	getRoot()->getSon(0)->setDisToFather(getRoot()->getSon(1)->dis2father() + getRoot()->getSon(0)->dis2father());
	1318	getRoot()->setSon(toRemove->getSon(0));
	1319	for (int k = 1; k < toRemove->getNumberOfSons(); ++k) {
	1320	getRoot()->setSon(toRemove->getSon(k));
	1321	}
	1322	delete toRemove;
	1323	getRoot()->removeSon(getRoot()->getSon(1));
	1324	getRoot()->claimSons();
	1325	}
	1326	else {
	1327	tree::nodeP toRemove = getRoot()->getSon(0);
	1328	getRoot()->getSon(1)->setDisToFather(getRoot()->getSon(0)->dis2father() + getRoot()->getSon(1)->dis2father());
	1329	getRoot()->setSon(toRemove->getSon(0));
	1330	for (int k = 1; k < toRemove->getNumberOfSons(); ++k) {
	1331	getRoot()->setSon(toRemove->getSon(k));
	1332	}
	1333	delete toRemove;
	1334	getRoot()->removeSon(getRoot()->getSon(0));
	1335	getRoot()->claimSons();
	1336	}
	1337	updateNumberofNodesANDleaves();
	1338	}
	1339
	1340	//check if the distances from the root to all leaves are equal up to the given tollerance
	1341	bool tree::isUltrametric(MDOUBLE tol, bool bErrorIfNot) const
	1342	{
	1343	vector<nodeP> nodes;
	1344	getAllLeaves(nodes, _root);
	1345	MDOUBLE dist0 = getDistanceFromNode2ROOT(nodes[0]);
	1346	for (int t = 1; t < nodes.size(); ++t)
	1347	{
	1348	MDOUBLE dist = getDistanceFromNode2ROOT(nodes[t]);
	1349	if (!DEQUAL(dist, dist0, tol))
	1350	{
	1351	if (bErrorIfNot)
	1352	{
	1353	string error = "Error: tree is not ultrametric\n";
	1354	error += "the distance from " + nodes[0]->name() + " to the root is: " + double2string(dist0) +"\n";
	1355	error += "the distance from " + nodes[t]->name() + " to the root is: " + double2string(dist) +"\n";
	1356	errorMsg::reportError(error);
	1357	}
	1358	return false;
	1359	}
	1360	}
	1361	return true;
	1362	}
	1363

+218

-0

libs/phylogeny/tree.h less more

	0	// $Id: tree.h 9777 2011-08-08 20:09:42Z rubi $
	1
	2	#ifndef ___TREE
	3	#define ___TREE
	4
	5	#include "definitions.h"
	6	#include "readTree.h"
	7	#include "errorMsg.h"
	8	#include "logFile.h"
	9
	10
	11	//***********************************************************************************
	12	// class tree represents only the topology. It has no MSA and assumes no model of evolution.
	13	//***********************************************************************************
	14
	15
	16	class tree {
	17	public:
	18	static const MDOUBLE FLAT_LENGTH_VALUE;// = 0.3;
	19	static const int TREE_NULL;// = -1;
	20	static const MDOUBLE SHORT_LENGTH_VALUE;// = 0.000001f;
	21
	22	//---------------------------- TREE NODE ----------------------
	23	public:
	24	class TreeNode {
	25	public:
	26	explicit TreeNode(const int id) :_sons(0),_father(NULL),_id(id),_name( (string)"" ),_dis2father(TREE_NULL),_comment((string)"") {}
	27	const int id() const {return _id;}
	28	const string name() const {return _name;}
	29	const MDOUBLE dis2father() const {return _dis2father;}
	30	MDOUBLE getDistance2ROOT();
	31	MDOUBLE getMinimalDistance2OTU();
	32	int getMinimalNumOfNodes2OTU();
	33	TreeNode* father() {return _father;}
	34	void setName(const string &inS) {_name = inS;}
	35	void setID(const int inID) {_id = inID;}
	36	void setDisToFather(const MDOUBLE dis) {_dis2father = dis;}
	37	void setFather(TreeNode* tn){_father=tn;}
	38	int getNumberOfSons() const {return _sons.size();}
	39	TreeNode* getSon (int i) {return _sons[i];}
	40	TreeNode* getLastSon () {return _sons.back();}
	41	void removeLastSon() {_sons.pop_back();}
	42	void removeSon(TreeNode* pSon);
	43	//setSon: updates only the father pointer to the son!
	44	void setSon(TreeNode* pSon) {_sons.push_back(pSon);}
	45	void setSon(TreeNode* pSon, int i) {_sons[i]=pSon;} // this will overwrite previous pointer!
	46	bool isRoot() const {return (_father == NULL);}
	47	bool isLeaf() const {
	48	return (
	49	(getNumberOfSons() ==0) \|\|
	50	(isRoot() && (getNumberOfSons() ==1))
	51	) ;
	52	}
	53	bool isInternal() const {return (!isLeaf());}
	54	//claimSons: sets the _father pointer of all sons to (this)
	55	//this function is used after setSon has been called without updating the son pointer.
	56	void claimSons();
	57	void removeAllSons() {_sons.clear();}
	58	void copySons(TreeNode* other) {//copy the vector of nodeP only from one node to the other
	59	_sons=other->_sons;
	60	}
	61	void setComment(string comment) {_comment = comment;
	62	if (comment.length())
	63	LOG(16,<<"comment for "<<_name<<" set to "<<comment<<endl );}
	64	const string getComment(void) const {return _comment;}
	65	private:
	66	vector<TreeNode*> _sons;
	67	TreeNode* _father;
	68	int _id;
	69	string _name;
	70	MDOUBLE _dis2father;
	71	string _comment;
	72	friend class tree;
	73	};
	74	//------------------------------------------------------------
	75
	76
	77	public:
	78	//NEWICK is the standard format
	79	//ANCESTOR/ANCESTORID are for debugging purposes: output a list of nodes one for each line.
	80	//for each node print the name, dist2father and its sons. id are printed only in ANCESTORID.
	81	//PAML is like Newick format but with extra line: #of leaves space and #of trees
	82	typedef enum { PHYLIP, ANCESTOR, ANCESTORID, PAML } TREEformats;
	83	typedef TreeNode* nodeP;
	84
	85	public:
	86	//*******************************************************************************
	87	// constructors
	88	//*******************************************************************************
	89	tree();
	90	tree(const string& treeFileName);
	91	tree(istream &treeFile);
	92	tree(const vector<char>& tree_contents);
	93
	94	tree(const string& treeFileName,vector<char>& isFixed);
	95	tree(const vector<char>& tree_contents, vector<char>& isFixed);
	96	tree(istream &in, vector<char>& isFixed);
	97
	98	tree(const tree &otherTree);
	99	tree& operator=(const tree &otherTree);
	100
	101	virtual ~tree() {clear();};
	102
	103	//*******************************************************************************
	104	// questions on the tree topology
	105	//*******************************************************************************
	106
	107	nodeP getRoot() const {return _root;};
	108	inline int getLeavesNum() const;
	109	inline int getNodesNum() const;
	110	inline int getInternalNodesNum() const;
	111	//findNodeByName: searches the subtree of myNode for a node with a specified name.
	112	//if myNode==NULL: the search starts from the root
	113	nodeP findNodeByName(const string inName, nodeP myNode=NULL) const;
	114	nodeP findNodeById(const int inId, nodeP myNode=NULL) const;
	115	bool withBranchLength() const;
	116	//getNeigboursOfNode: stores into neighbourVec the father and sons of myNode
	117	void getNeigboursOfNode(vector<nodeP> &neighbourVec, const nodeP myNode) const;
	118	void getTreeDistanceTableAndNames(VVdouble& disTab, vector <string>& vNames) const;
	119	MDOUBLE findLengthBetweenAnyTwoNodes(const nodeP node1,const nodeP node2) const;
	120	//lengthBetweenNodes: find length between two neighbouring nodes only
	121	MDOUBLE lengthBetweenNodes(const nodeP i, const nodeP j) const;
	122	//check if the distances from the root to all leaves are equal up to the given tollerance
	123	bool isUltrametric(MDOUBLE tol, bool bErrorIfNot) const;
	124
	125	void getPathBetweenAnyTwoNodes(vector<nodeP> &path,const nodeP node1, const nodeP node2) const;
	126	void getFromLeavesToRoot(vector<nodeP> &vNeighbourVector) const;
	127	void getFromRootToLeaves(vector<nodeP> &vec) const;
	128	void getFromNodeToLeaves(vector<nodeP> &vec, const nodeP fromHereDown) const;
	129
	130	void getAllHTUs(vector<nodeP> &vec,const nodeP fromHereDown) const ;
	131	void getAllNodes(vector<nodeP> &vec,const nodeP fromHereDown) const ;
	132	void getAllLeaves(vector<nodeP> &vec,const nodeP fromHereDown) const;
	133
	134	//*******************************************************************************
	135	// change tree topoplogy parameters - should be applied carefully
	136	//*******************************************************************************
	137	//rootAt: sets newRoot as the root. updates the iterator order lists.
	138	void rootAt(const nodeP newRoot);
	139	void rootToUnrootedTree();
	140	void multipleAllBranchesByFactor(const MDOUBLE InFactor);
	141	void create_names_to_internal_nodes();
	142	void makeSureAllBranchesArePositive();
	143	void makeSureAllBranchesAreLargerThanEpsilon(MDOUBLE epsilon);
	144	MDOUBLE getAllBranchesLengthSum();
	145
	146	// removeNodeFromSonListOfItsFather:
	147	// removes sonNode from its father according to the name of sonNode
	148	// this function should ONLY be used when sonNode is to be recycled soon!
	149	// because this function does not change the number of leaves nor the number of nodes!
	150	// nor does it change the father of sonNode.
	151	void removeNodeFromSonListOfItsFather(nodeP sonNode);
	152
	153	void shrinkNode(nodeP nodePTR);
	154	//removeLeaf: removes nodePTR from tree. also deletes nodePTR
	155	void removeLeaf(nodeP nodePTR);
	156	//getAllBranches: returns two vectors such that nodesUp[i] is the father of nodesDown[i]
	157	void getAllBranches(vector<nodeP> &nodesUP, vector<nodeP> & nodesDown);
	158	//createRootNode: erase the current tree and create a tree with one node.
	159	void createRootNode();
	160	nodeP createNode(nodeP fatherNode, const int id);
	161	void updateNumberofNodesANDleaves();
	162
	163	// **********************************************************
	164	// initialization
	165	// **********************************************************
	166
	167	//createFlatLengthMatrix: sets the distance of all branches to newFlatDistance
	168	void createFlatLengthMatrix(const MDOUBLE newFlatDistance = FLAT_LENGTH_VALUE);
	169	//recursiveBuildTree: copy the information from other_nodePTR to a new node, and set the father to father_nodePTR
	170	//used by treeUtil
	171	nodeP recursiveBuildTree(tree::nodeP father_nodePTR,const tree::nodeP other_nodePTR);
	172
	173	//*******************************************************************************
	174	// Input-Output
	175	//*******************************************************************************
	176	void output(string treeOutFile, TREEformats fmt= PHYLIP,bool withHTU=false) const;
	177	void output(ostream& os, TREEformats fmt= PHYLIP,bool withHTU=false) const;
	178	string stringTreeInPhylipTreeFormat(bool withHTU=false) const;
	179
	180
	181	private:
	182	void clear();
	183
	184	void outputInAncestorTreeFormat(ostream& treeOutStream, bool withDist = false) const;
	185	void outputInPhylipTreeFormat(ostream& treeOutStream,bool withHTU=false) const;
	186	void outputInAncestorIdTreeFormat(ostream& treeOutStream, bool withDist = false) const;
	187	void outputInPamlTreeFormat(ostream& treeOutStream, bool withHTU = false) const;
	188	int print_from(nodeP from_node, ostream& os, bool withHTU) const;
	189	int print_from(nodeP from_node, ostream& os, bool withHTU);
	190	int string_print_from(nodeP from_node, string& s, bool withHTU) const;
	191
	192	bool readPhylipTreeTopology(istream& in,vector<char>& isFixed); //same as the constructor with file name
	193	bool readPhylipTreeTopology(const vector<char>& tree_contents,vector<char>& isFixed);
	194	bool readPhylipTreeTopology(istream& in); //same as the constructor with file name
	195	bool readPhylipTreeTopology(const vector<char>& tree_contents);
	196	nodeP readPart(vector<char>::const_iterator& p_itCurrent, int& nextFreeID, vector<char> & isFixed);
	197
	198	void getAllHTUsPrivate(vector<nodeP> &vec,nodeP fromHereDown) const ;
	199	void getAllNodesPrivate(vector<nodeP> &vec,nodeP fromHereDown) const ;
	200	void getAllLeavesPrivate(vector<nodeP> &vec,nodeP fromHereDown) const;
	201
	202
	203	protected:
	204	TreeNode *_root;
	205	int _leaves;
	206	int _nodes;
	207	};
	208
	209	inline int tree::getLeavesNum() const {return _leaves;}
	210	inline int tree::getNodesNum() const {return _nodes;}
	211	inline int tree::getInternalNodesNum() const {return getNodesNum() - getLeavesNum();}
	212
	213	ostream &operator<<(ostream &out, const tree &tr);
	214
	215	#endif
	216
	217

+16

-0

libs/phylogeny/treeInference.cpp less more

	0	// $Id: treeInference.cpp 962 2006-11-07 15:13:34Z privmane $
	1	#include "treeInference.h"
	2	#include "likeDist.h"
	3	#include "distanceTable.h"
	4
	5	tree treeInference::computeNJtreeWithLikeDist(const stochasticProcess &sp, const sequenceContainer &sc,
	6	const tree * const constraintTreePtr, const vector<MDOUBLE> * const weights) {
	7
	8	likeDist ld( sp, 0.01);
	9	VVdouble disTab;
	10	vector<string> vNames;
	11	giveDistanceTable(&ld,sc,disTab,vNames,weights);
	12	NJalg nj1;
	13	return (nj1.computeTree(disTab,vNames,constraintTreePtr));
	14	}
	15

+26

-0

libs/phylogeny/treeInference.h less more

	0	// $Id: treeInference.h 962 2006-11-07 15:13:34Z privmane $
	1	//
	2
	3	// version 1.01
	4	// last modified 23 May 2005
	5
	6	#ifndef ___TREE_INFERENCE
	7	#define ___TREE_INFERENCE
	8
	9	#include "definitions.h"
	10	#include "tree.h"
	11	#include "sequenceContainer.h"
	12	#include "stochasticProcess.h"
	13	#include "nj.h"
	14	#include <vector>
	15	using namespace std;
	16
	17	class treeInference {
	18	public:
	19	static tree computeNJtreeWithLikeDist(const stochasticProcess &sp, const sequenceContainer &sc,
	20	const tree * const constraintTreePtr = NULL, const vector<MDOUBLE> * const weights = NULL);
	21
	22	};
	23	#endif
	24
	25

+6

-0

libs/phylogeny/treeIt.cpp less more

	0	// $Id: treeIt.cpp 962 2006-11-07 15:13:34Z privmane $
	1
	2	#include "definitions.h"
	3	#include "treeIt.h"
	4
	5

+128

-0

libs/phylogeny/treeIt.h less more

	0	// $Id: treeIt.h 962 2006-11-07 15:13:34Z privmane $
	1
	2	#ifndef ___TREE_IT
	3	#define ___TREE_IT
	4	#include "definitions.h"
	5	#include "errorMsg.h"
	6	#include "tree.h"
	7
	8
	9	class treeIterTopDown{
	10	public:
	11	treeIterTopDown(tree& t) : _t(t) , _current(_t.getRoot()) {
	12	_childCheck.push_back(0);
	13	}
	14	tree::nodeP first() {
	15	_childCheck.clear();
	16	_childCheck.push_back(0);
	17	_current = _t.getRoot();
	18	return _t.getRoot();
	19	}
	20	tree::nodeP next() {
	21	if (_childCheck.empty()) return NULL;
	22	if (_childCheck[_childCheck.size()-1]<_current->getNumberOfSons()) {
	23	_current = _current->getSon(_childCheck[_childCheck.size()-1]);
	24	_childCheck[_childCheck.size()-1]++;
	25	_childCheck.push_back(0);
	26	}
	27	else {
	28	_current = _current->father();
	29	_childCheck.pop_back();
	30	return next();
	31	}
	32	return _current;
	33	}
	34	tree::nodeP operator++(int) {return next();}
	35	tree::nodeP operator++() {return next();}
	36	tree::nodeP end(){ return NULL;}
	37	tree::nodeP operator-> (){ return _current;}
	38	tree::TreeNode& operator* (){return *_current;}
	39	bool operator!= (tree::nodeP t) {return (t != this->_current);}
	40	private:
	41	vector<int> _childCheck;
	42	tree& _t;
	43	tree::nodeP _current;
	44	};
	45
	46	class treeIterTopDownConst{
	47	public:
	48	treeIterTopDownConst(const tree& t) : _t(t) , _current(_t.getRoot()) {
	49	_childCheck.push_back(0);
	50	}
	51	tree::nodeP first() {
	52	_childCheck.clear();
	53	_childCheck.push_back(0);
	54	_current = _t.getRoot();
	55	return _t.getRoot();
	56	}
	57	tree::nodeP next() {
	58	if (_childCheck.empty()) return NULL;
	59	if (_childCheck[_childCheck.size()-1]<_current->getNumberOfSons()) {
	60	_current = _current->getSon(_childCheck[_childCheck.size()-1]);
	61	_childCheck[_childCheck.size()-1]++;
	62	_childCheck.push_back(0);
	63	}
	64	else {
	65	_current = _current->father();
	66	_childCheck.pop_back();
	67	return next();
	68	}
	69	return _current;
	70	}
	71	tree::nodeP operator++(int) {return next();}
	72	tree::nodeP operator++() {return next();}
	73	tree::nodeP end(){ return NULL;}
	74	tree::nodeP operator-> (){ return _current;}
	75	tree::TreeNode& operator* (){return *_current;}
	76	bool operator!= (tree::nodeP t) {return (t != this->_current);}
	77	private:
	78	vector<int> _childCheck;
	79	const tree& _t;
	80	tree::nodeP _current;
	81	};
	82
	83	class treeIterDownTopConst{
	84	public:
	85	treeIterDownTopConst(const tree& t) : _t(t) , _current(_t.getRoot()) {
	86	_childCheck.push_back(0);
	87	}
	88	const tree::nodeP first() {
	89	_childCheck.clear();
	90	_childCheck.push_back(0);
	91	_current = _t.getRoot();
	92	return next();
	93	}
	94	const tree::nodeP next() {
	95	if (_childCheck[_childCheck.size()-1]>_current->getNumberOfSons()) {//checked
	96	_current = _current->father();
	97	if (!_current) return NULL;
	98	_childCheck.pop_back();
	99	_childCheck[_childCheck.size()-1]++;
	100	return next();
	101	}
	102	else if (_childCheck[_childCheck.size()-1]<_current->getNumberOfSons()) {
	103	_current = _current->getSon(_childCheck[_childCheck.size()-1]);
	104	_childCheck.push_back(0);
	105	return next();
	106	}
	107	// else //if (_childCheck[_childCheck.size()-1]==_current->getNumberOfSons())
	108	// {
	109	_childCheck[_childCheck.size()-1]++;
	110	return _current;
	111	// }
	112
	113	// return next();
	114	}
	115	const tree::nodeP operator++(int) {return next();}
	116	const tree::nodeP operator++() {return next();}
	117	const tree::nodeP end(){ return NULL;}
	118	const tree::nodeP operator-> (){ return _current;}
	119	const tree::TreeNode& operator* (){return *_current;}
	120	bool operator!= (tree::nodeP t) {return (t != this->_current);}
	121	private:
	122	vector<int> _childCheck;
	123	const tree& _t;
	124	tree::nodeP _current;
	125	};
	126
	127	#endif

+430

-0

libs/phylogeny/treeUtil.cpp less more

	0	// $Id: treeUtil.cpp 10477 2012-03-18 07:58:05Z itaymay $
	1
	2	#include "definitions.h"
	3	#include "treeUtil.h"
	4	#include "treeIt.h"
	5	#include "someUtil.h"
	6	#include <fstream>
	7	#include <iostream>
	8	#include <cassert>
	9	#include <map>
	10	using namespace std;
	11
	12	vector<tree> getStartingTreeVecFromFile(string fileName) {
	13	vector<tree> vecT;
	14	ifstream in;
	15	istream* inPtr = &cin; // default
	16	if (fileName != "-"){
	17	in.open(fileName.c_str());
	18	if (! in.is_open())
	19	errorMsg::reportError(string("Error - unable to open tree vector file ")+fileName,1);
	20	inPtr = &in;
	21	}
	22
	23	while (!inPtr->eof()) {
	24	//inputf.eatwhite();// do not remove. Tal: 1.1.2003
	25	vector<char> myTreeCharVec = PutTreeFileIntoVector(*inPtr);
	26	if (myTreeCharVec.size() >0) {
	27	tree t1(myTreeCharVec);
	28	//LOGDO(5,t1.output(myLog::LogFile()));
	29	vecT.push_back(t1);
	30	}
	31	}
	32	if (in.is_open())
	33	in.close();
	34	return vecT;
	35	}
	36
	37	void getStartingTreeVecFromFile(string fileName,
	38	vector<tree>& vecT,
	39	vector<char>& constraintsOfT0) {
	40	ifstream in;
	41	istream* inPtr = &cin; // default
	42	if (fileName != "-"){
	43	in.open(fileName.c_str());
	44	if (! in.is_open())
	45	errorMsg::reportError(string("Error - unable to open tree vector file ")+fileName,1);
	46	inPtr = &in;
	47	}
	48	//inputf.eatwhite();
	49	for (int i=0; !inPtr->eof() ; ++i) {
	50	// while (!inPtr->eof()) {
	51	vector<char> myTreeCharVec = PutTreeFileIntoVector(*inPtr);
	52	if (myTreeCharVec.size() >0) {
	53	if (i==0) {
	54	tree t1(myTreeCharVec,constraintsOfT0);
	55	vecT.push_back(t1);
	56	}
	57	else {
	58	tree t1(myTreeCharVec);
	59	vecT.push_back(t1);
	60	}
	61
	62	}
	63	}
	64	if (in.is_open())
	65	in.close();
	66	}
	67
	68
	69
	70
	71
	72
	73
	74	#include <algorithm>
	75	using namespace std;
	76
	77	bool sameTreeTolopogy(tree t1, tree t2){
	78	if (t1.getNodesNum() != t2.getNodesNum()) {
	79	errorMsg::reportError("error in function same tree topology (1)");
	80	}
	81	tree::nodeP x = t2.getRoot();
	82	while (x->getNumberOfSons() > 0) x= x->getSon(0);
	83	t1.rootAt(t1.findNodeByName(x->name())->father()); // now they have the same root
	84	t2.rootAt(t2.findNodeByName(x->name())->father()); // now they have the same root
	85	map<int,string> names1;
	86	treeIterDownTopConst tit1(t1);
	87	for (tree::nodeP nodeM = tit1.first(); nodeM != tit1.end(); nodeM = tit1.next()) {
	88	vector<string> nameOfChild;
	89	for (int i=0; i < nodeM->getNumberOfSons();++i) {
	90	nameOfChild.push_back(names1[nodeM->getSon(i)->id()]);
	91	}
	92	if (nodeM->getNumberOfSons()==0) nameOfChild.push_back(nodeM->name());
	93	sort(nameOfChild.begin(),nameOfChild.end());
	94	string res = "(";
	95	for (int k=0; k < nameOfChild.size(); ++k) {
	96	res += nameOfChild[k];
	97	}
	98	res += ")";
	99	names1[nodeM->id()] = res;
	100	}
	101
	102	map<int,string> names2;
	103	treeIterDownTopConst tit2(t2);
	104	for (tree::nodeP nodeM2 = tit2.first(); nodeM2 != tit2.end(); nodeM2 = tit2.next()) {
	105	vector<string> nameOfChild;
	106	for (int i=0; i < nodeM2->getNumberOfSons();++i) {
	107	nameOfChild.push_back(names2[nodeM2->getSon(i)->id()]);
	108	}
	109	if (nodeM2->getNumberOfSons()==0) nameOfChild.push_back(nodeM2->name());
	110	sort(nameOfChild.begin(),nameOfChild.end());
	111	string res = "(";
	112	for (int k=0; k < nameOfChild.size(); ++k) {
	113	res += nameOfChild[k];
	114	}
	115	res += ")";
	116	names2[nodeM2->id()] = res;
	117	}
	118	return names1[t1.getRoot()->id()] == names2[t2.getRoot()->id()];
	119
	120
	121
	122	}
	123
	124	// bigTree is passed by value and not by reference. Therefore, this method doens't change the original bigTree,
	125	// but allocates a new bigTree to be split.
	126	bool cutTreeToTwo(tree bigTree,
	127	const string& nameOfNodeToCut,
	128	tree &small1,
	129	tree &small2){// cutting above the NodeToCut.
	130	// we want to cut the tree in two.
	131	// first step: we make a new node between the two nodes that have to be splited,
	132	tree::nodeP node2splitOnNewTree = bigTree.findNodeByName(nameOfNodeToCut);
	133	string interNode = "interNode";
	134	if (node2splitOnNewTree->father() == NULL) return(false);
	135	// assert(node2splitOnNewTree->father() != NULL);
	136	tree::nodeP tmp = makeNodeBetweenTwoNodes(bigTree,node2splitOnNewTree->father(),node2splitOnNewTree, interNode);
	137	bigTree.rootAt(tmp); // tmp is the interNode and it's now the root of the tree. Its sons are node2splitOnNewTree and its father.
	138	string allNodes = "Runs/testBifurcating/beforeCut.tree";
	139	bigTree.output(allNodes, tree::PHYLIP, true);
	140	cutTreeToTwoSpecial(bigTree,tmp, small1,small2);
	141
	142	if (small1.getNodesNum() < 5 \|\| small2.getNodesNum() < 5) return (false);
	143	LOGDO(15,small1.output(myLog::LogFile(),tree::ANCESTORID));
	144	LOGDO(15,small2.output(myLog::LogFile(),tree::ANCESTORID));
	145
	146
	147	tree::nodeP toDel1 = small1.findNodeByName(interNode);
	148	small1.removeLeaf(toDel1);
	149	tree::nodeP toDel2 = small2.findNodeByName(interNode);
	150	small2.removeLeaf(toDel2);
	151	// this part fix the ids.
	152	treeIterTopDown tIt(small1);
	153	int newId =0;
	154	for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
	155	mynode->setID(newId);
	156	newId++;
	157	}
	158	treeIterTopDown tIt2(small2);
	159	int newId2 =0;
	160	for (tree::nodeP mynode2 = tIt2.first(); mynode2 != tIt2.end(); mynode2 = tIt2.next()) {
	161	mynode2->setID(newId2);
	162	newId2++;
	163	}
	164	return (true); // successes!
	165
	166	};
	167
	168	// pre-request:
	169	// the intermediateNode is the root.
	170	// and it has two sons.
	171	// resultT1PTR & resultT2PTR are empty trees (root=NULL);
	172	void cutTreeToTwoSpecial(const tree& source, tree::nodeP intermediateNode,
	173	tree &resultT1PTR, tree &resultT2PTR) {
	174	// make sure that you got two empty trees:
	175	if (resultT1PTR.getRoot() != NULL)
	176	errorMsg::reportError("got a non empty tree1 in function cutTreeToTwoSpecial");
	177	else if (resultT2PTR.getRoot() != NULL)
	178	errorMsg::reportError("got a non empty tree2 in function cutTreeToTwoSpecial");
	179
	180	// make sure the the intermediateNode is really an intermediate Node;
	181	if ((intermediateNode->getNumberOfSons() !=2 ) \|\| (source.getRoot() != intermediateNode)) {
	182	errorMsg::reportError("intermediateNode in function cutTreeToTwoSpecial, is not a real intermediate node ");
	183	}
	184
	185	resultT1PTR.createRootNode();
	186	resultT1PTR.getRoot()->setName(intermediateNode->name());
	187
	188	resultT2PTR.createRootNode();
	189	resultT2PTR.getRoot()->setName(intermediateNode->name());
	190
	191
	192	resultT1PTR.recursiveBuildTree(resultT1PTR.getRoot(),intermediateNode->getSon(0));
	193	resultT2PTR.recursiveBuildTree(resultT2PTR.getRoot(),intermediateNode->getSon(1));
	194	}
	195
	196
	197
	198
	199
	200	//insert a new node between fatherNode and sonNode
	201	tree::nodeP makeNodeBetweenTwoNodes(tree& et,
	202	tree::nodeP fatherNode,
	203	tree::nodeP sonNode,
	204	const string &interName){
	205	//make sure that fatherNode is indeed the father and sonNode is the son (and not the opposite).
	206	if (fatherNode->father() == sonNode) {
	207	tree::nodeP tmp = fatherNode;
	208	fatherNode = sonNode;
	209	sonNode = tmp;
	210	}
	211	else if (sonNode->father() != fatherNode) {
	212	errorMsg::reportError("Error in function 'cut_tree_in_two'. the two nodes are not neighbours ");
	213	}
	214
	215	tree::nodeP theNewNodePTR = new tree::TreeNode(et.getNodesNum());
	216
	217	//fix the tree information for the new node.
	218	theNewNodePTR->setName(interName);
	219	MDOUBLE tmpLen = sonNode->dis2father() * 0.5;
	220	theNewNodePTR->setDisToFather(tmpLen);
	221	theNewNodePTR->setFather(fatherNode);
	222	theNewNodePTR->setSon(sonNode);
	223
	224	//fix the tree information for the father node.
	225	fatherNode->removeSon(sonNode);
	226	fatherNode->setSon(theNewNodePTR);
	227
	228	//fix the tree information for the sonNode.
	229	sonNode->setFather(theNewNodePTR);
	230	sonNode->setDisToFather(tmpLen);
	231	return theNewNodePTR;
	232	}
	233
	234	vector<string> getSequencesNames(const tree& t){
	235	vector<tree::nodeP> vleaves;
	236	t.getAllLeaves(vleaves,t.getRoot());
	237	vector<string> res;
	238	vector<tree::nodeP>::const_iterator i = vleaves.begin();
	239	for ( ; i<vleaves.end(); ++i) {
	240	res.push_back((*i)->name());
	241	}
	242	return res;
	243	}
	244
	245	tree starTree(const vector<string>& names) {
	246	tree et;
	247	et.createRootNode();
	248	for (int k=0 ; k < names.size(); ++k) {
	249	tree::nodeP tmpNode;
	250	tmpNode = et.createNode(et.getRoot(),et.getNodesNum());
	251	tmpNode->setDisToFather(tree::FLAT_LENGTH_VALUE);
	252	tmpNode->setName(names[k]);
	253	}
	254	et.create_names_to_internal_nodes();
	255	return et;
	256	}
	257
	258
	259	MDOUBLE getSumOfBranchLengths(const tree &t){
	260	treeIterDownTopConst tIt(t);
	261	MDOUBLE sum = 0;
	262	for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
	263	if (!mynode->isRoot()){
	264	sum+=mynode->dis2father();
	265	}
	266	}
	267	return sum;
	268	}
	269
	270	MDOUBLE getDistanceFromNode2ROOT(const tree::nodeP &myNode){
	271	if(myNode->isRoot())
	272	return 0.0;
	273	else
	274	return ( myNode->dis2father() + getDistanceFromNode2ROOT(myNode->father()) );
	275	}
	276
	277	void fillAllNodesNames(Vstring& Vnames,const tree& tr){
	278	vector<tree::nodeP> vAllNodes;
	279	tr.getAllNodes(vAllNodes,tr.getRoot());
	280	Vnames.resize(vAllNodes.size());
	281	for (int i = 0; i<vAllNodes.size();++i)
	282	Vnames[vAllNodes[i]->id()] = vAllNodes[i]->name();
	283	}
	284
	285	void printTreeWithValuesAsBP(ostream &out, const tree &tr, Vstring values, VVVdouble *probs, int from, int to) {
	286	printTreeWithValuesAsBP(out,tr.getRoot(), values,probs,from,to);
	287	out<<"["<<values[tr.getRoot()->id()]<<"];";
	288	}
	289
	290	void printTreeWithValuesAsBP(ostream &out, const tree::nodeP &myNode, Vstring values, VVVdouble *probs, int from, int to) {
	291	int fatherNodeIndex,sonNodeIndex;
	292	if (myNode->isLeaf()) {
	293	out<< myNode->name();
	294	if(probs){
	295	for(fatherNodeIndex = 0;fatherNodeIndex < (*probs)[myNode->id()].size();++fatherNodeIndex){
	296	for(sonNodeIndex = 0;sonNodeIndex < (*probs)[myNode->id()][fatherNodeIndex].size();++sonNodeIndex){
	297	if((from == fatherNodeIndex)&&(to == sonNodeIndex)){
	298	out<<"_P_"<<(*probs)[myNode->id()][fatherNodeIndex][sonNodeIndex]<< ":"<<myNode->dis2father();
	299	}
	300	}
	301	}
	302	}
	303	return;
	304	} else {
	305	out <<"(";
	306	for (int i=0;i<myNode->getNumberOfSons();++i) {
	307	if (i>0) out <<",";
	308	printTreeWithValuesAsBP(out, myNode->getSon(i), values,probs,from,to);
	309	}
	310	out <<")";
	311	if (myNode->isRoot()==false) {
	312	out<< myNode->name();
	313	if(probs){
	314	for(fatherNodeIndex = 0;fatherNodeIndex < (*probs)[myNode->id()].size();++fatherNodeIndex){
	315	for(sonNodeIndex = 0;sonNodeIndex < (*probs)[myNode->id()][fatherNodeIndex].size();++sonNodeIndex){
	316	if((from == fatherNodeIndex)&&(to == sonNodeIndex)){
	317	out<<"_P_"<<(*probs)[myNode->id()][fatherNodeIndex][sonNodeIndex]<< ":"<<myNode->dis2father(); //< "["<<values[myNode->id()]<<"]";
	318	}
	319	}
	320	}
	321	}
	322	}
	323	}
	324	}
	325
	326	void printDataOnTreeAsBPValues(ostream &out, Vstring &data, tree &tr) {
	327	printDataOnTreeAsBPValues(out,data, tr.getRoot());
	328	out<<";";
	329	}
	330
	331	void printDataOnTreeAsBPValues(ostream &out, Vstring &data, const tree::nodeP &myNode) {
	332	if (myNode->isLeaf()) {
	333	out << myNode->name()<< ":"<<myNode->dis2father();
	334	return;
	335	} else {
	336	out <<"(";
	337	for (int i=0;i<myNode->getNumberOfSons();++i) {
	338	if (i>0) out <<",";
	339	printDataOnTreeAsBPValues(out,data,myNode->getSon(i));
	340	}
	341	out <<")";
	342	// out.precision(3);
	343	// out<<data[myNode->id()];
	344	// if (myNode->isRoot()==false) {
	345	out.precision(3);
	346	out<<data[myNode->id()];
	347	out<<":"<<myNode->dis2father();
	348	// }
	349	}
	350	}
	351
	352	vector<tree> getNexusTreesFromFile (const string& nexusTreesFile)
	353	{
	354	ifstream treesFile(nexusTreesFile.c_str());
	355	if (!treesFile) {
	356	errorMsg::reportError("could not open nexus tree file");
	357	}
	358
	359	vector<tree> treeVec;
	360	vector<string> fileData;
	361	putFileIntoVectorStringArray(treesFile , fileData);
	362	treesFile.close();
	363	vector<string>::const_iterator it = fileData.begin();
	364
	365	// first line start with "#NEXUS"
	366	if (it->find("#NEXUS") == -1)
	367	errorMsg::reportError("NEXUS tree format must start with 'NEXUS' in the first line");
	368	++it;
	369
	370	string::const_iterator itStrStart = it->begin();
	371	string::const_iterator itStrEnd = it->end();
	372	// second line start as [ID: 0759674699]
	373	//if (((itStrStart++) != '[') \|\| ((itStrStart++) != 'I')
	374	// \|\| ((itStrStart++) != 'D') \|\| ((itStrStart++) != ':'))
	375	//{
	376	// errorMsg::reportError("Cannot find proper ID format in first line of alphaFile");
	377	//}
	378	//int idStart = it->find_first_of("1234567890");
	379	//int idEnd = it->find_last_of("]");
	380	//string treeFileID = it->substr(idStart, idEnd-idStart);
	381	//it += 2; //skipp also 3rd line
	382
	383	while ( ( (it).find("Translate") == -1) && ((it).find("translate") == -1) &&(it != fileData.end()))
	384	++it;
	385	//translate table [id name]
	386	vector<string> nameTable(0);
	387	vector<int> idTable(0);
	388
	389	for(++it; (it->find(";") == -1) && (it->find("tree") == -1) ; ++it)
	390	{
	391	if (it->find(";") != -1) {
	392	break;
	393	}
	394
	395	int idStartPos = it->find_first_of("0123456789");
	396	int idEndPos = it->find_first_not_of("0123456789", idStartPos);
	397	string idStr = it->substr(0, idEndPos);
	398	int id = atoi(idStr.c_str());
	399	int nameStartPos = it->find_first_not_of(" ", idEndPos);
	400	int nameEndPos = it->find_first_of(",;", idEndPos);
	401	string nameStr = it->substr(nameStartPos, nameEndPos - nameStartPos);
	402	nameTable.push_back(nameStr);
	403	idTable.push_back(id);
	404	}
	405	while (it->find("tree") == -1)
	406	++it;
	407
	408	for (; it->find("tree") != -1 ; ++it)
	409	{
	410	int pos = it->find_first_of("(");
	411	string treeStr = it->substr(pos);
	412	vector<char> treeContents;
	413	for (string::iterator itStr = treeStr.begin(); itStr != treeStr.end(); ++itStr)
	414	{
	415	if (!isspace(*itStr))
	416	treeContents.push_back((*itStr));
	417	}
	418	tree tr(treeContents);
	419	for(int i=0 ; i < idTable.size(); ++i)
	420	{
	421	tree::nodeP node = tr.findNodeByName(int2string(idTable[i]));
	422	node->setName(nameTable[i]);
	423	}
	424	treeVec.push_back(tr);
	425	}
	426
	427	return treeVec;
	428	}
	429

+50

-0

libs/phylogeny/treeUtil.h less more

	0	// $Id: treeUtil.h 10476 2012-03-18 07:57:33Z itaymay $
	1
	2	#ifndef ___TREE_UTIL
	3	#define ___TREE_UTIL
	4	#include "definitions.h"
	5	#include "tree.h"
	6
	7	vector<tree> getStartingTreeVecFromFile(string fileName);
	8
	9	tree starTree(const vector<string>& names);
	10
	11	void getStartingTreeVecFromFile(string fileName,
	12	vector<tree>& vecT,
	13	vector<char>& constraintsOfT0);
	14
	15	vector<tree> getNexusTreesFromFile (const string& nexusTreesFile);
	16
	17	bool sameTreeTolopogy(tree t1, tree t2);
	18
	19	bool cutTreeToTwo(tree bigTree,
	20	const string& nameOfNodeToCut,
	21	tree &small1,
	22	tree &small2);
	23
	24	tree::nodeP makeNodeBetweenTwoNodes( tree& et,
	25	tree::nodeP nodePTR1,
	26	tree::nodeP nodePTR2,
	27	const string &interName);
	28
	29	void cutTreeToTwoSpecial(const tree& source,
	30	tree::nodeP intermediateNode,
	31	tree &resultT1PTR,
	32	tree &resultT2PTR);
	33
	34	vector<string> getSequencesNames(const tree& t);
	35
	36	MDOUBLE getSumOfBranchLengths(const tree &t);
	37
	38	void printDataOnTreeAsBPValues(ostream &out, Vstring &data, tree &tr) ;
	39	void printDataOnTreeAsBPValues(ostream &out, Vstring &data, const tree::nodeP &myNode) ;
	40
	41	MDOUBLE getDistanceFromNode2ROOT(const tree::nodeP &myNode);
	42	void fillAllNodesNames(Vstring& Vnames,const tree& tr);
	43
	44	void printTreeWithValuesAsBP(ostream &out, const tree &tr, Vstring values, VVVdouble *probs, int from, int to);
	45	void printTreeWithValuesAsBP(ostream &out, const tree::nodeP &myNode, Vstring values, VVVdouble *probs, int from, int to);
	46
	47
	48	#endif
	49

+32

-0

libs/phylogeny/trivialAccelerator.h less more

	0	// $Id: trivialAccelerator.h 1925 2007-04-04 16:40:22Z privmane $
	1
	2	#ifndef ___TRIVIAL_ACCELERATOR
	3	#define ___TRIVIAL_ACCELERATOR
	4
	5	#include "pijAccelerator.h"
	6	#include "replacementModel.h"
	7
	8	class trivialAccelerator : public pijAccelerator {
	9	public:
	10
	11	explicit trivialAccelerator(const replacementModel* pb): _pb(pb->clone()) {};
	12	trivialAccelerator(const trivialAccelerator& other):_pb(NULL){if (other._pb != NULL) _pb = other._pb->clone();}
	13	const MDOUBLE Pij_t(const int i,const int j, const MDOUBLE d) const {return _pb->Pij_t(i,j,d);}
	14	const MDOUBLE dPij_dt(const int i,const int j, const MDOUBLE d) const{return _pb->dPij_dt(i,j,d);};
	15	const MDOUBLE d2Pij_dt2(const int i,const int j, const MDOUBLE d) const{return _pb->d2Pij_dt2(i,j,d);};
	16	const MDOUBLE freq(const int i) const{return _pb->freq(i);}
	17	virtual pijAccelerator* clone() const { return new trivialAccelerator(*this);}
	18	virtual ~trivialAccelerator() {delete _pb;}
	19	virtual const int alphabetSize() const {return _pb->alphabetSize();}
	20	virtual replacementModel* getReplacementModel() const {return (_pb);}
	21
	22	private:
	23	replacementModel* _pb;
	24	};
	25
	26	#endif
	27
	28	// There is no distribution in the trivial accelerator. It is just an interface
	29	// to the replacement model and does not accelerate anything.
	30	// Every method returns exactly the result of the corresponding replacementModel method.
	31

+82

-0

libs/phylogeny/unObservableData.cpp less more

	0	#include "unObservableData.h"
	1	#include "likelihoodComputation.h"
	2	#include "likelihoodComputationGL.h"
	3	#include <math.h>
	4
	5
	6	using namespace std;
	7
	8	unObservableData::unObservableData(const sequenceContainer& sc,const stochasticProcess* sp ,const gainLossAlphabet alph, const int minNumOfOnes, const int minNumOfZeros)
	9	{
	10	_scZero.startZeroSequenceContainerGL(sc,alph, minNumOfOnes, minNumOfZeros);
	11	_LforMissingDataPerCat.resize(sp->categories());
	12	}
	13
	14	unObservableData::unObservableData(const unObservableData& other) //const
	15	{
	16	_scZero = other._scZero;
	17	_pi = other._pi;
	18	_logLforMissingData = other._logLforMissingData;
	19	_LforMissingDataPerCat = other._LforMissingDataPerCat;
	20	}
	21	Vdouble* unObservableData::getpLforMissingDataPerCat(){return &_LforMissingDataPerCat;}
	22	Vdouble unObservableData::getLforMissingDataPerCat(){return _LforMissingDataPerCat;}
	23	MDOUBLE unObservableData::getlogLforMissingData(){return _logLforMissingData;}
	24	int unObservableData::getNumOfUnObservablePatterns(){return _scZero.seqLen();}
	25
	26
	27	//void unObservableData::setLforMissingData(const tree& _tr, const stochasticProcess* _sp){
	28	// _pi.fillPij(_tr,*_sp);
	29	//// NOTE: The "perCat" is out
	30	// _LforMissingDataPerCat = likelihoodComputation::getLofPosPerCat(0,_tr,_scZero,_pi,_sp); // L sp.ratesProb(i)
	31	// _logLforMissingData = 0;
	32	// for (int i=0; i < _sp->categories();++i) {
	33	// _logLforMissingData += _LforMissingDataPerCat[i];
	34	// }
	35	// _logLforMissingData = log(_logLforMissingData);
	36	//}
	37
	38	/********************************************************************************************
	39	*********************************************************************************************/
	40	void unObservableData::setLforMissingData(const tree& tr, const stochasticProcess* sp){
	41	_pi.fillPij(tr,*sp);
	42	_logLforMissingData = 0;
	43	for(int pos=0; pos<_scZero.seqLen(); ++pos){
	44	_logLforMissingData += convert(likelihoodComputation::getLofPos(pos,tr,_scZero,_pi,*sp));
	45	}
	46	_logLforMissingData = log(_logLforMissingData);
	47	}
	48	/********************************************************************************************
	49	*********************************************************************************************/
	50	void unObservableData::setLforMissingData(const tree& tr, const vector<vector<stochasticProcess*> >& spVVec,
	51	const distribution* distGain, const distribution* distLoss)
	52	{
	53
	54	_logLforMissingData = 0;
	55	int numOfRateCategories = spVVec[0][0]->categories();
	56	vector<computePijGam> pi_vec(numOfRateCategories);
	57	vector<suffStatGlobalGam> ssc_vec(numOfRateCategories);
	58	vector<computeUpAlg> cup_vec(numOfRateCategories);
	59	likelihoodComputationGL::fillPijAndUp(tr,_scZero, spVVec,distGain,distLoss,pi_vec,ssc_vec,cup_vec);
	60
	61	for (int k=0; k < _scZero.seqLen(); ++k) {
	62	MDOUBLE resGivenRate = 0.0;
	63	MDOUBLE lnL = 0;
	64	for(int rateIndex=0 ; rateIndex<numOfRateCategories; ++rateIndex){
	65	lnL = log(likelihoodComputationGL::getProbOfPosUpIsFilledSelectionGam(k,//pos,
	66	tr,//const tree&
	67	_scZero,// sequenceContainer& sc,
	68	spVVec, // only needed for sp.freq(let)
	69	ssc_vec[rateIndex][k],//const computePijGam& ,
	70	distGain, distLoss)); // distributions
	71	resGivenRate += lnL * spVVec[0][0]->ratesProb(rateIndex);
	72	}
	73	_logLforMissingData += exp(resGivenRate);
	74	}
	75	_logLforMissingData = log(_logLforMissingData);
	76	//for(int rateIndex=0 ; rateIndex<numOfRateCategories; ++rateIndex){
	77	// _logLforMissingData += likelihoodComputationGL::getTreeLikelihoodFromUp2(tr,_scZero,spVVec,ssc_vec[rateIndex], distGain,distLoss,NULL)
	78	// * spVVec[0][0]->ratesProb(rateIndex);
	79	//}
	80	}
	81

+45

-0

libs/phylogeny/unObservableData.h less more

	0	#ifndef ___unObservableData___GL
	1	#define ___unObservableData___GL
	2
	3	#include "definitions.h"
	4	#include "tree.h"
	5	#include "stochasticProcess.h"
	6	#include "sequenceContainer.h"
	7	#include "gainLossAlphabet.h"
	8	#include "computePijComponent.h"
	9
	10	/********************************************************************************************
	11	unObservableData
	12	*********************************************************************************************/
	13	class unObservableData{
	14	public:
	15	explicit unObservableData(const sequenceContainer& sc,const stochasticProcess* sp ,const gainLossAlphabet alph, const int minNumOfOnes, const int minNumOfZeros);
	16	unObservableData(const unObservableData& other); //const
	17	virtual ~unObservableData(){};
	18	virtual unObservableData* clone() const {return new unObservableData(*this);}
	19	Vdouble* getpLforMissingDataPerCat();
	20	Vdouble getLforMissingDataPerCat();
	21	MDOUBLE getlogLforMissingData();
	22	int getNumOfUnObservablePatterns();
	23	void setLforMissingData(const tree& _tr, const stochasticProcess* _sp);
	24	//void setLforMissingData(const tree& _tr, const stochasticProcess* _sp);
	25	void setLforMissingData(const tree& _tr, const vector<vector<stochasticProcess> >& spVVec, const distribution distGain, const distribution* distLoss);
	26
	27
	28
	29	//MDOUBLE getCorrectedLikelihood(MDOUBLE likePre){return }
	30
	31
	32	protected:
	33	//func
	34
	35	protected:
	36	//members
	37	sequenceContainer _scZero;
	38	Vdouble _LforMissingDataPerCat; // used foreach rate category
	39	MDOUBLE _logLforMissingData;
	40	computePijGam _pi;
	41	};
	42
	43
	44	#endif

+11

-0

libs/phylogeny/uniDistribution.cpp less more

	0	// $Id: uniDistribution.cpp 2711 2007-11-19 14:49:54Z itaymay $
	1
	2	#include "uniDistribution.h"
	3	#include "errorMsg.h"
	4
	5
	6	void uniDistribution::change_number_of_categories(int in_number_of_categories)
	7	{
	8	if (in_number_of_categories != 1)
	9	errorMsg::reportError("error in uniDistribution::change_number_of_categories() - number of categories is not 1");
	10	}

+37

-0

libs/phylogeny/uniDistribution.h less more

	0	// $Id: uniDistribution.h 2812 2007-11-25 10:32:11Z itaymay $
	1
	2	// version 2.00
	3	// last modified 21 Mar 2004
	4	#ifndef ___UNIFORM_DIST
	5	#define ___UNIFORM_DIST
	6
	7	#include "distribution.h"
	8
	9	/***********************************************************
	10	This represents a distribution of one line over the value 1:
	11	\|
	12	________\|________
	13	1
	14	_globalRate represents the rate for two joint genes.
	15	************************************************************/
	16
	17	class uniDistribution : public distribution {
	18
	19	public:
	20	uniDistribution() {_globalRate=1;}
	21	virtual const int categories() const { return 1;}
	22	virtual void change_number_of_categories(int in_number_of_categories);
	23	virtual const MDOUBLE rates(const int i) const { return _globalRate;};
	24	virtual const MDOUBLE ratesProb(const int i) const { return 1.0;};
	25	virtual distribution* clone() const { return new uniDistribution(*this); }
	26	virtual void setGlobalRate(const MDOUBLE x) {_globalRate = x;}
	27	virtual MDOUBLE getGlobalRate() const{return _globalRate;}
	28	virtual const MDOUBLE getCumulativeProb(const MDOUBLE x) const {
	29	if (x<1.0) return 0.0; else return 1.0;
	30	}
	31
	32	MDOUBLE _globalRate;
	33	};
	34
	35	#endif
	36

+64

-0

libs/phylogeny/uniformDistribution.cpp less more

	0	// $Id: uniformDistribution.cpp 2712 2007-11-19 14:50:12Z itaymay $
	1
	2	#include "uniformDistribution.h"
	3
	4
	5	uniformDistribution::uniformDistribution(const int numOfCategories, MDOUBLE lowerBound,
	6	MDOUBLE upperBound) :distribution() {
	7	_globalRate=1.0;
	8	setUniformParameters(numOfCategories, lowerBound, upperBound);
	9	}
	10
	11
	12	//copy constructor
	13	uniformDistribution::uniformDistribution(const uniformDistribution& other) :
	14	_rates(other._rates),
	15	_ratesProb(other._ratesProb),
	16	_globalRate(other._globalRate),
	17	_interval(other._interval),
	18	_upperBound(other._upperBound),
	19	_lowerBound(other._lowerBound)
	20	{
	21	}
	22
	23
	24
	25	void uniformDistribution::setUniformParameters(const int number_of_categories,
	26	MDOUBLE lowerBound, MDOUBLE upperBound){
	27	_upperBound = upperBound;
	28	_lowerBound = lowerBound;
	29
	30	_interval = ((upperBound - lowerBound) / (number_of_categories+0.0));
	31	_rates.clear();
	32	_rates.resize(number_of_categories);
	33	_ratesProb.erase(_ratesProb.begin(),_ratesProb.end());
	34	_ratesProb.resize(number_of_categories, 1.0/number_of_categories);
	35	//setting _rates[i] as the middle value of each category
	36	for (int i = 0; i < number_of_categories; ++i) {
	37	_rates[i] = _lowerBound + (_interval * (i + 0.5));
	38	}
	39	}
	40
	41	//returns the ith border between categories
	42	//getBorder(0) = _lowerBound, getBorder(categories()) = _upperBound
	43	MDOUBLE uniformDistribution::getBorder(int i) const {
	44	return (i == categories()) ? _upperBound : (_rates[i] - (_interval/2));
	45	}
	46
	47	const MDOUBLE uniformDistribution::getCumulativeProb(const MDOUBLE x) const
	48	{
	49	if (x<_lowerBound)
	50	return 0;
	51	else if (x>= _upperBound)
	52	return 1;
	53	else
	54	return ((x-_lowerBound) / (_upperBound - _lowerBound));
	55	}
	56
	57	void uniformDistribution::change_number_of_categories(int in_number_of_categories)
	58	{
	59	if (in_number_of_categories == categories())
	60	return;
	61	setUniformParameters(in_number_of_categories, _lowerBound, _upperBound);
	62	}
	63

+66

-0

libs/phylogeny/uniformDistribution.h less more

	0	// $Id: uniformDistribution.h 5807 2009-01-20 09:23:51Z adido $
	1
	2	// version 2.00
	3	// last modified 21 Mar 2004
	4	#ifndef ___FLAT_DIST
	5	#define ___FLAT_DIST
	6
	7	/************************************************************
	8	This represents a uniform (rectangular) distribution of one column between
	9	a (lower_bound) and b (upper_bound)
	10
	11	        |---|
	12	________|___|_____
	13	        a   b
	14	The distribution (or rather the interval (a,b)) is divided into categories (portions of the distribution),
	15	where _rates is a vector with the median (midpoint) value of each category. _ratesProb represents
	16	the probability of each category.
	17	_globalRate represents the rate for two joint genes.
	18	************************************************************/
	19
	20
	21	#include "definitions.h"
	22	#include "distribution.h"
	23
	24	class uniformDistribution : public distribution {
	25
	26	public:
	27	explicit uniformDistribution(const int numOfCategories, MDOUBLE lowerBound,
	28	MDOUBLE upperBound);
	29	explicit uniformDistribution(){_globalRate=1.0;};
	30	explicit uniformDistribution(const uniformDistribution& other);
	31
	32	virtual ~uniformDistribution() {};
	33
	34	const int categories() const {return _rates.size();}
	35	virtual void change_number_of_categories(int in_number_of_categories);
	36	virtual const MDOUBLE rates(const int i) const {return _rates[i]*_globalRate;}
	37	virtual const MDOUBLE ratesProb(const int i) const {return _ratesProb[i];}
	38	virtual distribution* clone() const { return new uniformDistribution(*this); }
	39	virtual void setGlobalRate(const MDOUBLE x) {_globalRate = x;}
	40	virtual MDOUBLE getGlobalRate() const {return _globalRate;}
	41
	42	virtual const MDOUBLE getCumulativeProb(const MDOUBLE x) const;
	43	MDOUBLE getBorder(const int i) const ; //return the ith border. Note: _bonderi[0] = m_lowerLimit, _bondery[categories()] = m_upperLimit
	44
	45	void setUniformParameters(const int numOfCategories, MDOUBLE lowerBound, MDOUBLE upperBound);
	46
	47
	48
	49	private:
	50	Vdouble _rates;
	51	Vdouble _ratesProb;
	52	MDOUBLE _globalRate;
	53
	54	MDOUBLE _interval;
	55	MDOUBLE _upperBound;
	56	MDOUBLE _lowerBound;
	57	};
	58
	59
	60	#endif
	61
	62	//TO DO:
	63	//1. change categories() to numOfCategories()
	64
	65

+125

-0

libs/phylogeny/ussrvModel.cpp less more

	0	// $Id: ussrvModel.cpp 962 2006-11-07 15:13:34Z privmane $
	1	#include "ussrvModel.h"
	2
	3	ussrvModel::ussrvModel(const stochasticProcess& baseSp, const stochasticProcessSSRV& ssrvSp, const MDOUBLE& f)
	4	: _f(f),_baseSp(NULL),_ssrvSp(NULL)
	5	{
	6	_baseSp = new stochasticProcess(baseSp);
	7	_ssrvSp = new stochasticProcessSSRV(ssrvSp);
	8
	9	// get alpha from sp
	10	replacementModelSSRV* pMulRM = static_cast<replacementModelSSRV*>(_ssrvSp->getPijAccelerator()->getReplacementModel());
	11	_alpha = static_cast<gammaDistribution*>(pMulRM->getDistribution())->getAlpha();
	12
	13	// check that alpha is equal the baseSp alpha
	14	MDOUBLE baseSpAlpha = static_cast<gammaDistribution*>(baseSp.distr())->getAlpha();
	15	if (_alpha != baseSpAlpha)
	16	errorMsg::reportError("Error in the constructor of ussrvModel. alpha of the ssrv stochastic process is different from that of the base model");
	17	}
	18
	19	ussrvModel::~ussrvModel()
	20	{
	21	if (_baseSp) delete _baseSp;
	22	if (_ssrvSp) delete _ssrvSp;
	23	}
	24
	25	ussrvModel::ussrvModel(const ussrvModel& other)
	26	{
	27	_f = other._f;
	28	_baseSp = new stochasticProcess(*other._baseSp);
	29	_ssrvSp = new stochasticProcessSSRV(*other._ssrvSp);
	30	}
	31
	32	ussrvModel& ussrvModel::operator=(const ussrvModel& other)
	33	{
	34	if (_baseSp) delete _baseSp;
	35	if (_ssrvSp) delete _ssrvSp;
	36
	37	_f = other._f;
	38	_alpha = other._alpha;
	39
	40	_baseSp = new stochasticProcess(*other._baseSp);
	41	_ssrvSp = new stochasticProcessSSRV(*other._ssrvSp);
	42
	43	return *this;
	44	}
	45
	46	void ussrvModel::updateAlpha(const MDOUBLE& alpha)
	47	{
	48	_alpha = alpha;
	49	if (alpha<0)
	50	{
	51	LOG(4, << "ussrvModel::updateAlpha , alpha is < 0 " << endl);
	52	return;
	53	}
	54	// update alpha of the ssrv model
	55	replacementModelSSRV* pMulRM = static_cast<replacementModelSSRV*>(_ssrvSp->getPijAccelerator()->getReplacementModel());
	56	gammaDistribution* gammaDist = static_cast<gammaDistribution*>(pMulRM->getDistribution());
	57	gammaDist->setAlpha(alpha);
	58	pMulRM->updateQ();
	59
	60	// update alpha of the base model
	61	(static_cast<gammaDistribution*>(_baseSp->distr()))->setAlpha(alpha);
	62	}
	63
	64	void ussrvModel::updateNu(const MDOUBLE& nu)
	65	{
	66	if (nu<0)
	67	{
	68	LOG(4,<<"ussrvModel::updateNu , nu is < 0 " <<endl);
	69	return;
	70	}
	71	static_cast<replacementModelSSRV*>(_ssrvSp->getPijAccelerator()->getReplacementModel())->setRateOfRate(nu);
	72	}
	73
	74	MDOUBLE ussrvModel::getNu() const
	75	{
	76	return (static_cast<replacementModelSSRV*>(_ssrvSp->getPijAccelerator()->getReplacementModel())->getRateOfRate());
	77	}
	78
	79	void ussrvModel::updateF(const MDOUBLE& f)
	80	{
	81	if ((f<0) \|\| (f>1))
	82	{
	83	LOG(4,<<"ussrvModel::updateF , f must be between 0 to 1. f is: "<< f << endl);
	84	return;
	85	}
	86	_f=f;
	87	}
	88
	89	// In order for the branch lengths and the nu parameter to be meaningfull, one must normalize the
	90	// matrices of both the replacement models (the base model and the ssrv model)
	91	// so that fSigma[i](PiQij) + (1-f)Sigma[i](P`iQ`ij) = 1 (for i!=j)
	92	// where Q and P belong to the ssrv model, P` and Q` belong to the base model. (Q` doesn't include the rates)
	93	// The normalization doesn't affect the likelihood.
	94	// see below for more explanations.
	95	// Theoretically, we should therefore calculate this weighted sumPijQij (Denote by x), and then:
	96	// 1) devide nu by x.
	97	// 2) devide all the rates (of the base model and of the ssrv model) by x.
	98	// (this could be done using the _globalRate member of the gammaDistribution class)
	99	// 3) multiply every branch length by x.
	100	// Instead, we just report x, so that the user can do all this whenever he wishes to.
	101
	102	MDOUBLE ussrvModel::calcNormalizeFactor()
	103	{
	104	// calculate sumPijQij
	105	MDOUBLE sumPijQij = 0.0;
	106	int i;
	107	// of the base model
	108	int baseAlphabetSize = _baseSp->alphabetSize();
	109	for (i=0; i < baseAlphabetSize; ++i)
	110	sumPijQij-= _baseSp->freq(i) * _baseSp->dPij_dt(i,i,0);
	111	sumPijQij*=(1-_f);
	112
	113	// of the ssrv model
	114	sumPijQij+=_fstatic_cast<replacementModelSSRV>(_ssrvSp->getPijAccelerator()->getReplacementModel())->sumPijQij();
	115
	116	return sumPijQij;
	117	}
	118
	119	// This is not done when using normal sp (instead of ussrvModel), since:
	120	// average(rates)=1 -->
	121	// (for 2 categories, f=0.5, 1-f =0.5) 0.5r1Sigma[i](PiQij) + 0.5r2Sigma[i](PiQij) = 1 -->
	122	// (since (r1+r2)*0.5 = 1) Sigma[i](PiQij) = 1 . This is always true, and taken care of in the readMatrix
	123	// method.
	124

+41

-0

libs/phylogeny/ussrvModel.h less more

	0	// $Id: ussrvModel.h 962 2006-11-07 15:13:34Z privmane $
	1	#ifndef _USSRV_MODEL
	2	#define _USSRV_MODEL
	3
	4	#include "stochasticProcessSSRV.h"
	5	#include "stochasticProcess.h"
	6	#include "errorMsg.h"
	7	#include "gammaDistribution.h"
	8	#include "replacementModelSSRV.h"
	9	#include "logFile.h"
	10	class ussrvModel
	11	{
	12	public:
	13	explicit ussrvModel(){errorMsg::reportError("This constractor shold never be used");}
	14	explicit ussrvModel(const stochasticProcess& baseSp, const stochasticProcessSSRV& ssrvSp, const MDOUBLE& f);
	15	virtual ~ussrvModel();
	16	explicit ussrvModel(const ussrvModel& other);
	17	ussrvModel& operator=(const ussrvModel& other);
	18	// const int alphabetSize() const ;
	19	MDOUBLE getF() const {return _f;}
	20	MDOUBLE getAlpha() const {return _alpha;}
	21	MDOUBLE getNu() const ;
	22	const stochasticProcessSSRV& getSSRVmodel() const {return *_ssrvSp;}
	23	const stochasticProcess& getBaseModel() const {return *_baseSp;}
	24	int noOfCategor() const {return _baseSp->categories();}
	25	MDOUBLE getCategorProb(int i) const {return _baseSp->distr()->ratesProb(i);}
	26
	27	void updateF(const MDOUBLE& f);
	28	void updateAlpha(const MDOUBLE& alpha);
	29	void updateNu(const MDOUBLE& nu);
	30
	31	MDOUBLE calcNormalizeFactor(); // return the factor according to which the model should be normalized.
	32
	33	private:
	34	MDOUBLE _f; //probability of SSRV model. The probability of the base model, i.e. no SSRV, is 1-_f .
	35	MDOUBLE _alpha; // should be always equal to the _baseSp alpha and the _ssrvSp alpha.
	36	stochasticProcess* _baseSp; // for the base model
	37	stochasticProcessSSRV* _ssrvSp; // for the SSRV model
	38	};
	39
	40	#endif // _USSRV_MODEL

+96

-0

libs/phylogeny/wYangModel.cpp less more

	0	#include "wYangModel.h"
	1	#include "codon.h"
	2	#include "readDatMatrix.h" // for the normalizeQ function.
	3
	4	wYangModel::wYangModel(const MDOUBLE inW, const MDOUBLE inK,bool globalW, codon * coAlph):
	5	_w(inW),_k(inK),_globalW(globalW),_coAlpha(NULL){
	6	_coAlpha = (codon*)(coAlph->clone());
	7	codonUtility::initSubMatrices(*_coAlpha);
	8	homogenousFreq();
	9	_Q.resize(alphabetSize());
	10	for (int z=0; z < _Q.size();++z) _Q[z].resize(alphabetSize(),0.0);
	11	updateQ();
	12	}
	13
	14	wYangModel::wYangModel(const MDOUBLE inW, const MDOUBLE inK, const Vdouble& freq,bool globalW, codon * coAlph):
	15	_w(inW),_k(inK),_globalW(globalW),_freq(freq),_coAlpha(NULL){
	16	_coAlpha = (codon*)(coAlph->clone());
	17	_Q.resize(alphabetSize());
	18	codonUtility::initSubMatrices(*_coAlpha);
	19	for (int z=0; z < _Q.size();++z) _Q[z].resize(alphabetSize(),0.0);
	20	updateQ();
	21	}
	22
	23
	24	wYangModel& wYangModel::operator=(const wYangModel &other) {
	25	_w = other._w;
	26	_k = other._k;
	27	_q2pt = other._q2pt;
	28	_Q = other._Q;
	29	_globalW = other._globalW;
	30	_freq = other._freq;
	31	if (_coAlpha) delete _coAlpha;
	32	if (other._coAlpha)
	33	_coAlpha = (codon*)(other._coAlpha->clone());
	34	else
	35	_coAlpha = NULL;
	36	return *this;
	37
	38	}
	39
	40
	41
	42	void wYangModel::updateQ() {
	43	int i,j;
	44	MDOUBLE sum=0.0;
	45	for (i=0; i < _Q.size();++i) {
	46	for (j=i+1; j < _Q.size();++j) {
	47	MDOUBLE val;
	48	if (codonUtility::codonReplacement(i,j) == codonUtility::non_synonymous) {
	49	if (codonUtility::codonDiff(i,j) == codonUtility::tr) val = _k*_w;
	50	else if (codonUtility::codonDiff(i,j) == codonUtility::tv) val = _w;
	51	else val = 0;//more than one substitution.
	52	}
	53	else {//synonymous
	54	if (codonUtility::codonDiff(i,j) == codonUtility::tr) val = _k;
	55	else if (codonUtility::codonDiff(i,j) == codonUtility::tv) val = 1;
	56	else val = 0;//more than one substitution.
	57	}
	58	_Q[i][j] = val * _freq[j];
	59	_Q[j][i] = val * _freq[i];
	60	}
	61	_Q[i][i] = 0.0; //temporary value
	62	}
	63	// filling the diagonal
	64	for (i=0; i < _Q.size(); ++i){
	65	sum = 0.0;
	66	for (j=0; j < _Q.size(); ++j) {
	67	sum += _Q[i][j];
	68	}
	69	_Q[i][i] = -sum;
	70	}
	71	if (_globalW == true) // w is not distributed, only one Q matrix
	72	normalizeQ(_Q,_freq);
	73
	74	_q2pt.fillFromRateMatrix(_freq,_Q);
	75	}
	76
	77
	78	void wYangModel::norm(MDOUBLE scale){
	79	for (int i=0; i < _Q.size(); ++i) {
	80	for (int j=0; j < _Q.size(); ++j) {
	81	_Q[i][j] *=scale;
	82
	83	}
	84	}
	85	_q2pt.fillFromRateMatrix(_freq,_Q);
	86	}
	87
	88
	89	MDOUBLE wYangModel::sumPijQij(){
	90	MDOUBLE sum=0.0;
	91	for (int i=0; i < _Q.size(); ++i) {
	92	sum -= (_Q[i][i])*_freq[i];
	93	}
	94	return sum;
	95	}

+59

-0

libs/phylogeny/wYangModel.h less more

	0	#ifndef _W_YANG_MODEL
	1	#define _W_YANG_MODEL
	2
	3	#include "replacementModel.h"
	4	#include "fromQtoPt.h"
	5	#include "codon.h"
	6
	7
	8	class wYangModel : public replacementModel {
	9	public:
	10	explicit wYangModel(const MDOUBLE inW, const MDOUBLE inK,bool globalW, codon * coAlpha);
	11	explicit wYangModel(const MDOUBLE inW, const MDOUBLE inK, const Vdouble& freq,bool globalW, codon *coAlpha);
	12	explicit wYangModel(const wYangModel &other): _coAlpha(NULL) {(*this) = other;}
	13	virtual wYangModel& operator=(const wYangModel &other);
	14	virtual wYangModel* clone() const { return new wYangModel(*this); }
	15	virtual ~wYangModel() {
	16	if (_coAlpha)
	17	delete _coAlpha;
	18	}
	19
	20	const int alphabetSize() const {return _freq.size();}
	21	const MDOUBLE Pij_t(const int i,const int j, const MDOUBLE d) const {
	22	return _q2pt.Pij_t(i,j,d);
	23	}
	24	const MDOUBLE dPij_dt(const int i,const int j, const MDOUBLE d) const{
	25	return _q2pt.dPij_dt(i,j,d);
	26	}
	27	const MDOUBLE d2Pij_dt2(const int i,const int j, const MDOUBLE d) const{
	28	return _q2pt.d2Pij_dt2(i,j,d);
	29	}
	30	const MDOUBLE freq(const int i) const {return _freq[i];};
	31	void setK(const MDOUBLE newK) { _k = newK; updateQ();}
	32	void setW(const MDOUBLE newW) { _w = newW;updateQ();}
	33	void homogenousFreq(){ _freq.erase(_freq.begin(),_freq.end()),_freq.resize(alphabetSize(),1.0/alphabetSize());}
	34
	35	MDOUBLE getK() const {return _k;}
	36	MDOUBLE getW() const {return _w;}
	37
	38	MDOUBLE getQij(const int i,const int j)const {return _Q[i][j];}
	39	void setGlobalW(bool globalW){_globalW = globalW;}
	40	void norm(MDOUBLE scale);
	41	MDOUBLE sumPijQij();
	42	private:
	43	void updateQ();
	44
	45
	46	private:
	47
	48	MDOUBLE _w; //selection factor.
	49	MDOUBLE _k; // Tr/Tv ratio.
	50	q2pt _q2pt;
	51	VVdouble _Q;
	52	bool _globalW; //false when compute w per site
	53	Vdouble _freq;
	54	codon *_coAlpha;
	55	};
	56
	57
	58	#endif

+42

-0

libs/phylogeny/wag.dat.q less more

	0	" "
	1	" 0.551571 "
	2	" 0.509848 0.635346 "
	3	" 0.738998 0.147304 5.429420 "
	4	" 1.027040 0.528191 0.265256 0.0302949 "
	5	" 0.908598 3.035500 1.543640 0.616783 0.0988179 "
	6	" 1.582850 0.439157 0.947198 6.174160 0.021352 5.469470 "
	7	" 1.416720 0.584665 1.125560 0.865584 0.306674 0.330052 0.567717 "
	8	" 0.316954 2.137150 3.956290 0.930676 0.248972 4.294110 0.570025 0.249410 "
	9	" 0.193335 0.186979 0.554236 0.039437 0.170135 0.113917 0.127395 0.0304501 0.138190 "
	10	" 0.397915 0.497671 0.131528 0.0848047 0.384287 0.869489 0.154263 0.0613037 0.499462 3.170970 "
	11	" 0.906265 5.351420 3.012010 0.479855 0.0740339 3.894900 2.584430 0.373558 0.890432 0.323832 0.257555 "
	12	" 0.893496 0.683162 0.198221 0.103754 0.390482 1.545260 0.315124 0.174100 0.404141 4.257460 4.854020 0.934276 "
	13	" 0.210494 0.102711 0.0961621 0.0467304 0.398020 0.0999208 0.0811339 0.049931 0.679371 1.059470 2.115170 0.088836 1.190630 "
	14	" 1.438550 0.679489 0.195081 0.423984 0.109404 0.933372 0.682355 0.243570 0.696198 0.0999288 0.415844 0.556896 0.171329 0.161444 "
	15	" 3.370790 1.224190 3.974230 1.071760 1.407660 1.028870 0.704939 1.341820 0.740169 0.319440 0.344739 0.967130 0.493905 0.545931 1.613280 "
	16	" 2.121110 0.554413 2.030060 0.374866 0.512984 0.857928 0.822765 0.225833 0.473307 1.458160 0.326622 1.386980 1.516120 0.171903 0.795384 4.378020 "
	17	" 0.113133 1.163920 0.0719167 0.129767 0.717070 0.215737 0.156557 0.336983 0.262569 0.212483 0.665309 0.137505 0.515706 1.529640 0.139405 0.523742 0.110864 "
	18	" 0.240735 0.381533 1.086000 0.325711 0.543833 0.227710 0.196303 0.103604 3.873440 0.420170 0.398618 0.133264 0.428437 6.454280 0.216046 0.786993 0.291148 2.485390 "
	19	" 2.006010 0.251849 0.196246 0.152335 1.002140 0.301281 0.588731 0.187247 0.118358 7.821300 1.800340 0.305434 2.058450 0.649892 0.314887 0.232739 1.388230 0.365369 0.314730 "
	20	" 0.0866279 0.043972 0.0390894 0.0570451 0.0193078 0.0367281 0.0580589 0.0832518 0.0244313 0.048466 0.086209 0.0620286 0.0195027 0.0384319 0.0457631 0.0695179 0.0610127 0.0143859 0.0352742 0.0708956 "
	21	" A R N D C Q E G H I L K M F P S T W Y V "
	22	" Ala Arg Asn Asp Cys Gln Glu Gly His Ile Leu Lys Met Phe Pro Ser Thr Trp Tyr Val "
	23	" "
	24	" Symmetrical part of the rate matrix and aa frequencies, "
	25	" estimated from 3905 globular protein amino acid sequences forming 182 "
	26	" protein families. "
	27	" The first part above indicates the symmetric 'exchangeability' "
	28	" parameters, where s_ij = s_ji. The s_ij above are not scaled, but the "
	29	" PAML package will perform this scaling. "
	30	" The second part gives the amino acid frequencies (pi_i) "
	31	" estimated from the 3905 sequences. The net replacement rate from i to "
	32	" j is Q_ij = s_ij*pi_j. "
	33	" Prepared by Simon Whelan and Nick Goldman, September 2000. "
	34	" Citation: "
	35	" Whelan, S. and N. Goldman. In press. A general empirical model of "
	36	" protein evolution derived from multiple protein families using "
	37	" a maximum likelihood approach. Molecular Biology and "
	38	" Evolution. "
	39	" See the following reference for notation used here: "
	40	" Yang, Z., R. Nielsen and M. Hasegawa. 1998. Models of amino acid substitution and "
	41	" applications to mitochondrial protein evolution. Mol. Biol. Evol. 15:1600-1611. "

+36

-0

programs/Makefile less more

	0	# $Id: Makefile 11987 2014-01-30 10:23:04Z haim $
	1
	2	# this split is vital becouse of a bug in make 3.80.1 - see
	3	# http://www.cygwin.com/ml/cygwin/2004-09/msg01659.html
	4
	5	PROGRAMS1= fastml gainLoss
	6	PROGRAMS2= indelCoder
	7	PROGRAMS = $(PROGRAMS1) $(PROGRAMS2)
	8
	9	# all has to be the FIRST task!
	10	TASKS= all clean test depend debug All install doubleRep
	11	.PHONY: $(TASKS) $(PROGRAMS)
	12
	13	define TASKS_template1
	14	$(1): $$(addsuffix .$(1),$(PROGRAMS1))
	15	endef
	16
	17	define TASKS_template2
	18	$(1): $$(addsuffix .$(1),$(PROGRAMS2))
	19	endef
	20
	21	$(foreach task,$(TASKS),$(eval $(call TASKS_template1,$(task))))
	22	$(foreach task,$(TASKS),$(eval $(call TASKS_template2,$(task))))
	23
	24	define PROGRAM_template
	25	$(1).%:
	26	+cd $(1) && make $$(*)
	27	endef
	28
	29	$(foreach prog,$(PROGRAMS),$(eval $(call PROGRAM_template,$(prog))))
	30
	31
	32
	33	$(PROGRAMS):
	34	+cd $@ && make
	35

+244

-0

programs/Makefile.generic less more

	0	# this looks better in -- Makefile -- mode
	1	# $Id: Makefile.generic 11979 2014-01-30 09:48:52Z haim $
	2
	3	DEBUGEXEC = $(EXEC:=.debug)
	4
	5
	6	#TEST_EXEC_SUB =
	7	TEST_EXEC = $(addprefix tests/,$(TEST_EXEC_SUB))
	8
	9	ifdef LIBNAME
	10	ifneq ($(LIBNAME),"")
	11	LIB = lib$(LIBNAME).a
	12	endif
	13	endif
	14	DEBUGLIB = $(LIB:.a=Debug.a)
	15	DOUBLEREPLIB = $(LIB:.a=DoubleRep.a)
	16
	17	all: lib $(EXEC)
	18
	19	#CC=g++
	20	CXX=g++
	21	CC=$(CXX)
	22
	23	libDir=../../libs/phylogeny
	24	binDir=../../bin
	25
	26	ifndef libEvol
	27	libEvol=$(libDir)/libEvolTree.a
	28	#libEvol=-lEvolTree
	29	libEvolDebug=$(libDir)/libEvolTreeDebug.a
	30	libEvolDoubleRep=$(libDir)/libEvolTreedoubleRep.a
	31	endif
	32
	33	vpath % $(libDir)
	34
	35
	36	#CPPFLAGS+= -I/usr/include/g++-v3
	37
	38	LDFLAGS += -L$(libDir)
	39
	40	#LDLIBS = -lEvolTree
	41	#debug: LDLIBS = -lEvolTreeDebug
	42	# LOADLIBES = $(LIB)
	43
	44	#LDFLAGS=
	45	#CPPFLAGS+= -DLOG -DLOGCLS -DMEMCHK
	46
	47
	48	#GENGETOPT=/cs/++/phd/ninio/gengetopt-2.11/src/gengetopt
	49	#GENGETOPT = /opt/local/bin/gengetopt
	50	#GENGETOPT = ~privmane/code/gengetopt
	51	GENGETOPT = gengetopt
	52
	53	.SECONDARY: $(addsuffix _cmdline.c,$(EXEC)) $(addsuffix _cmdline.h,$(EXEC)) $(addsuffix .ggo,$(EXEC))
	54
	55	CPPFLAGS= -O3 -Wall -Wno-sign-compare -I. -I$(libDir) -DLOG -ftemplate-depth-32
	56	CPPFLAGSDEBUG= -g -Wall -Wno-sign-compare -I. -I$(libDir) -DLOG -ftemplate-depth-32
	57
	58	LDFLAGSDEBUG := $(LDFLAGS) -g
	59	# sources
	60	sources= $(Libsources) $(LibCsources) $(addsuffix .cpp,$(EXEC) $(TEST_EXEC))
	61
	62	.PHONY: tests lib test debug %.debug DOUBLEREP doubleRep
	63
	64	ifdef DOUBLEREP
	65	CPPFLAGS+= -DDOUBLEREP
	66	CPPFLAGSDEBUG += -DDOUBLEREP
	67	LDFLAGSDEBUG += -DDOUBLEREP
	68	endif
	69
	70	test: all tests
	71	+cd tests && make -k
	72
	73	debug: $(DEBUGLIB) $(DEBUGEXEC)
	74
	75	debug: CPPFLAGS = $(CPPFLAGSDEBUG)
	76	#debug: LDLIBS = -lEvolTreeDebug
	77	debug: LIB = $(DEBUGLIB)
	78	# debug: CPPFLAGS = -g -Wall -Wno-sign-compare -I. -I$(libDir) -DLOG
	79	# debug: all
	80
	81
	82
	83
	84	#$(libEvol) le:
	85	# +cd $(libDir);make -f Makefile all
	86
	87	#$(libEvolDebug):
	88	# +cd $(libDir);make -f Makefile debug
	89
	90	lib: $(LIB)
	91
	92	#lib$(LIBNAME).a: lib$(LIBNAME).a($(Libsources:.cpp=.o) $(LibCsources:.c=.o))
	93	lib$(LIBNAME).a: $(Libsources:.cpp=.o) $(LibCsources:.c=.o)
	94	ar rv $@ $?
	95	ranlib $@
	96
	97	tags: .cpp .h
	98	etags --members --language=c++ $^
	99	EVOLLIB=-lEvolTree
	100	libEvolDebug=-lEvolTreeDebug
	101	libEvolDoubleRep=-lEvolTreeDoubleRep
	102
	103	debug: EVOLLIB=$(libEvolDebug)
	104
	105	ifdef LIBNAME
	106	# LocalLib = -l$(LIBNAME)
	107	LocalLib = lib$(LIBNAME).a
	108	endif
	109
	110	#$(EXEC): LDLIBS += $(EVOLLIB)
	111	#$(EXEC) $(TEST_EXEC): $(LIB) #$(EVOLLIB)
	112	#$(EXEC) $(TEST_EXEC): $(LIB) $(EVOLLIB)
	113	$(EXEC) $(TEST_EXEC): $(LocalLib) $(libEvol)
	114	$(DEBUGEXEC) $(TEST_EXEC): $(DEBUGLIB) $(libEvolDebug)
	115
	116	tests: $(TEST_EXEC) $(EXEC)
	117
	118	-include make.dep
	119
	120	install: $(addprefix $(binDir)/,$(EXEC))
	121	$(binDir)/%: %
	122	cp $< $@
	123
	124
	125
	126	clean:
	127	-rm -f $(LIB) $(DEBUGLIB) $(DOUBLEREPLIB) $(EXEC) $(TEST_EXEC) $(DEBUGEXEC) $(DOUBLEREPEXEC) *.o
	128
	129
	130	ifneq ($(wildcard make.dep), make.dep)
	131	make.dep: depend
	132	endif
	133
	134
	135	depend makedep: _make.dep
	136	@mv -f _make.dep make.dep
	137
	138	_make.dep: $(sources)
	139	@echo making depend
	140	# $(SHELL) -ec '$(CC) -MM $(CPPFLAGS) $^ \| sed '\''s/$$$\.o[ :]/\1.o $@ : /g'\'' > $@ ; [ -s $@ ] \|\| rm -f $@'
	141	# @$(SHELL) -ec '$(CC) -MM $(CPPFLAGS) $^ > $@'
	142	@$(SHELL) -ec '$(CC) -MM $(CPPFLAGS) $^ \| sed "s/$^[^.]*$\.o/\1.o \1.debug.o/g" > $@'
	143
	144	_fast:
	145	+cd fast && make -k all
	146
	147	fast.% _fast.%:
	148	+cd fast && make -k $(*)
	149
	150	$(libEvol):
	151	+cd $(libDir)&&make -f Makefile all
	152
	153	$(libEvolDebug):
	154	+cd $(libDir)&&make -f Makefile debug
	155
	156	define ggo_template
	157	ifeq ($(wildcard $(1).ggo), $(1).ggo)
	158	$(1): $(1)_cmdline.o
	159	endif
	160	endef
	161
	162	$(foreach exec,$(EXEC),$(eval $(call ggo_template,$(exec))))
	163
	164	#$(EXEC): $(addsuffix _cmdline.o,$(EXEC))
	165
	166	define ggo_template_debug
	167	$(1).debug: $(1)_cmdline.debug.o
	168	endef
	169
	170	$(foreach exec,$(EXEC),$(eval $(call ggo_template_debug,$(exec))))
	171
	172	define ggo_template_doublerep
	173	ifeq ($(wildcard $(1).ggo), $(1).ggo)
	174	$(1).doubleRep: $(1)_cmdline.o
	175	endif
	176	endef
	177
	178	$(foreach exec,$(EXEC),$(eval $(call ggo_template_doublerep,$(exec))))
	179
	180	#$(addsuffix .debug,$(EXEC)): $(addsuffix _cmdline.debug.o,$(EXEC))
	181
	182	%.ggo: %.args $(libDir)/evolObjs.args
	183	cat $^ > $@
	184
	185
	186	# commandline (gengetopts)
	187	%_cmdline.h %_cmdline.c: %.ggo
	188	$(GENGETOPT) -i$< -F$(*)_cmdline
	189
	190
	191	debug: CPPFLAGS = $(CPPFLAGSDEBUG)
	192	debug: $(addsuffix .debug,$(EXEC))
	193	#$(addsuffix .debug,$(EXEC)): $(libEvolDebug)
	194	pl:
	195	echo $(LIB)
	196
	197
	198	%.debug: CPPFLAGS = -g -Wall -Wno-sign-compare -I. -I../.. -DLOG -ftemplate-depth-25
	199
	200	%.debug: %.o
	201
	202
	203
	204	#debug: LDLIBS = -lEvolTreeDebug
	205	debug: LIB = $(DEBUGLIB)
	206
	207	%.debug: CPPFLAGS = $(CPPFLAGSDEBUG)
	208	%.debug: LDFLAGS = $(LDFLAGSDEBUG)
	209	#%.debug: %
	210	# @echo "made \""$(*)"\" in debug mode"
	211
	212
	213	%.debug.o: %.c
	214	$(CC) -c $(CPPFLAGSDEBUG) $(CFLAGS) $< -o $@
	215
	216	%.debug.o: %.cpp
	217	$(CXX) -c $(CPPFLAGSDEBUG) $(CXXFLAGS) $< -o $@
	218
	219	#$(DEBUGLIB): $(Libsources:.cpp=.debug.o) $(LibCsources:.c=.debug.o)
	220
	221	lib$(LIBNAME)Debug.a: $(Libsources:.cpp=.debug.o) $(LibCsources:.c=.debug.o)
	222	ar rv $@ $?
	223	ranlib $@
	224
	225	DOUBLEREPEXEC = $(EXEC:=.doubleRep)
	226
	227	doubleRep: LOGREP=t
	228	doubleRep: CPPFLAGS+= -DLOGREP
	229	doubleRep: $(DOUBLEREPLIB) $(DOUBLEREPEXEC)
	230	# echo $@
	231	$(DOUBLEREPEXEC): $(DOUBLEREPLIB) $(libEvolDoubleRep)
	232
	233	%.doubleRep.o: %.c
	234	$(CC) -c $(CPPFLAGS) $(CFLAGS) $< -o $@
	235
	236	%.doubleRep.o: %.cpp
	237	$(CXX) -c $(CPPFLAGS) $(CXXFLAGS) $< -o $@
	238
	239	$(DOUBLEREPLIB): $(Libsources:.cpp=.doubleRep.o) $(LibCsources:.c=.doubleRep.o)
	240	ar rv $@ $?
	241	ranlib $@
	242
	243	# DO NOT DELETE

+18

-0

programs/fastml/Makefile less more

	0	#! /usr/local/bin/gmake
	1	# $Id: Makefile 1215 2006-11-28 15:53:23Z osnatz $
	2
	3	# In order to compile with doubleRep run make like this: make doubleRep
	4
	5	Libsources= fastml.cpp bbAlg.cpp bbComputeDownAlg.cpp bbComputeUpAlg.cpp bbEvaluateSpecificAV.cpp bbfindBestAVDynProg.cpp bbNodeOrderAlg.cpp bb_options.cpp bbReport.cpp computeMarginalReconstruction.cpp jointNoGamma.cpp mainbb.cpp sequenceDataDiff.cpp suffStatComponentJointNoGamma.cpp
	6
	7	#Libsources=
	8	LIBNAME = fastml
	9
	10	# LibCsources= cmdline.c
	11	# LibCsources += getopt.c getopt1.c
	12
	13	EXEC = fastml
	14
	15
	16
	17	include ../Makefile.generic

+276

-0

programs/fastml/bbAlg.cpp less more

	0	#include "bbAlg.h"
	1	#include "computeUpAlg.h"
	2	#include "likelihoodComputation.h"
	3	#include "maseFormat.h"
	4	#include <cmath>
	5
	6	bbAlg::bbAlg(const tree& et,
	7	vector<stochasticProcess> &spVec,
	8	const sequenceContainer& sc,
	9	const bbAlg::boundMethod boundType,
	10	const string& reportFileName,
	11	const MDOUBLE computeAgainExactTreshold,
	12	const distribution * forceDistr) :
	13	_reportFileName(reportFileName),
	14	BandBReportAllPos1(reportFileName,et.getInternalNodesNum()spVec[0].alphabetSize()sc.seqLen()),
	15	_et(et), _spVec(spVec), _sc(sc)
	16	{
	17	cout<<"in bbAlg"<<endl;
	18	_boundMethod = boundType;
	19	_alphabetSize=_spVec[0].alphabetSize();
	20	_seqLen=_sc.seqLen();
	21	if (_spVec.size()>1) {//w codon model + gamma special case
	22	_cpij._V.resize(forceDistr->categories());
	23	for (int i=0; i < _spVec.size(); ++i)
	24	_cpij._V[i].fillPij(_et,_spVec[i]);
	25	_spVec[0].setDistribution(forceDistr);//update the first process with gamma distr
	26	//for all the functions that needs number catregor and categor probabilty
	27	}
	28	else{
	29	cout<<"no codon model"<<endl;
	30	_cpij.fillPij(_et,_spVec[0]);
	31	}
	32
	33	_bbesavp1 = new bbEvaluateSpecificAV(_et,_spVec[0],_sc,_cpij);
	34
	35	_bbNodeOrderAlg1 = new bbNodeOrderAlg(_et,_spVec[0],_sc,_cpij,computeAgainExactTreshold);
	36	cout<<"after bbNodeOrderAlg"<<endl;
	37	_bbfindBestAVDynProg1 = new bbfindBestAVDynProg(&_et,&_spVec[0],_sc,&_cpij);
	38	cout<<"after bbfindBestAVDynProg"<<endl;
	39	sequence tmp(_sc.getAlphabet());
	40	const int startingVal = -2;
	41	tmp.resize(_seqLen,&startingVal);
	42	cout<<"after resize"<<endl;
	43	_internalSequences.resize(_et.getNodesNum(),tmp);
	44	cout<<"after _internalSequences resize"<<endl;
	45	_bestReconstruction.resize(_et.getNodesNum(),tmp);
	46	cout<<"afetr _bestReconstruction resize"<<endl;
	47
	48	}
	49
	50	void bbAlg::outputTheJointProbAtEachSite(const string & outputFileProbJoint) {
	51	ofstream jointProbOutput(outputFileProbJoint.c_str());
	52	MDOUBLE totalLogLikelihood =0;
	53	for (int j=0; j < _jointL.size(); ++j) {
	54	totalLogLikelihood+=log(_jointL[j]);
	55	jointProbOutput<<"Joint log likelihood of position "<<j+1;// j+1 so that positions start from 1, and not from 0.
	56	jointProbOutput<<": "<<log(_jointL[j])<<endl;
	57	}
	58	jointProbOutput<<"total log likelihood of joint reconstruction: "<<totalLogLikelihood<<endl;
	59
	60	jointProbOutput<<endl<<"++++++++++++++++++++++++ joing log likelihood +++++++++++++++++++++++++++++++"<<endl<<endl;
	61	for (int j=0; j < _jointL.size(); ++j) {
	62	jointProbOutput<<j+1<<",";// j+1 so that positions start from 1, and not from 0.
	63	jointProbOutput<<log(_jointL[j])<<endl;
	64	}
	65
	66	jointProbOutput<<endl<<"++++++++++++++++++++++++ joint probs +++++++++++++++++++++++++++++++"<<endl<<endl;
	67	for (int j=0; j < _jointL.size(); ++j) {
	68	jointProbOutput<<j+1<<",";// j+1 so that positions start from 1, and not from 0.
	69	jointProbOutput<<_jointL[j]<<endl;
	70	}
	71
	72	jointProbOutput.close();
	73	}
	74
	75	MDOUBLE bbAlg::bbReconstructAllPositions(sequenceContainer& res){
	76	cout<<"in bbAlg::bbReconstructAllPositions"<<endl;
	77	int alphabetSize = _spVec[0].alphabetSize();
	78	MDOUBLE sumLogLikelihood=0;
	79	computePijGam cpij;
	80	cout<<"Gamma model. Branch and Bound.\nReconstructing position: ";
	81	_jointL.clear();
	82	for (int i=0 ; i < _seqLen ; ++i) {
	83	fillProbOfPosition(i);
	84	_bbReport = new BandBReport(_reportFileName,i,_spVec[0].alphabetSize());
	85	cout<<"bbl report size = "<<_bbReport->size()<<endl;
	86	MDOUBLE tmp = bbReconstructPositions(i);
	87	_jointL.push_back(tmp);
	88	cout<<"tmp = "<<tmp<<endl;
	89	assert(tmp>0);
	90	sumLogLikelihood+=log(tmp);
	91	if (_reportFileName!="") {
	92	if (_bbReport->size()>alphabetSize*_et.getInternalNodesNum()) {
	93	cout<<_bbReport->size()<<_et.getInternalNodesNum()<<endl;
	94	_bbReport->makeReport();
	95	} else if (_bbReport->size()<alphabetSize*_et.getInternalNodesNum()) {
	96	cout<<_bbReport->size()<<_et.getInternalNodesNum()<<endl;
	97	errorMsg::reportError("error in function bbReconstructAllPositions");
	98	}
	99	BandBReportAllPos1.totalNumberOfNodeVisited += _bbReport->size();
	100	}
	101	delete _bbReport;
	102	}
	103	res = fromAncestralSequenceToSeqData(); // returning the ancestral sequences
	104	BandBReportAllPos1.printReport();
	105	return sumLogLikelihood;
	106	}
	107
	108	MDOUBLE bbAlg::bbReconstructPositions(const int pos){
	109	_bestRecord=0;
	110	return bbReconstructPositions(pos,1); // 1 - start the first node in the search tree.
	111
	112	}
	113
	114	MDOUBLE bbAlg::bbReconstructPositions(const int pos,
	115	const int nodeNum) {
	116	tree::nodeP node2check=NULL;
	117	vector<int> charOrder;
	118	doubleRep exactVal=0;
	119	if (nodeNum == 1) {
	120	_bbNodeOrderAlg1->getNextNodeAndCharOrder( node2check,
	121	charOrder,
	122	_internalSequences,
	123	pos,
	124	true,
	125	exactVal);
	126	}
	127	else {
	128	_bbNodeOrderAlg1->getNextNodeAndCharOrder( node2check,
	129	charOrder,
	130	_internalSequences,
	131	pos,
	132	false,
	133	exactVal);
	134	}
	135	int k;
	136	for (k = 0; k < charOrder.size(); k++) {
	137	_internalSequences[node2check->id()][pos] = charOrder[k];
	138	bool haveToGoDown=false;
	139	if (nodeNum<_et.getInternalNodesNum()) {
	140	MDOUBLE boundSigma,boundMax;
	141	haveToGoDown =decideIfHaveToGoDown(pos,boundSigma,boundMax);
	142	_bbReport->report( node2check->name(),
	143	charOrder[k],
	144	nodeNum,
	145	_bestRecord/_pOfPos,
	146	0.00,
	147	boundSigma/_pOfPos,
	148	boundMax/_pOfPos);
	149	};
	150	if (haveToGoDown == true) {
	151	bbReconstructPositions(pos,(nodeNum+1));
	152	}
	153
	154
	155	if (nodeNum==_et.getInternalNodesNum()) {
	156	MDOUBLE tmp = _bbesavp1->evaluateSpecificAv(pos,&_internalSequences);
	157	if (tmp > _bestRecord) {
	158	vector<tree::nodeP> allNodes;
	159	_et.getAllHTUs(allNodes,_et.getRoot());
	160	for (int j = 0 ; j < allNodes.size(); j++) {
	161	_bestReconstruction[allNodes[j]->id()][pos]=_internalSequences[allNodes[j]->id()][pos];
	162	}
	163	_bestRecord = tmp;
	164	}
	165	_bbReport->report( node2check->name(),
	166	charOrder[k],
	167	nodeNum,
	168	_bestRecord/_pOfPos,
	169	tmp/_pOfPos,
	170	0.0,
	171	0.0);
	172	}
	173	}
	174
	175	_internalSequences[node2check->id()][pos] = -2;
	176	_bbNodeOrderAlg1->putBack(node2check,exactVal);
	177	return _bestRecord;
	178	}
	179
	180
	181
	182	bbAlg::~bbAlg() { delete _bbNodeOrderAlg1;
	183	delete _bbesavp1;
	184	delete _bbfindBestAVDynProg1;}
	185
	186	void bbAlg::fillProbOfPosition(const int pos) {
	187
	188	_pOfPos = likelihoodComputation::getLofPos(pos,_et,_sc,_cpij,_spVec[0]);
	189	}
	190
	191
	192
	193	sequenceContainer bbAlg::fromAncestralSequenceToSeqData() {
	194	int j=0;
	195	sequenceContainer sD;
	196	for (j=0; j < _sc.numberOfSeqs(); ++j) {
	197	sD.add(_sc[j]);
	198	}
	199	vector<tree::nodeP> HTUs;
	200	_et.getAllHTUs(HTUs,_et.getRoot());
	201	for (j=0; j < HTUs.size(); ++j) {
	202	sequence tmpSeq(_sc.getAlphabet());
	203	for (int pos=0; pos<_seqLen;++pos) {
	204	tmpSeq.push_back(_bestReconstruction[HTUs[j]->id()][pos]);
	205	}
	206	tmpSeq.setID(sD.numberOfSeqs());
	207	tmpSeq.setName(HTUs[j]->name());
	208	sD.add(tmpSeq);
	209	}
	210	return sD;
	211	}
	212
	213
	214
	215
	216
	217	bool bbAlg::decideIfHaveToGoDown(const int pos,
	218	MDOUBLE& boundSigma,
	219	MDOUBLE& boundMax) const {
	220	//---------------------------------------------------------------------
	221	// checkBoundSigma and checkBoundMax return true, if we have to go down
	222	// in the search tree. This is also the ouput of this function.
	223	// i.e., the bound is always an upper bound on the results.
	224	// it is compared with the best score so far, i.e., the lower bound,
	225	// and if the upperbound<lowerbound that there is no need going down.
	226	// When the two bounds are used,
	227	// it is enough that one is false to indicate no need to go down.
	228	//---------------------------------------------------------------------
	229
	230	bool acor1 = false;
	231	bool acor2 = false;
	232	switch (_boundMethod) {
	233	case max: return checkBoundMax(pos,boundMax);
	234	break;
	235	case sum: return checkBoundSigma(pos,boundSigma);
	236	break;
	237	case both:
	238	acor1 = checkBoundSigma(pos,boundSigma);
	239	acor2 = checkBoundMax(pos,boundMax);
	240
	241	// if ((acor1 == true) && (acor2 == false)) {
	242	// cerr<<"max is better"<<endl;
	243	// } else if ((acor2 == true) && (acor1 == false)) {
	244	// cerr<<"sum is better"<<endl;
	245	// }
	246	return (acor1 && acor2);
	247	break;
	248	default: errorMsg::reportError("Error in function decideIfHaveToGoDown");
	249	}
	250
	251	errorMsg::reportError("Error in function decideIfHaveToGoDown");
	252	return true;
	253	}
	254
	255	bool bbAlg::checkBoundSigma(const int pos,
	256	MDOUBLE& inBoundSigma) const {
	257	inBoundSigma = _bbesavp1->evaluateSpecificAv(pos,&_internalSequences);
	258	if (inBoundSigma < _bestRecord) return false;
	259	else return true;
	260	}
	261
	262	bool bbAlg::checkBoundMax(const int pos, MDOUBLE& inboundMax) const {
	263	// to make
	264	inboundMax = 0.0;
	265	// MDOUBLE rate;
	266	for (int rateCategor=0; rateCategor < _spVec[0].categories(); rateCategor++) {
	267	inboundMax+= (
	268	_bbfindBestAVDynProg1->evaluateSpecificAvDP(pos,&_internalSequences,rateCategor)*
	269	_spVec[0].ratesProb(rateCategor));
	270	}
	271	if (inboundMax < _bestRecord) return false;
	272	else return true;
	273	}
	274
	275

+67

-0

programs/fastml/bbAlg.h less more

	0	#if !defined ___BB__ALG__
	1	#define ___BB__ALG__
	2
	3	#include "computePijComponent.h"
	4	#include "bbNodeOrderAlg.h"
	5	#include "bbEvaluateSpecificAV.h"
	6	#include "bbfindBestAVDynProg.h"
	7	#include "bbReport.h"
	8	#include "sequenceContainer.h"
	9	#include "stochasticProcess.h"
	10	#include "distribution.h"
	11
	12	class bbAlg {
	13	public:
	14	enum boundMethod {max,sum,both};
	15	explicit bbAlg( const tree& et,
	16	vector<stochasticProcess> &spVec,
	17	const sequenceContainer &sc,
	18	const boundMethod boundType,
	19	const string& reportFileName,
	20	const MDOUBLE computeAgainExactTreshold,
	21	const distribution * forceDistr);
	22	virtual ~bbAlg();
	23	MDOUBLE bbReconstructAllPositions(sequenceContainer& res);
	24	sequenceContainer fromAncestralSequenceToSeqData();
	25	void outputTheJointProbAtEachSite(const string & outputFileProbJoint);
	26
	27	private:
	28	const tree& _et;
	29	vector<stochasticProcess> &_spVec;
	30	const sequenceContainer& _sc;
	31	bbEvaluateSpecificAV* _bbesavp1;
	32	computePijGam _cpij;
	33	bbNodeOrderAlg* _bbNodeOrderAlg1;
	34	bbfindBestAVDynProg* _bbfindBestAVDynProg1;
	35
	36	boundMethod _boundMethod;
	37
	38	int _alphabetSize;
	39	int _seqLen;
	40	MDOUBLE _bestRecord; // for 1 position. =0 when new pos is started...
	41	Vdouble _jointL; // the likelihood of the reconstruction, per position.
	42	void fillProbOfPosition(const int pos);
	43	MDOUBLE bbReconstructPositions(const int pos);
	44	MDOUBLE bbReconstructPositions(const int pos, const int nodeNum);
	45
	46	vector<sequence> _bestReconstruction; // the sequences (nodes * seqLen)
	47	vector<sequence> _internalSequences; // the sequences (nodes * seqLen)
	48
	49	bool decideIfHaveToGoDown(const int pos,
	50	MDOUBLE& boundSigma,
	51	MDOUBLE& boundMax) const;
	52	bool checkBoundSigma(const int pos,
	53	MDOUBLE& inBoundSigma) const;
	54	bool checkBoundMax(const int pos, MDOUBLE& inboundMax) const;
	55
	56
	57	// reporting:
	58	BandBReport* _bbReport; // report per position.
	59	BandBReportAllPos BandBReportAllPos1; // report for all positions.
	60	const string& _reportFileName;
	61	doubleRep _pOfPos;
	62
	63	};
	64
	65
	66	#endif

+191

-0

programs/fastml/bbComputeDownAlg.cpp less more

	0	#include "bbComputeDownAlg.h"
	1	#include "seqContainerTreeMap.h"
	2
	3	void BBfillComputeDown(const tree& et,
	4	const sequenceContainer& sc,
	5	const int pos,
	6	const computePijHom& pi,
	7	suffStatGlobalHomPos& ssc,
	8	const suffStatGlobalHomPos& cup,
	9	const vector<sequence>& ancS){
	10	ssc.allocatePlace(et.getNodesNum(), pi.alphabetSize());
	11	treeIterTopDownConst tIt(et);
	12	for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
	13	int letter,letterInFather,bro,letterInSon;
	14	if (mynode->father()==NULL) {// if root
	15	for(letter=0; letter<pi.alphabetSize();letter++) {
	16	ssc.set(mynode->id(),letter,1.0);
	17	}
	18	mynode = tIt.next(); //continue
	19	}
	20	tree::nodeP fatherNode=mynode->father();
	21	const int n_bro=fatherNode->getNumberOfSons();
	22	for(letter=0; letter<pi.alphabetSize();letter++) {
	23	if ((ancS[mynode->father()->id()][pos]!=-2)&&(ancS[mynode->father()->id()][pos]!=letter)){
	24	ssc.set(mynode->id(),letter,0);
	25	continue;
	26	} // this if takes care of internal node assignments...
	27
	28	doubleRep totalProb=1.0;
	29	doubleRep fatherTerm=0;
	30	if (fatherNode->father()!=NULL) {
	31	for(letterInFather=0; letterInFather<pi.alphabetSize();letterInFather++)
	32	fatherTerm += pi.getPij(fatherNode->id(),letter,letterInFather)*
	33	ssc.get(fatherNode->id(),letterInFather);
	34	}
	35	else {
	36	fatherTerm=1.0;
	37	}
	38	doubleRep brotherTerm=1.0;
	39	for(bro = 0; bro < n_bro; bro++) {
	40	tree::nodeP brother = fatherNode->getSon(bro);
	41	if (brother != mynode) {
	42	doubleRep tmp_bro=0.0;
	43	for(letterInSon=0; letterInSon<pi.alphabetSize();letterInSon++) {
	44	tmp_bro+=pi.getPij(fatherNode->getSon(bro)->id(),letter,letterInSon)*
	45	cup.get(brother->id(),letterInSon);
	46	}
	47	brotherTerm *=tmp_bro;
	48	}
	49	}
	50	totalProb = fatherTerm * brotherTerm;
	51	ssc.set(mynode->id(),letter,totalProb);
	52	}
	53	}
	54	}
	55	/*
	56	const evolTree* bbComputeDownAlg::_et=NULL;
	57	const stochasticProcess* bbComputeDownAlg::_sp=NULL;
	58	const suffStatComponent* bbComputeDownAlg::_cup=NULL;
	59	const computePij* bbComputeDownAlg::_cpij=NULL;
	60	suffStatComponent* bbComputeDownAlg::_ssc=NULL;
	61	const vector<sequence>* bbComputeDownAlg::_ancS = NULL;
	62
	63	void bbComputeDownAlg::bbFillComputeDown(const evolTree* et,
	64	const stochasticProcess* sp,
	65	const suffStatComponent* cup,
	66	const computePij* cpij,
	67	suffStatComponent* ssc,
	68	vector<sequence>* ancS) {
	69
	70
	71	_et=et;_sp=sp;_cup=cup;_cpij=cpij, _ssc=ssc;_ancS=ancS;
	72	_ssc->resize(et->iNodes());
	73	if (_ssc->size()>0)
	74	if ((*_ssc)[0].isEmpty()==true) {// alocating memory for the pij(t)...
	75	for (vector<suffStatComponent::suffStatComponentCell>::iterator it=ssc->_suffCellVec.begin();
	76	it !=ssc->_suffCellVec.end();++it) {
	77	it->allocatePlace(_et->seqLen(),
	78	_sp->categories(),_et->alphabetSize());
	79	}
	80	}
	81	recursiveFillDown(_et->iRoot());
	82	}
	83
	84	void bbComputeDownAlg::bbFillComputeDownForOnePos(const evolTree* et,
	85	const stochasticProcess* sp,
	86	const suffStatComponent* cup,
	87	const computePij* cpij,
	88	suffStatComponent* ssc,
	89	vector<sequence>* ancS,
	90	const int pos) {
	91
	92
	93	_et=et;_sp=sp;_cup=cup;_cpij=cpij, _ssc=ssc;_ancS=ancS;
	94	_ssc->resize(et->iNodes());
	95	if (_ssc->size()>0)
	96	if ((*_ssc)[0].isEmpty()==true) {// alocating memory for the pij(t)...
	97	for (vector<suffStatComponent::suffStatComponentCell>::iterator it=ssc->_suffCellVec.begin();
	98	it !=ssc->_suffCellVec.end();++it) {
	99	it->allocatePlace(_et->seqLen(),
	100	_sp->categories(),_et->alphabetSize());
	101	}
	102	}
	103	recursiveFillDownPos(_et->iRoot(),pos);
	104	}
	105
	106	void bbComputeDownAlg::recursiveFillDownPos(const evolTree::NodeP& mynode,
	107	const int pos) {
	108	fillDownNodePos(mynode,pos);
	109	for (vector<evolTree::nodeP>::iterator i=mynode->sons.begin(); i != mynode->sons.end();++i) {
	110	recursiveFillDownPos(*i,pos);
	111	}
	112	}
	113
	114	void bbComputeDownAlg::recursiveFillDown(const evolTree::NodeP& mynode) {
	115	fillDownNode(mynode);
	116	for (vector<evolTree::nodeP>::iterator i=mynode->sons.begin(); i != mynode->sons.end();++i) {
	117	recursiveFillDown(*i);
	118	}
	119	}
	120
	121	void bbComputeDownAlg::fillDownNode(
	122	const evolTree::NodeP& mynode) {
	123	for(int pos=0; pos<_et->seqLen();pos++) fillDownNodePos(mynode,pos);
	124	}
	125
	126	void bbComputeDownAlg::fillDownNodePos(
	127	const evolTree::NodeP& mynode,
	128	const int pos) {
	129
	130	int rateCategor,letter,letter_in_father,bro,letter_in_son;
	131	if (mynode->father==NULL) {// if root
	132	for (rateCategor = 0; rateCategor<_sp->categories(); ++rateCategor) {
	133	for(letter=0; letter<_et->alphabetSize();letter++) {
	134	(*_ssc)[mynode->id()].set(pos,rateCategor,letter,1.0);
	135	}
	136	}
	137	return;
	138	}
	139	for (rateCategor = 0; rateCategor<_sp->categories(); ++rateCategor) {
	140	evolTree::NodeP father_node=mynode->father;
	141	const int n_bro=father_node->sons.size();
	142	for(letter=0; letter<_et->alphabetSize();letter++) {//alpha
	143	assert(_ancS != NULL);
	144	//------------------------------------------------------
	145	if (((*_ancS)[mynode->father->id()][pos]!=letter) &&
	146	((*_ancS)[mynode->father->id()][pos]!=-2)) {
	147	(*_ssc)[mynode->id()].set(pos,rateCategor,letter,0);
	148	continue;
	149	} // this if takes care of internal node assignments...
	150	//------------------------------------------------------
	151
	152	MDOUBLE total_prob=1.0;
	153	MDOUBLE father_term=0;
	154	if (father_node->father!=NULL) {
	155	for(letter_in_father=0; letter_in_father<_et->alphabetSize();letter_in_father++)
	156	father_term += _cpij->getPij(father_node->id(),letter,letter_in_father,rateCategor)*
	157	(*_ssc)[father_node->id()].get(pos,rateCategor,letter_in_father);
	158	}
	159	else {
	160	father_term=1.0;
	161	}
	162	MDOUBLE brother_term=1.0;
	163	for(bro=0;bro<n_bro;bro++) {
	164	evolTree::NodeP brother=father_node->sons[bro];
	165	if (brother != mynode) {
	166	MDOUBLE tmp_bro=0.0;
	167	for(letter_in_son=0; letter_in_son<_et->alphabetSize();letter_in_son++) {
	168	tmp_bro+=_cpij->getPij(
	169	father_node->sons[bro]->id(),
	170	letter,
	171	letter_in_son,rateCategor)*
	172	_cup->get(brother->id(),
	173	pos,
	174	rateCategor,
	175	letter_in_son);
	176	}
	177	brother_term *=tmp_bro;
	178	}
	179	}
	180	total_prob = father_term * brother_term;
	181	(*_ssc)[mynode->id()].set(pos,rateCategor,letter,total_prob);
	182	}
	183	}
	184	}
	185	*/
	186
	187
	188
	189
	190

+23

-0

programs/fastml/bbComputeDownAlg.h less more

	0	#ifndef ___BB_COMPUTE_DOWN_ALG__
	1	#define ___BB_COMPUTE_DOWN_ALG__
	2
	3	#include "tree.h"
	4	#include "sequenceContainer.h"
	5	#include "computePijComponent.h"
	6	#include "suffStatComponent.h"
	7	#include "sequence.h"
	8	#include <vector>
	9	using namespace std;
	10
	11	void BBfillComputeDown(const tree& et,
	12	const sequenceContainer& sc,
	13	const int pos,
	14	const computePijHom& pi,
	15	suffStatGlobalHomPos& ssc,
	16	const suffStatGlobalHomPos& cup,
	17	const vector<sequence>& ancS);
	18
	19
	20
	21	#endif
	22

+46

-0

programs/fastml/bbComputeUpAlg.cpp less more

	0	#include "bbComputeUpAlg.h"
	1	#include "seqContainerTreeMap.h"
	2
	3	void BBfillComputeUp(const tree& et,
	4	const sequenceContainer& sc,
	5	const int pos,
	6	const computePijHom& pi,
	7	suffStatGlobalHomPos& ssc,
	8	const vector<sequence>& ancS) {
	9
	10	seqContainerTreeMap sctm(sc,et);
	11
	12	ssc.allocatePlace(et.getNodesNum(),pi.alphabetSize());
	13	treeIterDownTopConst tIt(et);
	14	for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
	15	int letter;
	16	if (mynode->isLeaf()) {
	17	for(letter=0; letter<pi.alphabetSize();letter++) {
	18	const int seqID = sctm.seqIdOfNodeI(mynode->id());
	19	MDOUBLE val = sc.getAlphabet()->relations(sc[seqID][pos],letter);
	20	ssc.set(mynode->id(),letter,val);
	21	}
	22	}
	23	else {
	24	for(letter=0; letter<pi.alphabetSize();letter++) {
	25	if ((ancS[mynode->id()][pos]!=-2) && // if there is already assignments for this node
	26	(ancS[mynode->id()][pos]!=letter)) {
	27	ssc.set(mynode->id(),letter,0);
	28	continue;
	29	} // this if takes care of internal node assignments...
	30
	31
	32	doubleRep total_prob=1.0;
	33	for(int i=0; i < mynode->getNumberOfSons();++i){
	34	doubleRep prob=0.0;
	35	for(int letInSon=0; letInSon<pi.alphabetSize();letInSon++) {
	36	prob += ssc.get(mynode->getSon(i)->id(), letInSon)*
	37	pi.getPij(mynode->getSon(i)->id(),letter,letInSon);
	38	}
	39	total_prob*=prob;
	40	}
	41	ssc.set(mynode->id(),letter,total_prob);
	42	}
	43	}
	44	}
	45	}

+26

-0

programs/fastml/bbComputeUpAlg.h less more

	0	#ifndef ___BB_COMPUTE_UP_ALG__
	1	#define ___BB_COMPUTE_UP_ALG__
	2
	3	#include "computePijComponent.h"
	4	#include "suffStatComponent.h"
	5
	6	// The only difference from computeUpAlg is that here character assignments to
	7	// internal nodes are taken into account while calculating compute-up.
	8
	9	#include "tree.h"
	10	#include "sequenceContainer.h"
	11	#include "computePijComponent.h"
	12	#include "suffStatComponent.h"
	13	#include "sequence.h"
	14	#include <vector>
	15	using namespace std;
	16
	17	void BBfillComputeUp(const tree& et,
	18	const sequenceContainer& sc,
	19	const int pos,
	20	const computePijHom& pi,
	21	suffStatGlobalHomPos& ssc,
	22	const vector<sequence>& ancS);
	23
	24	#endif
	25

+115

-0

programs/fastml/bbEvaluateSpecificAV.cpp less more

	0	#include "bbEvaluateSpecificAV.h"
	1
	2	bbEvaluateSpecificAV::bbEvaluateSpecificAV(const tree& et,
	3	const stochasticProcess& sp,
	4	const sequenceContainer& sc,
	5	const computePijGam& cpij) : _et(et), _sp(sp), _sc(sc), _bbcpij(cpij) {
	6	_sctm = new seqContainerTreeMap(_sc,_et);
	7
	8	_alphabetSize=_sc.alphabetSize();
	9	_Lvec.resize(_et.getNodesNum());
	10	for (int i=0; i < _Lvec.size(); ++i ) {
	11	_Lvec[i].resize(_alphabetSize);
	12	}
	13	}
	14
	15	bbEvaluateSpecificAV::~bbEvaluateSpecificAV() {
	16	delete _sctm;
	17	}
	18
	19	MDOUBLE bbEvaluateSpecificAV::evaluateSpecificAv(
	20	const int pos,
	21	const vector<sequence>* ancestralSequences) {
	22	_ancss = ancestralSequences;
	23	return recursiveEvaluateSpecificAv(pos,_et.getRoot());
	24	}
	25
	26	MDOUBLE bbEvaluateSpecificAV::recursiveEvaluateSpecificAv(
	27	const int pos,
	28	const tree::nodeP thisNode) {
	29
	30	MDOUBLE res=0.0;
	31	for (int rateCategor=0;rateCategor<_sp.categories();rateCategor++) {
	32	res += (
	33	recursiveEvaluateSpecificAv(pos,thisNode,rateCategor)*
	34	_sp.ratesProb(rateCategor)
	35	);
	36	}
	37	return res;
	38	}
	39
	40	MDOUBLE bbEvaluateSpecificAV::recursiveEvaluateSpecificAv(const int pos,
	41	const tree::nodeP thisNode,
	42	const int categor) {
	43
	44	int letterInNode;
	45	const alphabet* alph = _sc.getAlphabet();
	46	if (thisNode->isLeaf() ) {
	47	const int seqID = _sctm->seqIdOfNodeI(thisNode->id());
	48	letterInNode = _sc[seqID][pos];
	49	for (int k = 0; k < _alphabetSize ; ++k) { // taking care of ? by the -2 64 - for codons...
	50	if ((!(alph->isSpecific(letterInNode)))\|\|(letterInNode==-2) \|\| (letterInNode==-1)\|\|(letterInNode==64) \|\|(letterInNode==k)) _Lvec[thisNode->id()][k] = 1.0;
	51	else _Lvec[thisNode->id()][k] = 0.0;
	52	}
	53	return 0.0;
	54	}
	55
	56	for (int i = 0 ; i < thisNode->getNumberOfSons() ; ++i ) {// recursive call for the childs
	57	recursiveEvaluateSpecificAv(pos,thisNode->getSon(i),categor);
	58	}
	59
	60	letterInNode = (*_ancss)[thisNode->id()][pos];
	61	if (!(alph->isSpecific(letterInNode))){
	62	//if (letterInNode == -2) {// internal node with asterix.
	63	for (int y = 0 ; y < _alphabetSize ; ++y) {
	64	MDOUBLE rate = _sp.rates(categor); // the r.
	65	_Lvec[thisNode->id()][y] = 1.0;
	66	for (int u = 0 ; u < thisNode->getNumberOfSons() ; ++u) {
	67	MDOUBLE tmp = 0;
	68	for (int letInSon = 0 ; letInSon<_alphabetSize; ++letInSon) {
	69	tmp+=(
	70	_bbcpij.getPij(categor,thisNode->getSon(u)->id(),y,letInSon)*
	71	_Lvec[thisNode->getSon(u)->id()][letInSon]
	72	);
	73	}
	74	_Lvec[thisNode->id()][y] *= tmp;
	75
	76	}
	77	}
	78	}
	79
	80	else { // if the character in the HTU is known (not an asterix)
	81	for (int w = 0 ; w < _alphabetSize ; ++w) {
	82	if (w != letterInNode) _Lvec[thisNode->id()][w] = 0.0;
	83	else {
	84	// MDOUBLE rate = _myStoc_proc.rates(categor); // the r.
	85	_Lvec[thisNode->id()][w] = 1.0;
	86	for (int z = 0 ; z < thisNode->getNumberOfSons() ; ++z) {
	87	MDOUBLE tmp = 0;
	88	for (int letInSon = 0 ; letInSon<_alphabetSize; ++letInSon) {
	89	tmp += (
	90	_bbcpij.getPij(categor,thisNode->getSon(z)->id(),w,letInSon)*
	91	_Lvec[thisNode->getSon(z)->id()][letInSon]
	92	);
	93	}
	94	_Lvec[thisNode->id()][w] *= tmp;
	95	}
	96	}// end of else
	97	}
	98	}
	99
	100	MDOUBLE result= 0.0;
	101	if (thisNode->father() == NULL){ // tree root
	102
	103	for (int letRoot = 0 ; letRoot < _alphabetSize; ++letRoot) {
	104	result += _sp.freq(letRoot) * _Lvec[thisNode->id()][letRoot];
	105	}
	106	}
	107	return result;
	108
	109	}
	110
	111
	112
	113
	114

+51

-0

programs/fastml/bbEvaluateSpecificAV.h less more

	0	#if !defined ___BB__EVALUATE_SPECIFIC_AV__
	1	#define ___BB__EVALUATE_SPECIFIC_AV__
	2
	3	#include "bb_options.h"
	4	#include "computePijComponent.h"
	5	#include "suffStatComponent.h"
	6	#include "sequence.h"
	7	#include "sequenceContainer.h"
	8	#include "stochasticProcess.h"
	9	#include "tree.h"
	10	#include "seqContainerTreeMap.h"
	11
	12	#include <vector>
	13	using namespace std;
	14
	15	class bbEvaluateSpecificAV {
	16
	17	public:
	18	explicit bbEvaluateSpecificAV(
	19	const tree& et,
	20	const stochasticProcess& sp,
	21	const sequenceContainer& sc,
	22	const computePijGam& cpij);
	23	virtual ~bbEvaluateSpecificAV();
	24
	25	MDOUBLE evaluateSpecificAv( const int pos,
	26	const vector<sequence>* ancestralSequences);
	27	private:
	28	const tree& _et;
	29	const stochasticProcess& _sp;
	30	const computePijGam& _bbcpij;
	31	int _alphabetSize;
	32	int _pos;
	33	const sequenceContainer& _sc;
	34	seqContainerTreeMap * _sctm;
	35
	36
	37	const vector<sequence>* _ancss;
	38
	39	MDOUBLE recursiveEvaluateSpecificAv(
	40	const int pos,
	41	const tree::nodeP thisNode);
	42
	43	MDOUBLE recursiveEvaluateSpecificAv(const int pos,
	44	const tree::nodeP thisNode,
	45	const int categor);
	46	VVdouble _Lvec; // inodes * letter
	47
	48	};
	49
	50	#endif

+134

-0

programs/fastml/bbNodeOrderAlg.cpp less more

	0	#include "bbNodeOrderAlg.h"
	1	#include "bbComputeUpAlg.h"
	2	#include "bbComputeDownAlg.h"
	3	#include "computeMarginalAlg.h"
	4	#include <algorithm>
	5	using namespace std;
	6
	7	bbNodeOrderAlg::bbNodeOrderAlg(const tree& et,
	8	const stochasticProcess &sp,
	9	const sequenceContainer& sc,
	10	const computePijGam& cpij,
	11	const MDOUBLE computeAgainExactTreshold) :_et(et),_sp(sp),_sc(sc),_cpij(cpij){
	12	_alphabetSize=_sp.alphabetSize();
	13	_computeAgainExactTreshold = computeAgainExactTreshold;
	14	cupbb.allocatePlace(sp.categories(),et.getNodesNum(),sp.alphabetSize());
	15	cdownbb.allocatePlace(sp.categories(),et.getNodesNum(),sp.alphabetSize());
	16	cmarginalbb.allocatePlace(sp.categories(),et.getNodesNum(),sp.alphabetSize());
	17	}
	18
	19	bbNodeOrderAlg::~bbNodeOrderAlg(){}
	20
	21	// note: there is a way to dynamically correct exact.
	22	// it is not implemented here.
	23	void bbNodeOrderAlg::getNextNodeAndCharOrder(tree::nodeP &nextNode,
	24	vector<int> &charOrder,
	25	vector<sequence> &ancestralSequences,
	26	const int pos,
	27	const bool firstTime,
	28	doubleRep& exactVal){
	29	doubleRep highestProb=0;
	30	if (firstTime) {
	31	_et.getAllHTUs(_nodesLeft,_et.getRoot());
	32	recalculateExact(ancestralSequences,pos);
	33	rankRemainingNodesAccordingToTheirMarginalProb(pos);
	34	}
	35	assert(_nodesLeftExact.size()>=1);
	36	assert(_nodesLeftExact.size()==_nodesLeft.size());
	37	highestProb = _nodesLeftExact[_nodesLeftExact.size()-1];
	38	if (highestProb<_computeAgainExactTreshold) {
	39	recalculateExact(ancestralSequences,pos);
	40	rankRemainingNodesAccordingToTheirMarginalProb(pos);
	41	highestProb = _nodesLeftExact[_nodesLeftExact.size()-1];
	42	}
	43	_nodesLeftExact.pop_back();
	44	nextNode = _nodesLeft[_nodesLeft.size()-1];
	45	_nodesLeft.pop_back();
	46	charOrder = findBestOrderInNode(nextNode,pos);
	47	exactVal = highestProb;
	48	}
	49
	50	void bbNodeOrderAlg::putBack(tree::nodeP& node2check,const doubleRep & exactVal) {
	51	_nodesLeft.push_back(node2check);
	52	_nodesLeftExact.push_back(exactVal);
	53	}
	54
	55
	56	void bbNodeOrderAlg::rankRemainingNodesAccordingToTheirMarginalProb(
	57	const int pos) {
	58
	59	typedef pair<doubleRep,tree::nodeP> sortedElement;
	60	vector<sortedElement> sortVec;
	61	int i;
	62	doubleRep tmpVal;
	63	for ( i = 0 ; i < _nodesLeft.size() ; ++i) {
	64	tmpVal = getNodeHighestMarginal(_nodesLeft[i]);
	65	sortedElement elem(tmpVal,_nodesLeft[i]);
	66	sortVec.push_back(elem);
	67	}
	68
	69	sort(sortVec.begin(), sortVec.end());
	70	_nodesLeft.clear();
	71	_nodesLeftExact.clear();
	72	_nodesLeft.resize(sortVec.size());
	73	_nodesLeftExact.resize(sortVec.size());
	74	for ( i = 0 ; i < _nodesLeft.size() ; ++i ) {
	75	_nodesLeft[i] = sortVec[i].second;
	76	_nodesLeftExact[i]=sortVec[i].first;
	77	}
	78	}
	79
	80	// this function gets as input the "exact" sufficient statistic for a given node
	81	// for a given position. It goes over all the alphabet, and computes
	82	// the marginal at each position. Then he returns the highest marginal.
	83	doubleRep bbNodeOrderAlg::getNodeHighestMarginal(const tree::nodeP& inNodeP) {
	84	doubleRep highestProb =0.0;
	85
	86	int j,s;
	87	for (j=0;j<_alphabetSize;++j) {
	88	doubleRep tmpVal = 0;
	89	for (s=0; s< _sp.categories();++s ) {
	90	tmpVal += cmarginalbb.get(s,inNodeP->id(),j)*_sp.ratesProb(s);
	91	}
	92	if (highestProb<tmpVal) {
	93	highestProb=tmpVal;
	94	}
	95	}
	96	return highestProb;
	97	}
	98
	99	void bbNodeOrderAlg::recalculateExact(vector<sequence> &ancestralSequences,
	100	const int pos) {
	101	for (int i=0; i < _sp.categories(); ++i) {
	102	BBfillComputeUp(_et,_sc,pos,_cpij[i],cupbb[i],ancestralSequences);
	103	BBfillComputeDown(_et,_sc,pos,_cpij[i],cdownbb[i],cupbb[i],ancestralSequences);
	104	doubleRep posProb = 0.0;
	105	computeMarginalAlg cmalg;
	106	cmalg.fillComputeMarginal(_et,_sc,_sp,pos,_cpij[i],cmarginalbb[i],cupbb[i],cdownbb[i],posProb);
	107	}
	108	}
	109
	110	vector<int> bbNodeOrderAlg::findBestOrderInNode(const tree::nodeP node2check,
	111	const int pos) const {
	112	assert (node2check != NULL);
	113	typedef pair<doubleRep,int> sortedElement; // (marginal, letter)
	114	vector<sortedElement> sortVec;
	115	int i,s;
	116	for ( i = 0 ; i < _alphabetSize ; i++ ) {
	117	doubleRep tmpVal = 0;
	118	for (s=0; s< _sp.categories();++s ) {
	119	tmpVal += cmarginalbb.get(s,node2check->id(),i)*_sp.ratesProb(s);
	120	}
	121	sortedElement elem(tmpVal,i);
	122	sortVec.push_back(elem);
	123	}
	124
	125	sort(sortVec.begin(), sortVec.end());
	126	reverse(sortVec.begin(), sortVec.end());
	127	vector<int> bestCharOrder(_alphabetSize);
	128	for ( i = 0 ; i < _alphabetSize ; i++ ) {
	129	bestCharOrder[i] = sortVec[i].second;
	130	}
	131	return bestCharOrder;
	132	}
	133

+54

-0

programs/fastml/bbNodeOrderAlg.h less more

	0	#if !defined ___BB__NODE_ORDER_ALG__
	1	#define ___BB__NODE_ORDER_ALG__
	2
	3	#include "definitions.h"
	4	#include "bb_options.h"
	5	#include "computePijComponent.h"
	6	#include "suffStatComponent.h"
	7	#include "sequence.h"
	8	#include "tree.h"
	9	#include "stochasticProcess.h"
	10	#include "sequenceContainer.h"
	11
	12	class bbNodeOrderAlg {
	13	public:
	14	explicit bbNodeOrderAlg(const tree& et,
	15	const stochasticProcess &sp,
	16	const sequenceContainer& sc,
	17	const computePijGam& cpij,
	18	const MDOUBLE computeAgainExactTreshold);
	19	virtual ~bbNodeOrderAlg();
	20	void getNextNodeAndCharOrder(tree::nodeP &nextNode,
	21	vector<int> &charOrder,
	22	vector<sequence> &ancestralSequences,
	23	const int pos,
	24	const bool firstTime,
	25	doubleRep& exactVal);
	26	void putBack(tree::nodeP& node2check,const doubleRep & exactVal);
	27
	28	private:
	29	const tree& _et;
	30	const stochasticProcess& _sp;
	31	const computePijGam& _cpij;
	32	const sequenceContainer& _sc;
	33	suffStatGlobalGamPos cmarginalbb;
	34	suffStatGlobalGamPos cupbb;
	35	suffStatGlobalGamPos cdownbb;
	36
	37	MDOUBLE _computeAgainExactTreshold;
	38	int _alphabetSize;
	39	int _pos;
	40	vector<tree::nodeP> _nodesLeft;
	41	vector<doubleRep> _nodesLeftExact;
	42
	43	void recalculateExact( vector<sequence> &ancestralSequences,
	44	const int pos);
	45	vector<int> findBestOrderInNode(const tree::nodeP node2check,
	46	const int pos) const;
	47	void rankRemainingNodesAccordingToTheirMarginalProb(
	48	const int pos);
	49	doubleRep getNodeHighestMarginal( const tree::nodeP& inNodeP);
	50	};
	51
	52
	53	#endif

+75

-0

programs/fastml/bbReport.cpp less more

	0	#include "bbReport.h"
	1	#include "amino.h"
	2	#include "nucleotide.h"
	3	#include "codon.h"
	4	#include <iomanip>
	5	#include <iostream>
	6	#include <cmath>
	7	using namespace std;
	8
	9	BandBReport::BandBReport( const string& reportFileName, const int position, const int alphabetSize ) :
	10	_reportFileName(reportFileName), _position(position), _alphabetSize(alphabetSize)
	11	{
	12	// _root = new TreeNode;
	13	// DecisionNode rootData(-2,"allstar"); // char, node-id
	14	// _root->Setdata(rootData);
	15	// _current = _root;
	16	// _nodes = 1;
	17	}
	18
	19	void BandBReport::report(
	20	const string NodeName,
	21	const int charPutInsideNode,
	22	const int depth,
	23	const doubleRep bestRecord,
	24	const doubleRep probOfVector,
	25	const doubleRep BoundSigma,
	26	const doubleRep boundMax
	27	) {
	28
	29	VNodeName.push_back(NodeName);
	30	VcharPutInsideNode.push_back(charPutInsideNode);
	31	VbestRecord.push_back(bestRecord);
	32	VprobOfVector.push_back(probOfVector);
	33	VBoundSigma.push_back(BoundSigma);
	34	VboundMax.push_back(boundMax);
	35	Vdepth.push_back(depth);
	36
	37	}
	38
	39
	40	void BandBReport::makeReport() const {
	41
	42	ofstream out;
	43	//if (_position==0) out.open("report.txt",ios::trunc);
	44	//else {
	45	out.open(_reportFileName.c_str(),ios::app);
	46	//}
	47	out<<" position is: "<<_position<<endl;
	48	// cerr<<"reportFileIs: "<<_reportFileName<<endl;
	49	if (out == NULL) {
	50	errorMsg::reportError("unable to open output file for reporting");
	51	}
	52	// exit(555);
	53	amino aa;
	54	nucleotide nuc;
	55	codon co;
	56	for (int k=0; k < VNodeName.size(); ++k) {
	57	for (int l=0; l < Vdepth[k]; ++l) out<<" ";
	58	out<<VNodeName[k]<<" ";
	59	if (_alphabetSize==20) out<<aa.fromInt(VcharPutInsideNode[k])<<" ";
	60	else if (_alphabetSize==4) out<<nuc.fromInt(VcharPutInsideNode[k])<<" ";
	61	else if (_alphabetSize==61) out<<co.fromInt(VcharPutInsideNode[k])<<" ";
	62	else errorMsg::reportError(" error in function BandBReport::makeReport( )");
	63	out<<setiosflags(ios::scientific);
	64	out<<"best Record: "<<VbestRecord[k]<<" ";
	65	out<<"BoundSigma: "<<VBoundSigma[k]<<" ";
	66	out<<"boundMax: "<<VboundMax[k]<<" ";
	67	out<<"probAV: "<<VprobOfVector[k];
	68	out<<endl;
	69	}
	70	out.close();
	71
	72	return;
	73	}
	74

+58

-0

programs/fastml/bbReport.h less more

	0	#ifndef ________BANBREPORT
	1	#define ________BANBREPORT
	2
	3	#include "definitions.h"
	4	#include <fstream>
	5	using namespace std;
	6
	7	class BandBReportAllPos {
	8	public:
	9	explicit BandBReportAllPos(const string& reportFileName, int minNumOfNodesToVisit)
	10	: _reportFileName(reportFileName),_minNumOfNodesToVisit(minNumOfNodesToVisit) {totalNumberOfNodeVisited=0;}
	11	int totalNumberOfNodeVisited;
	12	const int _minNumOfNodesToVisit;
	13	const string& _reportFileName;
	14	void printReport() const {
	15	fstream out(_reportFileName.c_str(),ios::app);
	16	out<<"total positions visited: "<<totalNumberOfNodeVisited<<endl;
	17	out<<"min positions to be visited: "<<_minNumOfNodesToVisit<<endl;
	18	out.close();
	19	return;
	20	}
	21	};
	22
	23
	24	class BandBReport
	25	{
	26	public:
	27	explicit BandBReport( const string& reportFileName,
	28	const int position,
	29	const int alphabetSize);
	30	void report(
	31	const string NodeName,
	32	const int charPutInsideNode,
	33	const int depth,
	34	const doubleRep bestRecord,
	35	const doubleRep probOfVector,
	36	const doubleRep BoundSigma,
	37	const doubleRep boundMax);
	38	void makeReport() const;
	39	int size() {return VNodeName.size();}
	40	private:
	41
	42	vector<string> VNodeName;
	43	vector<int> VcharPutInsideNode;
	44	vector<doubleRep> VbestRecord;
	45	vector<doubleRep> VprobOfVector;
	46	vector<doubleRep> VBoundSigma;
	47	vector<doubleRep> VboundMax;
	48	vector<int> Vdepth;
	49
	50	const int _position;
	51	const int _alphabetSize;
	52	const string& _reportFileName;
	53	};
	54
	55
	56	#endif
	57

+158

-0

programs/fastml/bb_options.cpp less more

	0	#include <cstdlib>
	1	#include "bb_options.h"
	2	#include "logFile.h"
	3	#include "errorMsg.h"
	4
	5	bb_options::bb_options(int& argc, char *argv[]):
	6	computeAgainExactTreshold(0.9),
	7	optimizeBrLenOnStartingTree(true),
	8	doJoint(true),
	9	treefile(""),
	10	reportFile("log.txt"),
	11	outFile_seq_joint("seq.joint.txt"),
	12	outFile_seq_marginal("seq.marginal.txt"),
	13	outFile_prob_joint("prob.joint.txt"),
	14	outFile_prob_marginal("prob.marginal.txt"),
	15	seqfile(""),
	16	distributionName(hom),
	17	seqOutputFormat(clustal),
	18	outTreeFileNewick("tree.newick.txt"),
	19	outTreeFileAncestor("tree.ancestor.txt"),
	20	boundMethod(both),
	21	gammaPar(1.0),
	22	userProvideAlpha(false),
	23	gammaCategies(8),
	24	modelName(jtt),
	25	alphabet_size(20),
	26	removeGapsPosition(true),
	27	useChebyshev(true),
	28	treeOutFile("TheTree.txt"),
	29	outPtr(&cout){
	30	static struct option long_options[] = {{0, 0, 0, 0}};
	31	int option_index = 0;
	32	int c=0;
	33	while (c >= 0) {
	34	c = getopt_long(argc, argv,"a:bc:d:e:fghj:k:m:p:q:R:s:t:ux:y:z:", long_options,&option_index);
	35
	36	switch (c) {
	37	case 'a': computeAgainExactTreshold=atof(optarg); break;
	38	case 'b': optimizeBrLenOnStartingTree=false; break;
	39	case 'c': gammaCategies=atoi(optarg); break;
	40	case 'd': outFile_prob_joint=optarg; break;
	41	case 'e': outFile_prob_marginal=optarg; break;
	42	case 'f': doJoint=false; break;
	43	case 'g': distributionName=gam; break;
	44	case 'h' : {
	45	cout << "USAGE: "<<argv[0]<<" [-options] "<<endl;
	46	cout << usage()<<endl;
	47	exit (0);
	48	} break;
	49	case 'j': outFile_seq_joint=optarg; break;
	50	case 'k': outFile_seq_marginal=optarg; break;
	51	case 'm': {
	52	switch (optarg[0]) {
	53	case 'd': case 'D': modelName=day;alphabet_size=20; break;
	54	case 'j': case 'J': modelName=jtt;alphabet_size=20; break;
	55	case 'l': case 'L': modelName=lg;alphabet_size=20; break;
	56	case 'r': case 'R': modelName=rev;alphabet_size=20; break;
	57	case 'w': case 'W': modelName=wag;alphabet_size=20; break;
	58	case 'c': case 'C': modelName=cprev;alphabet_size=20; break;
	59	case 'a': case 'A': modelName=aajc;alphabet_size=20; break;
	60	case 'n': case 'N': modelName=nucjc;alphabet_size=4; break;
	61	case 'h': case 'H': modelName=hky;alphabet_size=4; break;
	62	case 't': case 'T': modelName=tamura92;alphabet_size=4; break;
	63	case 'g': case 'G': modelName=nucgtr;alphabet_size=4; break;
	64	case 'e': case 'E': modelName=empiriCodon;alphabet_size=61; break;
	65	case 'y': case 'Y': modelName=nyCodon;alphabet_size=61; break;
	66	default:modelName=jtt;alphabet_size=20;
	67	break;
	68	}
	69	} break;
	70	case 'p': {
	71	userProvideAlpha = true;
	72	gammaPar=atof(optarg);
	73	distributionName=gam;
	74
	75	} break;
	76	case 'q': {
	77	switch (optarg[0]) {
	78	case 'c': seqOutputFormat=clustal; break;
	79	case 'f': seqOutputFormat=fasta; break;
	80	case 'm': seqOutputFormat=molphy; break;
	81	case 's': seqOutputFormat=mase; break;
	82	case 'p': seqOutputFormat=phylip; break;
	83	case 'n': seqOutputFormat=nexus; break;
	84	default: seqOutputFormat=clustal; break;
	85	}
	86	} break;
	87	case 'R': reportFile=optarg; break;
	88	case 's': seqfile=optarg; break;
	89	case 't': treefile=optarg; break;
	90	case 'u': useChebyshev=false; break;
	91	case 'x': outTreeFileNewick=optarg; break;
	92	case 'y': outTreeFileAncestor=optarg; break;
	93	case 'z': {
	94	switch (optarg[0]) {
	95	case 's': case 'S': boundMethod=sum; break;
	96	case 'm': case 'M': boundMethod=max; break;
	97	case 'b': case 'B': boundMethod=both; break;
	98	default:boundMethod=both;break;
	99	}
	100	} break;
	101
	102	//default: printf ("?? getopt returned character code 0%o ??\n", c);
	103	} // end of switch c
	104	} // end of while (c)
	105	if (seqfile=="") {
	106	cout << "USAGE: "<<argv[0]<<" [-options] "<<endl;
	107	//cout << "cat SeqFile \|"<<argv[0]<<" [-options]"<<endl <<endl;
	108	cout << usage();
	109	cout << endl;
	110	exit (0);
	111	}
	112	}
	113
	114
	115	string bb_options::modelNameStr() const
	116	{
	117
	118	string res = "";
	119	switch (modelName)
	120	{
	121	case day:
	122	res = "DAY";
	123	break;
	124	case jtt:
	125	res = "JTT";
	126	break;
	127	case wag:
	128	res = "WAG";
	129	break;
	130	case lg:
	131	res = "LG";
	132	break;
	133	case nyCodon:
	134	res = "NY_CODON";
	135	break;
	136	case rev:
	137	res = "REV";
	138	break;
	139	case cprev:
	140	res = "CPREV";
	141	break;
	142	case nucjc:
	143	res = "NUC_JC";
	144	break;
	145	case aajc:
	146	res = "AA_JC";
	147	break;
	148	case empiriCodon:
	149	res = "EMPIRICAL_CODON";
	150	break;
	151	default:
	152	errorMsg::reportError("unknown type in bb_options::modelNameStr");
	153	}
	154	return res;
	155
	156	}
	157

+69

-0

programs/fastml/bb_options.h less more

	0	#if !defined ___BB__OPTION__T__
	1	#define ___BB__OPTION__T__
	2
	3
	4	#ifndef __STDC__
	5	#define __STDC__ 1
	6	#include "getopt.h"
	7	#undef __STDC__
	8	#else
	9	#include "getopt.h"
	10	#endif
	11
	12	#include "definitions.h"
	13	#include <iostream>
	14	#include <fstream>
	15	using namespace std;
	16
	17	class bb_options { // Command-line settings of the program; the option letters below refer to the getopt switch in the constructor.
	18	public:
	19	MDOUBLE computeAgainExactTreshold; // NOTE(review): presumably the -a threshold described in usage(); the assignment is not visible here - confirm
	20	mutable bool optimizeBrLenOnStartingTree; // NOTE(review): presumably controlled by -b; declared mutable, so const code may change it - confirm
	21	bool doJoint; // set to false by -f (skip the joint reconstruction)
	22	string treefile; // -t argument
	23	string seqfile; // -s argument; if it is empty after parsing, the constructor prints usage() and exits
	24	enum SeqFileFormat {mase,clustal,fasta,molphy,phylip,nexus};
	25	SeqFileFormat seqOutputFormat; // -q, chosen by the first letter of the argument (c,f,m,s,p,n); any other letter selects clustal
	26	string treeOutFile;
	27	bool userProvideAlpha; // set to true by -p
	28	enum distributionsNames {hom,gam};
	29	distributionsNames distributionName; // set to gam by -g and by -p
	30	enum boundMethods {max,sum,both};
	31	boundMethods boundMethod; // -z, first letter s/m/b (either case); any other letter selects both
	32	bool verbose; // if true: print starting tree to the file: start_tree
	33	// tree::TREEformats outputFormat;
	34	enum modelNameOptions {day,jtt,lg,rev,wag,cprev,nucjc,aajc,nyCodon,empiriCodon,nucgtr,tamura92,hky};
	35	modelNameOptions modelName; // -m, chosen by the first letter of the argument; any unrecognised letter selects jtt
	36	int alphabet_size; // assigned together with modelName by -m: 20, 4 or 61 depending on the model
	37	bool removeGapsPosition;
	38	bool useChebyshev; // set to false by -u
	39	string outTreeFileNewick; // -x argument
	40	string outTreeFileAncestor; // -y argument
	41	string outFile_prob_joint; // -d argument
	42	string outFile_prob_marginal; // -e argument
	43	string outFile_seq_joint; // -j argument
	44	string outFile_seq_marginal; // -k argument
	45
	46	MDOUBLE gammaPar; // -p argument converted with atof
	47	int gammaCategies; // -c argument converted with atoi
	48	string reportFile; // -R argument
	49	private:
	50	ostream* outPtr; // stream returned by out(); NOTE(review): where it is pointed at a stream is not visible here - confirm it is always set
	51	ofstream out_f;
	52	public:
	53	ostream& out() const {return *outPtr;}; // dereferences outPtr without a null check
	54	string modelNameStr() const; // textual label of modelName
	55	explicit bb_options(int& argc, char *argv[]); // parses the command line into the fields above
	56	};
	57
	58	#include "bb_options_list.h"
	59	#include <string>
	60	using namespace std;
	61	static const string usege_splash_screen() {
	62	string tmp = usage();
	63	return tmp;
	64	};
	65
	66
	67	#endif
	68

+53

-0

programs/fastml/bb_options_list.h less more

	0	#include <string>
	1	using namespace std;
	// Builds the command-line help text shown for -h and when no sequence file
	// is given.
	//
	// Returns the complete help text; each entry ends with '\n' and is framed
	// by '|' borders. Values shown in [] are the defaults.
	//
	// Changes relative to the previous text:
	//  * the border character is written as a plain '|' ("\|" is not a
	//    recognised C++ escape sequence);
	//  * every line now starts with the same " |" left border (the model lines
	//    were missing the leading blank);
	//  * spelling fixes (PHYLIP, Advanced, Controlling, Threshold) and the
	//    missing closing ']' on the -p entry.
	static std::string usage() {
		static const char* const helpText =
			" |-------------------------------- HELP: -------------------------------------+\n"
			" | VALUES IN [] ARE DEFAULT VALUES |\n"
			" |-h help |\n"
			" |-s sequence input file (for example use -s D:\\mySequences\\seq.txt ) |\n"
			" |-t tree input file |\n"
			" | (if tree is not given, a neighbor joining tree is computed). |\n"
			" |-g Assume among site rate variation model (Gamma) [By default the program |\n"
			" | will assume an homogenous model. very fast, but less accurate!] |\n"
			" |-m model name |\n"
			" |-mj [JTT] |\n"
			" |-ml LG |\n"
			" |-mr mtREV (for mitochondrial genomes) |\n"
			" |-md DAY |\n"
			" |-mw WAG |\n"
			" |-mc cpREV (for chloroplasts genomes) |\n"
			" |-ma Jukes and Cantor (JC) for amino acids |\n"
			" |-mn Jukes and Cantor (JC) for nucleotides |\n"
			" |-mh HKY Model for nucleotides |\n"
			" |-mg nucgtr Model for nucleotides |\n"
			" |-mt tamura92 Model for nucleotides |\n"
			" |-my yang M5 codons model |\n"
			" |-me empirical codon matrix |\n"
			" +----------------------------------------------------------------------------+\n"
			" |Controlling the output options: |\n"
			" |-x tree file output in Newick format [tree.newick.txt] |\n"
			" |-y tree file output in ANCESTOR format [tree.ancestor.txt] |\n"
			" |-j joint sequences output file [seq.joint.txt] |\n"
			" |-k marginal sequences output file [seq.marginal.txt] |\n"
			" |-d joint probabilities output file [prob.joint.txt] |\n"
			" |-e marginal probabilities output file [prob.marginal.txt] |\n"
			" |-q ancestral sequences output format. -qc = [CLUSTAL], -qf = FASTA |\n"
			" | -qm = MOLPHY, -qs = MASE, -qp = PHYLIP, -qn = Nexus |\n"
			" +----------------------------------------------------------------------------+\n"
			" |Advanced options: |\n"
			" |-a Threshold for computing again marginal probabilities [0.9] |\n"
			" |-b Do not optimize branch lengths on starting tree |\n"
			" | [by default branches and alpha are ML optimized from the data] |\n"
			" |-c number of discrete Gamma categories for the gamma distribution [8] |\n"
			" |-f don't compute Joint reconstruction (good if the branch and bound |\n"
			" | algorithm takes too much time, and the goal is to compute the |\n"
			" | marginal reconstruction with Gamma). |\n"
			" |-z The bound used. -zs - bound based on sum. -zm based on max. -zb [both] |\n"
			" |-p user alpha parameter of the gamma distribution [if alpha is not given, |\n"
			" | alpha and branches will be evaluated from the data (override -b)] |\n"
			// The -R (report file) and -u (no Chebyshev optimization) options are
			// accepted by the parser but intentionally not advertised here:
			// " |R report file. Show the choices made by the algorithm |\n"
			// " |-u do not use Chebyshev optimization |\n"
			" +----------------------------------------------------------------------------+\n";
		return std::string(helpText);
	}

+120

-0

programs/fastml/bbfindBestAVDynProg.cpp less more

	0	#include "bbfindBestAVDynProg.h"
	1
	2	bbfindBestAVDynProg::bbfindBestAVDynProg(const tree* et,
	3	const stochasticProcess *sp,
	4	const sequenceContainer& sc,
	5	const computePijGam* cpij): _sc(sc) {
	6	_et = et;
	7	_sp = sp;
	8	_bbcpij = cpij;
	9	_sctm = new seqContainerTreeMap(_sc,*_et);
	10	_alphabetSize=_sp->alphabetSize();
	11	_jointLval.resize(_et->getNodesNum());
	12	_jointCval.resize(_et->getNodesNum());
	13	for (int i=0; i < _et->getNodesNum(); ++i) {
	14	_jointLval[i].resize(_alphabetSize);
	15	_jointCval[i].resize(_alphabetSize);
	16	}
	17	}
	18
	19	bbfindBestAVDynProg::~bbfindBestAVDynProg() {
	20	delete _sctm;
	21	}
	22
	23	MDOUBLE bbfindBestAVDynProg::evaluateSpecificAvDP(
	24	const int pos,
	25	const vector<sequence>* ancestralSequences,
	26	const int rateCategor) {
	27	_ancss = ancestralSequences;
	28
	29	const alphabet* alph = _sc.getAlphabet();
	30	recursiveComputeLandC(pos,_et->getRoot(),rateCategor);
	31	// modified from NancestralTree::findBestLetInRoot(const int pos) {
	32	MDOUBLE bestLinRoot =0 ;
	33	//MDOUBLE bestLetInRoot = -2;
	34	MDOUBLE tmp = 0.0;
	35	int letInRoot = (*_ancss)[_et->getRoot()->id()][pos];
	36	//if (letInRoot==-2) {
	37	if (!alph->isSpecific(letInRoot)){
	38
	39	for (int x = 0 ; x < _alphabetSize; ++x) {
	40	tmp = _sp->freq(x);
	41	for (int y =0 ; y < _et->getRoot()->getNumberOfSons() ; ++y) {
	42	tmp *= _jointLval[_et->getRoot()->getSon(y)->id()][x];
	43	}
	44	if (tmp > bestLinRoot) {
	45	bestLinRoot = tmp;
	46	//bestLetInRoot = x;
	47	}
	48	}
	49	}
	50	else {//if (letInRoot!=-2)
	51	tmp = _sp->freq(letInRoot);
	52	for (int y =0 ; y < _et->getRoot()->getNumberOfSons() ; ++y) {
	53	tmp *= _jointLval[_et->getRoot()->getSon(y)->id()][letInRoot];
	54	}
	55	if (tmp > bestLinRoot) {
	56	bestLinRoot = tmp;
	57	//bestLetInRoot = x;
	58	}
	59	}
	60
	61	//iRoot()->data()[pos] = bestLetInRoot;
	62	return bestLinRoot;
	63	}
	64
	65	void bbfindBestAVDynProg::recursiveComputeLandC(const int pos,
	66	const tree::nodeP inNode,
	67	const int rateCategor) {
	68	// root has to be internal node here.
	69	const alphabet* alph = _sc.getAlphabet();
	70	for (int i=0; i<inNode->getNumberOfSons();++i) {
	71	recursiveComputeLandC(pos,inNode->getSon(i),rateCategor);
	72	}
	73	if (inNode->father() == NULL) return;
	74
	75	int letInNode;
	76	if (inNode->isLeaf()) {
	77	const int seqID = _sctm->seqIdOfNodeI(inNode->id());
	78	letInNode=_sc[seqID][pos];
	79	}
	80	else {
	81	letInNode = (*_ancss)[inNode->id()][pos];
	82	}
	83
	84	//if (letInNode!=-2){ // known leaf, or known HTU, (no root)
	85	if (alph->isSpecific(letInNode)){ // known leaf, or known HTU, (no root)
	86
	87	for (int FatherLet = 0; FatherLet<_alphabetSize;++FatherLet) {
	88	_jointLval[inNode->id()][FatherLet] = _bbcpij->getPij(rateCategor,inNode->id(),FatherLet,letInNode);
	89	_jointCval[inNode->id()][FatherLet] = letInNode;
	90	for (int k=0; k < inNode->getNumberOfSons() ; ++k) {
	91	_jointLval[inNode->id()][FatherLet] *= _jointLval[inNode->getSon(k)->id()][letInNode];
	92	}
	93	}
	94	}
	95	else {// unknown leaf or HTU -> no root.
	96	for (int letInFather = 0; letInFather < _alphabetSize; ++letInFather) {
	97	MDOUBLE bestVal = 0;
	98	int bestLet = -2;
	99	for (int lenInNode = 0; lenInNode < _alphabetSize; ++lenInNode) {
	100	MDOUBLE tmp = 1;
	101	if (inNode->isInternal())
	102	tmp*= _bbcpij->getPij(rateCategor,inNode->id(),letInFather,lenInNode);
	103	// if it is a leaf, and since it is ? tmp will be 1.0...
	104	for (int k=0; k < inNode->getNumberOfSons() ; ++k) {
	105	tmp *= _jointLval[inNode->getSon(k)->id()][lenInNode];
	106	}
	107	if (tmp > bestVal) {
	108	bestVal = tmp;
	109	bestLet = lenInNode;
	110	}
	111	}
	112	_jointLval[inNode->id()][letInFather] = bestVal;
	113	_jointCval[inNode->id()][letInFather] = bestLet;
	114	}
	115	}
	116	}
	117
	118
	119

+44

-0

programs/fastml/bbfindBestAVDynProg.h less more

	0	#if !defined ___BB__FIND_BEST_AV_DYN_PROG
	1	#define ___BB__FIND_BEST_AV_DYN_PROG
	2
	3
	4	#include "bb_options.h"
	5	#include "computePijComponent.h"
	6	#include "suffStatComponent.h"
	7	#include "sequence.h"
	8	#include "tree.h"
	9	#include "sequenceContainer.h"
	10	#include "seqContainerTreeMap.h"
	11
	12	class bbfindBestAVDynProg { // Dynamic-programming scorer for a partially assigned ancestral reconstruction at a single site.
	13	public:
		// Keeps the four arguments by pointer/reference (they are not copied)
		// and allocates the node-to-sequence map _sctm.
	14	explicit bbfindBestAVDynProg(const tree* et,
	15	const stochasticProcess *sp,
	16	const sequenceContainer& sc,
	17	const computePijGam* cpij);
	18	virtual ~bbfindBestAVDynProg(); // deletes _sctm
	19
		// Returns the best root value at site `pos` for rate category
		// `rateCategory`: nodes whose character in `ancestralSequences` is
		// specific keep it, all other nodes are maximised over.
	20	MDOUBLE evaluateSpecificAvDP( const int pos,
	21	const vector<sequence>* ancestralSequences,
	22	const int rateCategory
	23	);
	24
	25	private:
	26	const tree* _et; // not owned
	27	const stochasticProcess* _sp; // not owned; supplies alphabetSize() and freq()
	28	const computePijGam* _bbcpij; // not owned; queried through getPij(category, node id, father letter, node letter)
	29	int _alphabetSize; // _sp->alphabetSize(), cached by the constructor
	30	int _pos; // NOTE(review): not referenced by the constructor or the methods in bbfindBestAVDynProg.cpp - possibly unused
	31	seqContainerTreeMap * _sctm; // owned: new'ed in the constructor, deleted in the destructor. NOTE(review): no copy constructor/assignment is declared, so copying an instance would delete it twice
	32	const sequenceContainer& _sc;
	33
	34	const vector<sequence>* _ancss; // set at the start of every evaluateSpecificAvDP call; not owned
	35
		// Post-order fill of _jointLval/_jointCval for the subtree below inNode.
	36	void recursiveComputeLandC( const int pos,
	37	const tree::nodeP inNode,
	38	const int rateCategor);
	39	VVdouble _jointLval; // inodes * letter: best subtree value of a node given its father's letter
	40	VVdouble _jointCval; // inodes * letter: the node letter that achieves _jointLval (stored as a floating-point value)
	41	};
	42
	43	#endif

+187

-0

programs/fastml/computeMarginalReconstruction.cpp less more

	0	#include "computeMarginalReconstruction.h"
	1	#include "computeUpAlg.h"
	2	#include "computePijComponent.h"
	3
	4	#include "computeDownAlg.h"
	5	#include "computeMarginalAlg.h"
	6	#include "treeIt.h"
	7	#include <algorithm>
	8	#include <iostream>
	9	#include <fstream>
	10	#include <math.h>
	11	using namespace std;
	12
	13
	14	computeMarginalReconstruction::computeMarginalReconstruction(const tree& et,
	15	vector<stochasticProcess>& spVec,
	16	const sequenceContainer& sc) : _et(et), _spVec(spVec), _sc(sc) {
	17	_resultProb.resize(_sc.seqLen());
	18	_bestProb.resize(_sc.seqLen());
	19	for (int i=0; i < _sc.seqLen(); ++i) {
	20	_resultProb[i].resize(et.getNodesNum());
	21	_bestProb[i].resize(et.getNodesNum());
	22	for (int j=0; j < et.getNodesNum(); ++j) {
	23	_resultProb[i][j].resize(_spVec[0].alphabetSize(),0.0);
	24	}
	25	}
	26	}
	27
	28
	29
	30	void computeMarginalReconstruction::compute(const distribution * forceDistr){
	31	computePijGam pi;
	32	if (_spVec.size()>1) {//w codon model + gamma special case
	33	pi._V.resize(forceDistr->categories());
	34	for (int i=0; i < _spVec.size(); ++i)
	35	pi._V[i].fillPij(_et,_spVec[i]);
	36	_spVec[0].setDistribution(forceDistr);//update the first process with gamma distr
	37	//for all the functions that needs no catregor and categor probabilty
	38	}
	39	else{
	40	pi.fillPij(_et,_spVec[0]);
	41	}
	42
	43	//pi.fillPij(_et,_sp);
	44	MDOUBLE totalLikelihoodOfReconstruction = 0;
	45	cout<<"doing position (marginal): ";
	46	for (int pos=0; pos<_sc.seqLen(); ++pos) {
	47	suffStatGlobalGamPos sscUp;// this is for a specific position.
	48	suffStatGlobalGamPos sscDown;// this is for a specific position.
	49	suffStatGlobalGamPos sscMarginal; // this is for a specific position.
	50	sscUp.allocatePlace(_spVec[0].categories(),_et.getNodesNum(),_sc.alphabetSize());
	51	sscDown.allocatePlace(_spVec[0].categories(),_et.getNodesNum(),_sc.alphabetSize());
	52	sscMarginal.allocatePlace(_spVec[0].categories(),_et.getNodesNum(),_sc.alphabetSize());
	53
	54	cout<<pos+1<<" ";
	55	computeUpAlg computeUpAlg1;
	56	computeDownAlg computeDownAlg1;
	57	computeMarginalAlg computeMarginalAlg1;
	58
	59	for (int cat = 0; cat < _spVec[0].categories(); ++cat) {
	60	computeUpAlg1.fillComputeUp(_et,_sc,pos,pi[cat],sscUp[cat]);
	61	computeDownAlg1.fillComputeDown(_et,_sc,pos,pi[cat],sscDown[cat],sscUp[cat]);
	62	doubleRep posProb =0;
	63	computeMarginalAlg1.fillComputeMarginal(_et,_sc,_spVec[0],pos,pi[cat],sscMarginal[cat],sscUp[cat],sscDown[cat],posProb);
	64	}
	65
	66	MDOUBLE likelihoodOfPos = 0;
	67
	68	fillResultProb(sscMarginal,_spVec[0],_et,pos);
	69	fillMarginalReconstruction();
	70	}
	71	cout<<endl;
	72	}
	73
	74	void computeMarginalReconstruction::fillResultProb(
	75	const suffStatGlobalGamPos& ssc,
	76	const stochasticProcess & sp,
	77	const tree& et,
	78	const int pos){
	79	treeIterTopDownConst tIt(et);
	80	for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
	81	for (int i=0; i < sp.alphabetSize(); ++i) {
	82	doubleRep tmp=0; // the value for this letter in this node.
	83	for (int j=0; j < sp.categories(); ++j) {
	84	tmp += ssc.get(j,mynode->id(),i)*sp.ratesProb(j);
	85	}
	86	_resultProb[pos][mynode->id()][i] = convert(tmp);
	87	}
	88	}
	89	}
	90
	91	void computeMarginalReconstruction::fillMarginalReconstruction() {
	92	_resultSec = _sc;
	93	treeIterTopDownConst tIt(_et);
	94	for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
	95	if (mynode->isLeaf()) continue;
	96	// creating the place for this sequence in the resulting sequence container
	97	sequence tmp("",mynode->name(),"",_resultSec.numberOfSeqs(),_sc.getAlphabet());
	98	_resultSec.add(tmp);
	99	fillMarginalReconstructionSpecificNode(mynode);
	100	}
	101	}
	102
	103	void computeMarginalReconstruction::fillMarginalReconstructionSpecificNode(tree::nodeP mynode) {
	104	for (int pos=0; pos < _sc.seqLen(); ++pos) {
	105	MDOUBLE bestP =-1.0;
	106	int bestChar = -1;
	107	for (int letter=0; letter < _spVec[0].alphabetSize(); ++letter) {
	108	if (_resultProb[pos][mynode->id()][letter] > bestP) {
	109	bestP = _resultProb[pos][mynode->id()][letter];
	110	bestChar = letter;
	111	}
	112	}
	113	_bestProb[pos][mynode->id()] = bestP;
	114
	115	// adding bestChar to the resulting sequence container.
	116	string res = _sc.getAlphabet()->fromInt(bestChar);
	117	int id = _resultSec.getId(mynode->name());
	118	_resultSec[id].addFromString(res);
	119	}
	120	}
	121
	122	void computeMarginalReconstruction::outputTheMarginalProbForEachCharForEachNode(const string& outputFileName) {
	123	ofstream out(outputFileName.c_str());
	124	for (int pos=0; pos<_sc.seqLen(); ++pos) {
	125	outputTheMarginalProbForEachCharForEachNodePos(out,pos);
	126	}
	127	out<<endl<<"++++++++++++++++++++++++ marginal probs +++++++++++++++++++++++++++++++"<<endl<<endl;
	128	out<<"node,site";
	129	for (int c=0; c < _spVec[0].alphabetSize(); ++c) {
	130	out<<","<<_sc.getAlphabet()->fromInt(c);
	131	}
	132	out<<endl;
	133	treeIterDownTopConst tIt(_et);
	134	for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
	135	if (mynode->isLeaf()) continue;
	136	for (int pos=0; pos<_sc.seqLen(); ++pos) {
	137	out<<mynode->name()<<","<<pos+1;
	138	for (int c=0; c < _spVec[0].alphabetSize(); ++c) {
	139	out<<","<<_resultProb[pos][mynode->id()][c];
	140	}
	141	out<<endl;
	142	}
	143	}
	144
	145
	146	out<<endl<<"++++++++++++++++++++++++ marginal log likelihood +++++++++++++++++++++++++++++++"<<endl<<endl;
	147	out<<"node,site";
	148	for (int c=0; c < _spVec[0].alphabetSize(); ++c) {
	149	out<<","<<_sc.getAlphabet()->fromInt(c);
	150	}
	151	out<<endl;
	152	for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
	153	if (mynode->isLeaf()) continue;
	154	for (int pos=0; pos<_sc.seqLen(); ++pos) {
	155	out<<mynode->name()<<","<<pos+1;
	156	for (int c=0; c < _spVec[0].alphabetSize(); ++c) {
	157	out<<","<<log(_resultProb[pos][mynode->id()][c]);
	158	}
	159	out<<endl;
	160	}
	161	}
	162	out.close();
	163	}
	164
	165	void computeMarginalReconstruction::outputTheMarginalProbForEachCharForEachNodePos(ostream& out,const int pos){//(DEFAULT = JPF, same file as above).
	166	treeIterDownTopConst tIt(_et);
	167	out<<"marginal probabilities at position: "<<pos+1<<endl;
	168	for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
	169	//if (mynode->isLeaf()) continue;
	170	out<<"of node: "<<mynode->name()<<": ";
	171	vector<pair< MDOUBLE,string> > pres;
	172	int c=0;
	173	for (c=0; c < _spVec[0].alphabetSize(); ++c) {
	174	pres.push_back(pair<MDOUBLE,string>(_resultProb[pos][mynode->id()][c],_sc.getAlphabet()->fromInt(c)));
	175	}
	176	sort(pres.begin(),pres.end());
	177	for (c=pres.size()-1; c >=0 ; --c) {
	178	if (pres[c].first<0.0001) continue;
	179	out<<"p("<<pres[c].second;
	180	out<<")="<<pres[c].first<<" ";
	181	}
	182	out<<endl;
	183	}
	184	out<<endl;
	185	}
	186

+39

-0

programs/fastml/computeMarginalReconstruction.h less more

	0	#ifndef ___COMPUTE_MARGINAL_RECONSTRUCTION
	1	#define ___COMPUTE_MARGINAL_RECONSTRUCTION
	2
	3	#include "definitions.h"
	4	#include "tree.h"
	5	#include "stochasticProcess.h"
	6	#include "sequenceContainer.h"
	7	#include "suffStatComponent.h"
	8
	9	class computeMarginalReconstruction { // Marginal ancestral reconstruction: per-node, per-site letter probabilities and the most probable sequences.
	10	public:
		// All three arguments are kept by reference, so they must outlive this
		// object. spVec is non-const because compute() may install a rate
		// distribution on its first element.
	11	explicit computeMarginalReconstruction(
	12	const tree& et,
	13	vector<stochasticProcess>& spVec,
	14	const sequenceContainer& sc);
	15
	16	void compute(const distribution * forceDistr); // fills _resultProb, _bestProb and _resultSec; forceDistr is only used when spVec holds more than one process
	17	void outputTheMarginalProbForEachCharForEachNode(const string& outputFileName); // writes the per-site listing plus probability and log-probability tables to the file
	18	sequenceContainer getResultingMarginalReconstruction() const {return _resultSec;} // returns a copy of the input sequences plus the reconstructed ones
	19	private:
	20	const tree& _et;
	21	vector<stochasticProcess>& _spVec;
	22	const sequenceContainer& _sc;
	23	sequenceContainer _resultSec; // copy of _sc with one reconstructed sequence appended per non-leaf node
	24
	25	// this will be the marginal for each node, for each pos, for each letter
	26	VVVdouble _resultProb; //_resultProb[pos][node][letter]
	27
	28	// this will be the marginal for each node, for each pos, of the best reconstruction.
	29	VVdouble _bestProb; //_bestProb[pos][node]
	30
	31	void fillResultProb(const suffStatGlobalGamPos& ssc,const stochasticProcess & sp,const tree& et, const int pos); // rate-weighted sum over categories into _resultProb[pos]
	32	void fillMarginalReconstruction(); // rebuilds _resultSec for all non-leaf nodes
	33	void fillMarginalReconstructionSpecificNode(tree::nodeP mynode); // picks the most probable letter of every site for one node
	34	void outputTheMarginalProbForEachCharForEachNodePos(ostream& out,const int pos); // per-node listing for one site
	35
	36	};
	37
	38	#endif

+361

-0

programs/fastml/fastml.cpp less more

	0	#include "mainbb.h"
	1	#include "logFile.h"
	2
	3
	4	int main(int argc, char* argv[]) {
	5	myLog::setLog("",10);
	6	mainbb mainbb1(argc,argv);
	7	return 0;
	8	}
	9
	10	/*
	11	//------------------------------------------------
	12
	13
	14	#include "bbAlg.h"
	15	#include "sequenceDataDiff.h"
	16	sequenceContainer main1(const string& seqFile,
	17	char format,
	18	const string& treeFile,
	19	const string& reportFileName,
	20	const string& ancestralSequencesFileName,
	21	const MDOUBLE alpha,
	22	const int categor,
	23	time_t& timeTaken,
	24	clock_t& ctimeTaken,
	25	const MDOUBLE recalculateExactVal); //0 never recalculate...
	26
	27	int veryMainLysSmallCheck() {// the non command line version for debugging and checking.
	28	const string seqFile = "C:\\tal\\seq\\lys6\\junk\\seqF1.txt";
	29	const string treeFile1 = "C:\\tal\\seq\\lys6\\junk\\tree.txt";
	30	const string treeFile2 = "C:\\tal\\seq\\lys6\\junk\\tree.txt";
	31	const string reportFileHom = "C:\\tal\\seq\\lys6\\junk\\tmp\\reportFileHom.txt";
	32	const string reportFileGam = "C:\\tal\\seq\\lys6\\junk\\tmp\\reportFileGam.txt";
	33	const string reportFileDiffAndTime = "C:\\tal\\seq\\lys6\\junk\\tmp\\reportFileDif.txt";
	34	const string ancstralSeqGam = "C:\\tal\\seq\\lys6\\junk\\tmp\\ancstralSeqGam.txt";
	35	const string ancstralSeqHom = "C:\\tal\\seq\\lys6\\junk\\tmp\\ancstralSeqHom.txt";
	36	time_t time1;
	37	time_t time2;
	38	clock_t ctime1;
	39	clock_t ctime2;
	40
	41	sequenceContainer sd1 = main1(seqFile,'m',treeFile1,reportFileGam,ancstralSeqGam,0.924884,4,time1,ctime1,0); // gam
	42	sequenceContainer sd2 = main1(seqFile,'m',treeFile2,reportFileHom,ancstralSeqHom,-3,1,time2,ctime2,0); // hom
	43	sequenceDataDiff sequenceDataDiff1f(&sd1,&sd2);
	44	sequenceDataDiff1f.computeDifferences();
	45	ofstream outdiff(reportFileDiffAndTime.c_str(),ios::app);
	46	sequenceDataDiff1f.printDiff(outdiff);
	47	outdiff.close();
	48	ofstream out;
	49	out.open(reportFileDiffAndTime.c_str(),ios::app);
	50	out<<" time taken for hom was: "<<time2<<endl;
	51	out<<" time taken for gam was: "<<time1<<endl;
	52	out.close();
	53	return 0;
	54	}
	55
	56	int veryMainLys() {// the non command line version for debugging and checking.
	57	const string seqFile = "C:\\tal\\activeProjects\\ancbb\\seq\\lys71\\lys71.ngap.mase";
	58	const string treeFile1 = "C:\\tal\\activeProjects\\ancbb\\seq\\lys71\\treehom.txt";
	59	const string treeFile2 = "C:\\tal\\activeProjects\\ancbb\\seq\\lys71\\treegam.txt";
	60	const string reportFileHom = "C:\\tal\\activeProjects\\ancbb\\seq\\lys71\\reportFileHom.txt";
	61	const string reportFileGam = "C:\\tal\\activeProjects\\ancbb\\seq\\lys71\\reportFileGam.txt";
	62	const string reportFileDiffAndTime = "C:\\tal\\activeProjects\\ancbb\\seq\\lys71\\reportFileDif.txt";
	63	const string ancstralSeqGam = "C:\\tal\\activeProjects\\ancbb\\seq\\lys71\\ancstralSeqGam.txt";
	64	const string ancstralSeqHom = "C:\\tal\\activeProjects\\ancbb\\seq\\lys71\\ancstralSeqHom.txt";
	65	time_t time1;
	66	time_t time2;
	67	clock_t ctime1;
	68	clock_t ctime2;
	69	sequenceContainer sd1 = main1(seqFile,'m',treeFile1,reportFileGam,ancstralSeqGam,0.924884,4,time1,ctime1,0); // gam
	70	sequenceContainer sd2 = main1(seqFile,'m',treeFile2,reportFileHom,ancstralSeqHom,-3,1,time2,ctime2,0); // hom
	71	sequenceDataDiff sequenceDataDiff1f(&sd1,&sd2);
	72	sequenceDataDiff1f.computeDifferences();
	73	ofstream outdiff(reportFileDiffAndTime.c_str(),ios::app);
	74	sequenceDataDiff1f.printDiff(outdiff);
	75	outdiff.close();
	76	ofstream out;
	77	out.open(reportFileDiffAndTime.c_str(),ios::app);
	78	out<<" time taken for hom was: "<<time2<<endl;
	79	out<<" time taken for gam was: "<<time1<<endl;
	80	out.close();
	81	return 0;
	82	}
	83
	84	int veryMainCo1() {// the non command line version for debugging and checking.
	85	const string seqFile = "C:\\tal\\activeProjects\\ancbb\\seq\\co1\\co1.ngap.aln";
	86	const string treeFile1 = "C:\\tal\\activeProjects\\ancbb\\seq\\co1\\treehom.txt";
	87	const string treeFile2 = "C:\\tal\\activeProjects\\ancbb\\seq\\co1\\treegam.txt";
	88	const string reportFileHom = "C:\\tal\\activeProjects\\ancbb\\seq\\co1\\reportFileHom.txt";
	89	const string reportFileGam = "C:\\tal\\activeProjects\\ancbb\\seq\\co1\\reportFileGam.txt";
	90	const string reportFileDiffAndTime = "C:\\tal\\activeProjects\\ancbb\\seq\\co1\\reportFileDif.txt";
	91	const string ancstralSeqGam = "C:\\tal\\activeProjects\\ancbb\\seq\\co1\\ancstralSeqGam.txt";
	92	const string ancstralSeqHom = "C:\\tal\\activeProjects\\ancbb\\seq\\co1\\ancstralSeqHom.txt";
	93	time_t time1;
	94	time_t time2;
	95	clock_t ctime1;
	96	clock_t ctime2;
	97	sequenceContainer sd1 = main1(seqFile,'a',treeFile1,reportFileGam,ancstralSeqGam,0.257432,4,time1,ctime1,0); // gam
	98	sequenceContainer sd2 = main1(seqFile,'a',treeFile2,reportFileHom,ancstralSeqHom,-3,1,time2,ctime2,0); // hom
	99	sequenceDataDiff sequenceDataDiff1f(&sd1,&sd2);
	100	sequenceDataDiff1f.computeDifferences();
	101	ofstream outdiff(reportFileDiffAndTime.c_str(),ios::app);
	102	sequenceDataDiff1f.printDiff(outdiff);
	103	outdiff.close();
	104	ofstream out;
	105	out.open(reportFileDiffAndTime.c_str(),ios::app);
	106	out<<" time taken for hom was: "<<time2<<endl;
	107	out<<" time taken for gam was: "<<time1<<endl;
	108	out.close();
	109	return 0;
	110	}
	111
	112	int veryMainCo2() {// the non command line version for debugging and checking.
	113	const string seqFile = "C:\\tal\\activeProjects\\ancbb\\seq\\co2\\co2ngap.aln";
	114	const string treeFile1 = "C:\\tal\\activeProjects\\ancbb\\seq\\co2\\treehom.txt";
	115	const string treeFile2 = "C:\\tal\\activeProjects\\ancbb\\seq\\co2\\treegam.txt";
	116	const string reportFileHom = "C:\\tal\\activeProjects\\ancbb\\seq\\co2\\reportFileHom.txt";
	117	const string reportFileGam = "C:\\tal\\activeProjects\\ancbb\\seq\\co2\\reportFileGam.txt";
	118	const string reportFileDiffAndTime = "C:\\tal\\activeProjects\\ancbb\\seq\\co2\\reportFileDif.txt";
	119	const string ancstralSeqGam = "C:\\tal\\activeProjects\\ancbb\\seq\\co2\\ancstralSeqGam.txt";
	120	const string ancstralSeqHom = "C:\\tal\\activeProjects\\ancbb\\seq\\co2\\ancstralSeqHom.txt";
	121	time_t time1;
	122	time_t time2;
	123	clock_t ctime1;
	124	clock_t ctime2;
	125	sequenceContainer sd1 = main1(seqFile,'a',treeFile1,reportFileGam,ancstralSeqGam,0.476490,4,time1,ctime1,0); // gam
	126	sequenceContainer sd2 = main1(seqFile,'a',treeFile2,reportFileHom,ancstralSeqHom,-3,1,time2,ctime2,0); // hom
	127	sequenceDataDiff sequenceDataDiff1f(&sd1,&sd2);
	128	sequenceDataDiff1f.computeDifferences();
	129	ofstream outdiff(reportFileDiffAndTime.c_str(),ios::app);
	130	sequenceDataDiff1f.printDiff(outdiff);
	131	outdiff.close();
	132	ofstream out;
	133	out.open(reportFileDiffAndTime.c_str(),ios::app);
	134	out<<" time taken for hom was: "<<time2<<endl;
	135	out<<" time taken for gam was: "<<time1<<endl;
	136	out.close();
	137	return 0;
	138	}
	139
	140	int veryMainOpsin() {// the non command line version for debugging and checking.
	141	const string seqFile = "C:\\tal\\activeProjects\\ancbb\\seq\\opsin\\opsin.mase";
	142	const string treeFile1 = "C:\\tal\\activeProjects\\ancbb\\seq\\opsin\\treehom.txt";
	143	const string treeFile2 = "C:\\tal\\activeProjects\\ancbb\\seq\\opsin\\treegam.txt";
	144	const string reportFileHom = "C:\\tal\\activeProjects\\ancbb\\seq\\opsin\\reportFileHom.txt";
	145	const string reportFileGam = "C:\\tal\\activeProjects\\ancbb\\seq\\opsin\\reportFileGam.txt";
	146	const string reportFileDiffAndTime = "C:\\tal\\activeProjects\\ancbb\\seq\\opsin\\reportFileDif.txt";
	147	const string ancstralSeqGam = "C:\\tal\\activeProjects\\ancbb\\seq\\opsin\\ancstralSeqGam.txt";
	148	const string ancstralSeqHom = "C:\\tal\\activeProjects\\ancbb\\seq\\opsin\\ancstralSeqHom.txt";
	149	time_t time1;
	150	time_t time2;
	151	clock_t ctime1;
	152	clock_t ctime2;
	153	sequenceContainer sd1 = main1(seqFile,'m',treeFile1,reportFileGam,ancstralSeqGam,0.331405,4,time1,ctime1,0); // gam
	154	sequenceContainer sd2 = main1(seqFile,'m',treeFile2,reportFileHom,ancstralSeqHom,-3,1,time2,ctime2,0); // hom
	155	sequenceDataDiff sequenceDataDiff1f(&sd1,&sd2);
	156	sequenceDataDiff1f.computeDifferences();
	157	ofstream outdiff(reportFileDiffAndTime.c_str(),ios::app);
	158	sequenceDataDiff1f.printDiff(outdiff);
	159	outdiff.close();
	160	ofstream out;
	161	out.open(reportFileDiffAndTime.c_str(),ios::app);
	162	out<<" time taken for hom was: "<<time2<<endl;
	163	out<<" time taken for gam was: "<<time1<<endl;
	164	out.close();
	165	return 0;
	166	}
	167
	168
	169	int veryMainSteroid() {// the non command line version for debugging and checking.
	170	const string seqFile = "C:\\tal\\activeProjects\\ancbb\\seq\\steroid\\noGaps.mase";
	171	const string treeFile1 = "C:\\tal\\activeProjects\\ancbb\\seq\\steroid\\treehom.txt";
	172	const string treeFile2 = "C:\\tal\\activeProjects\\ancbb\\seq\\steroid\\treegam.txt";
	173	const string reportFileHom = "C:\\tal\\activeProjects\\ancbb\\seq\\steroid\\reportFileHom.txt";
	174	const string reportFileGam = "C:\\tal\\activeProjects\\ancbb\\seq\\steroid\\reportFileGam.txt";
	175	const string reportFileDiffAndTime = "C:\\tal\\activeProjects\\ancbb\\seq\\steroid\\reportFileDif.txt";
	176	const string ancstralSeqGam = "C:\\tal\\activeProjects\\ancbb\\seq\\steroid\\ancstralSeqGam.txt";
	177	const string ancstralSeqHom = "C:\\tal\\activeProjects\\ancbb\\seq\\steroid\\ancstralSeqHom.txt";
	178	time_t time1;
	179	time_t time2;
	180	sequenceContainer sd1 = main1(seqFile,'m',treeFile1,reportFileGam,ancstralSeqGam,1.534586,4,time1,0); // gam
	181	sequenceContainer sd2 = main1(seqFile,'m',treeFile2,reportFileHom,ancstralSeqHom,-3,1,time2,0); // hom
	182	sequenceDataDiff sequenceDataDiff1f(&sd1,&sd2);
	183	sequenceDataDiff1f.computeDifferences();
	184	ofstream outdiff(reportFileDiffAndTime.c_str(),ios::app);
	185	sequenceDataDiff1f.printDiff(outdiff);
	186	outdiff.close();
	187	ofstream out;
	188	out.open(reportFileDiffAndTime.c_str(),ios::app);
	189	out<<" time taken for hom was: "<<time2<<endl;
	190	out<<" time taken for gam was: "<<time1<<endl;
	191	out.close();
	192	return 0;
	193	}
	194
	195
	196	int veryMainSteroid() {// the non command line version for debugging and checking.
	197	const string seqFile = "C:\\tal\\activeProjects\\ancbb\\seq\\steroid\\dataPreperation\\B4remGap\\ster73.snames.correct.ngap.aln";
	198	const string treeFile1 ="C:\\tal\\activeProjects\\ancbb\\seq\\steroid\\buildingTree\\topologyHom.ph";
	199	const string treeFile2 ="C:\\tal\\activeProjects\\ancbb\\seq\\steroid\\buildingTree\\topologyGam.ph";
	200
	201
	202
	203	const string reportFileHom = "C:\\tal\\activeProjects\\ancbb\\seq\\steroid\\NreportFileHom.txt";
	204	const string reportFileGam = "C:\\tal\\activeProjects\\ancbb\\seq\\steroid\\NreportFileGam.txt";
	205	const string reportFileDiffAndTime = "C:\\tal\\activeProjects\\ancbb\\seq\\steroid\\NreportFileDif.txt";
	206	const string ancstralSeqGam = "C:\\tal\\activeProjects\\ancbb\\seq\\steroid\\NancstralSeqGam.txt";
	207	const string ancstralSeqHom = "C:\\tal\\activeProjects\\ancbb\\seq\\steroid\\NancstralSeqHom.txt";
	208	time_t time1;
	209	time_t time2;
	210	clock_t ctime1;
	211	clock_t ctime2;
	212	sequenceContainer sd1 = main1(seqFile,'a',treeFile1,reportFileHom,ancstralSeqHom,-600,1,time1,ctime1,0); // hom
	213	sequenceContainer sd2 = main1(seqFile,'a',treeFile2,reportFileGam,ancstralSeqGam,1.29,4,time2,ctime2,0); // gam
	214	sequenceDataDiff sequenceDataDiff1f(&sd1,&sd2);
	215	sequenceDataDiff1f.computeDifferences();
	216	ofstream outdiff(reportFileDiffAndTime.c_str(),ios::app);
	217	sequenceDataDiff1f.printDiff(outdiff);
	218	outdiff.close();
	219	ofstream out;
	220	out.open(reportFileDiffAndTime.c_str(),ios::app);
	221	out<<" time taken for hom was: "<<time1<<endl;
	222	out<<" time taken for gam was: "<<time2<<endl;
	223	out<<" ctime taken for hom was: "<<ctime1<<endl;
	224	out<<" ctime taken for gam was: "<<ctime2<<endl;
	225	out.close();
	226	return 0;
	227	}
	228
	229	MDOUBLE totalBranchLengh(const tree& t1) {
	230	MDOUBLE sum=0;
	231	vector<tree::nodeP> vec;
	232	t1.getAllNodes(vec,t1.getRoot());
	233	for (int i=0; i< vec.size(); ++i) {
	234	if (vec[i]->father != NULL) sum += vec[i]->dis2father();
	235	cerr<<sum<<" "<<vec[i]->dis2father()<<endl;
	236	}
	237	return sum;
	238	}
	239
	240
	241
	242
	243	*/
	244	#include "sequenceDataDiff.h"
	245	#include "amino.h"
	246	#include <ctime>
	247	#include "recognizeFormat.h"
	248	#include "uniDistribution.h"
	249	#include "gammaDistribution.h"
	250	#include "replacementModel.h"
	251	#include "readDatMatrix.h"
	252	#include "chebyshevAccelerator.h"
	253	#include "bbAlg.h"
	254	/*
	255	sequenceContainer main1(const string& seqFile,
	256	char format,
	257	const string& treeFile,
	258	const string& reportFileName,
	259	const string& ancestralSequencesFileName,
	260	const MDOUBLE alpha,
	261	const int categor,
	262	time_t& timeTaken,
	263	clock_t& ctimeTaken,
	264	const MDOUBLE recalculateExactVal) { // gamma distribution
	265
	266	alphabet* _alph = new amino;
	267	ifstream f(seqFile.c_str());
	268	sequenceContainer original = recognizeFormat::read(f,_alph);;
	269	tree t1(treeFile); // with sequence data
	270	// t1.multipleAllBranchesByFactor(10);
	271	// stochastic process:
	272
	273	// cerr<<" total br-len is:"<<totalBranchLengh(t1)<<endl;
	274	// return *sd;
	275
	276
	277	distribution *dist1 = NULL;
	278	if (categor ==1 ) dist1 = new uniDistribution;
	279	else dist1 = new gammaDistribution(alpha,categor);
	280
	281	replacementModel *probMod=new pupAll(datMatrixHolder::jones);
	282	pijAccelerator *pijAcc1 = new chebyshevAccelerator(probMod);
	283
	284	// replacementModel *probMod1=new nucJC;
	285	// replacementModel *probMod1=new pupJTT;
	286	// pijAccelerator *pijAcc1= new chebyshevAccelerator(probMod1);
	287	// pijAccelerator *pijAcc1= new trivialAccelerator(probMod1);
	288	stochasticProcess* _s1 = new stochasticProcess(dist1, pijAcc1);
	289	bbAlg bbAlg1(t1,*_s1,original,bbAlg::both,reportFileName,recalculateExactVal);//computeAgainExactTreshold
	290	// bbAlg bbAlg1(&t1,_s1,bbAlg::sum,0);//computeAgainExactTreshold
	291	// bbAlg bbAlg1(&t1,_s1,bbAlg::max,0);//computeAgainExactTreshold
	292	time_t time1,time2;
	293	clock_t ctime1, ctime2;
	294	time(&time1);
	295	ctime1 = clock();
	296	cerr<<"starting time is: "<<time1<<endl;
	297	cerr<<"starting clock is: "<<ctime1<<endl;
	298	MDOUBLE res = bbAlg1.bbReconstructAllPositions(original);
	299	time(&time2);
	300	ctime2 = clock();
	301	cerr<<"ending time is: "<<time2<<endl;
	302	cerr<<"ending clock is: "<<ctime2<<endl;
	303	timeTaken=time2-time1;
	304	ctimeTaken=ctime2-ctime1;
	305
	306	ofstream outi;
	307	outi.open(reportFileName.c_str(),ios::app);
	308	outi<<" the likelihood of the reconstruction is:"<<res<<endl;
	309	outi.close();
	310	sequenceContainer recS= bbAlg1.fromAncestralSequenceToSeqData();
	311
	312	delete pijAcc1;
	313	delete dist1;
	314	return recS;
	315	}
	316	*/
	317	/*
	318	int mainNoCommandLine() {
	319
	320	// veryMainLysSmallCheck(); // just to check that everything is working...
	321	// veryMainLys();
	322	// veryMainCo1();
	323	// veryMainCo2();
	324	// veryMainOpsin();
	325	veryMainSteroid();
	326	return 0;
	327	}
	328	// const string seqFile = "C:\\tal\\seq\\lys6\\junk\\seq.txt";
	329	// const string treeFile = "C:\\tal\\seq\\lys6\\junk\\tree.txt";
	330	// const string seqFile = "C:\\tal\\seq\\lys6\\seq.txt";
	331	// const string treeFile = "C:\\tal\\seq\\lys6\\tree.txt";
	332	// main1(seqFile,treeFile,-3,1,time1);// hom
	333
	334	*/
	335
	336	//int main() {
	337	int FindDifferencesBetween2SequenceContainerFiles() {
	338	const string seqFile1 = "D:\\tal\\yaep15\\fastml2.01\\originalDataForPaper\\seq_joint.txt";
	339	const string seqFile2 = "D:\\tal\\yaep15\\fastml2.01\\originalDataForPaper\\seq_marginal.txt";
	340	const string reportFileDiffAndTime = "D:\\tal\\yaep15\\fastml2.01\\originalDataForPaper\\reportFileDif.txt";
	341
	342	alphabet* _alph = new amino;
	343	ifstream f(seqFile1.c_str());
	344	sequenceContainer sd1 = recognizeFormat::read(f,_alph);
	345	f.close();
	346
	347	ifstream f2(seqFile2.c_str());
	348	sequenceContainer sd2 = recognizeFormat::read(f2,_alph);
	349	f2.close();
	350
	351	sequenceDataDiff sequenceDataDiff1f(sd1,sd2);
	352	sequenceDataDiff1f.computeDifferences();
	353	ofstream outdiff(reportFileDiffAndTime.c_str(),ios::app);
	354	sequenceDataDiff1f.printDiff(outdiff);
	355	outdiff.close();
	356	ofstream out;
	357	out.open(reportFileDiffAndTime.c_str(),ios::app);
	358	out.close();
	359	return 0;
	360	}⏎

+201

-0

programs/fastml/fastml.vcproj less more

	0	<?xml version="1.0" encoding="windows-1255"?>
	1	<VisualStudioProject
	2	ProjectType="Visual C++"
	3	Version="7.10"
	4	Name="fastml"
	5	ProjectGUID="{B3965C03-9119-4F3D-BD47-7E30CC30D2FB}"
	6	RootNamespace="fastml"
	7	Keyword="Win32Proj">
	8	<Platforms>
	9	<Platform
	10	Name="Win32"/>
	11	</Platforms>
	12	<Configurations>
	13	<Configuration
	14	Name="Debug\|Win32"
	15	OutputDirectory="Debug"
	16	IntermediateDirectory="Debug"
	17	ConfigurationType="1"
	18	CharacterSet="2">
	19	<Tool
	20	Name="VCCLCompilerTool"
	21	Optimization="0"
	22	AdditionalIncludeDirectories="..\..\libs\phylogeny\"
	23	PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
	24	MinimalRebuild="TRUE"
	25	BasicRuntimeChecks="3"
	26	RuntimeLibrary="5"
	27	UsePrecompiledHeader="0"
	28	WarningLevel="3"
	29	Detect64BitPortabilityProblems="TRUE"
	30	DebugInformationFormat="4"/>
	31	<Tool
	32	Name="VCCustomBuildTool"/>
	33	<Tool
	34	Name="VCLinkerTool"
	35	OutputFile="$(OutDir)/fastml.exe"
	36	LinkIncremental="2"
	37	GenerateDebugInformation="TRUE"
	38	ProgramDatabaseFile="$(OutDir)/fastml.pdb"
	39	SubSystem="1"
	40	TargetMachine="1"/>
	41	<Tool
	42	Name="VCMIDLTool"/>
	43	<Tool
	44	Name="VCPostBuildEventTool"/>
	45	<Tool
	46	Name="VCPreBuildEventTool"/>
	47	<Tool
	48	Name="VCPreLinkEventTool"/>
	49	<Tool
	50	Name="VCResourceCompilerTool"/>
	51	<Tool
	52	Name="VCWebServiceProxyGeneratorTool"/>
	53	<Tool
	54	Name="VCXMLDataGeneratorTool"/>
	55	<Tool
	56	Name="VCWebDeploymentTool"/>
	57	<Tool
	58	Name="VCManagedWrapperGeneratorTool"/>
	59	<Tool
	60	Name="VCAuxiliaryManagedWrapperGeneratorTool"/>
	61	</Configuration>
	62	<Configuration
	63	Name="Release\|Win32"
	64	OutputDirectory="Release"
	65	IntermediateDirectory="Release"
	66	ConfigurationType="1"
	67	CharacterSet="2">
	68	<Tool
	69	Name="VCCLCompilerTool"
	70	AdditionalIncludeDirectories="..\..\libs\phylogeny\"
	71	PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
	72	RuntimeLibrary="4"
	73	UsePrecompiledHeader="0"
	74	WarningLevel="3"
	75	Detect64BitPortabilityProblems="TRUE"
	76	DebugInformationFormat="3"/>
	77	<Tool
	78	Name="VCCustomBuildTool"/>
	79	<Tool
	80	Name="VCLinkerTool"
	81	OutputFile="$(OutDir)/fastml.exe"
	82	LinkIncremental="1"
	83	GenerateDebugInformation="TRUE"
	84	SubSystem="1"
	85	OptimizeReferences="2"
	86	EnableCOMDATFolding="2"
	87	TargetMachine="1"/>
	88	<Tool
	89	Name="VCMIDLTool"/>
	90	<Tool
	91	Name="VCPostBuildEventTool"/>
	92	<Tool
	93	Name="VCPreBuildEventTool"/>
	94	<Tool
	95	Name="VCPreLinkEventTool"/>
	96	<Tool
	97	Name="VCResourceCompilerTool"/>
	98	<Tool
	99	Name="VCWebServiceProxyGeneratorTool"/>
	100	<Tool
	101	Name="VCXMLDataGeneratorTool"/>
	102	<Tool
	103	Name="VCWebDeploymentTool"/>
	104	<Tool
	105	Name="VCManagedWrapperGeneratorTool"/>
	106	<Tool
	107	Name="VCAuxiliaryManagedWrapperGeneratorTool"/>
	108	</Configuration>
	109	</Configurations>
	110	<References>
	111	</References>
	112	<Files>
	113	<File
	114	RelativePath=".\bb_options.cpp">
	115	</File>
	116	<File
	117	RelativePath=".\bb_options.h">
	118	</File>
	119	<File
	120	RelativePath=".\bb_options_list.h">
	121	</File>
	122	<File
	123	RelativePath=".\bbAlg.cpp">
	124	</File>
	125	<File
	126	RelativePath=".\bbAlg.h">
	127	</File>
	128	<File
	129	RelativePath=".\bbComputeDownAlg.cpp">
	130	</File>
	131	<File
	132	RelativePath=".\bbComputeDownAlg.h">
	133	</File>
	134	<File
	135	RelativePath=".\bbComputeUpAlg.cpp">
	136	</File>
	137	<File
	138	RelativePath=".\bbComputeUpAlg.h">
	139	</File>
	140	<File
	141	RelativePath=".\bbEvaluateSpecificAV.cpp">
	142	</File>
	143	<File
	144	RelativePath=".\bbEvaluateSpecificAV.h">
	145	</File>
	146	<File
	147	RelativePath=".\bbfindBestAVDynProg.cpp">
	148	</File>
	149	<File
	150	RelativePath=".\bbfindBestAVDynProg.h">
	151	</File>
	152	<File
	153	RelativePath=".\bbNodeOrderAlg.cpp">
	154	</File>
	155	<File
	156	RelativePath=".\bbNodeOrderAlg.h">
	157	</File>
	158	<File
	159	RelativePath=".\bbReport.cpp">
	160	</File>
	161	<File
	162	RelativePath=".\bbReport.h">
	163	</File>
	164	<File
	165	RelativePath=".\computeMarginalReconstruction.cpp">
	166	</File>
	167	<File
	168	RelativePath=".\computeMarginalReconstruction.h">
	169	</File>
	170	<File
	171	RelativePath=".\fastml.cpp">
	172	</File>
	173	<File
	174	RelativePath=".\jointNoGamma.cpp">
	175	</File>
	176	<File
	177	RelativePath=".\jointNoGamma.h">
	178	</File>
	179	<File
	180	RelativePath=".\mainbb.cpp">
	181	</File>
	182	<File
	183	RelativePath=".\mainbb.h">
	184	</File>
	185	<File
	186	RelativePath=".\sequenceDataDiff.cpp">
	187	</File>
	188	<File
	189	RelativePath=".\sequenceDataDiff.h">
	190	</File>
	191	<File
	192	RelativePath=".\suffStatComponentJointNoGamma.cpp">
	193	</File>
	194	<File
	195	RelativePath=".\suffStatComponentJointNoGamma.h">
	196	</File>
	197	</Files>
	198	<Globals>
	199	</Globals>
	200	</VisualStudioProject>

+153

-0

programs/fastml/jointNoGamma.cpp less more

	0	#include "jointNoGamma.h"
	1	#include "treeIt.h"
	2	#include "seqContainerTreeMap.h"
	3	#include <fstream>
	4	#include <cmath>
	5	using namespace std;
	6
	7	jointNoGamma::jointNoGamma(const tree& et,
	8	const stochasticProcess& sp,
	9	const sequenceContainer& sc)
	10	: _et(et), _sp(sp), _sc(sc) {
	11	_cpih.fillPij(_et,_sp);
	12	}
	13
	14	void jointNoGamma::compute() {
	15
	16	suffStatGlobalHomPos ssc;
	17	suffStatGlobalHomPosJointNoGamma sscJointNoGam;
	18	ssc.allocatePlace(_et.getNodesNum(),_sc.alphabetSize());
	19	sscJointNoGam.allocatePlace(_et.getNodesNum(),_sc.alphabetSize());
	20
	21	vector<string> ancestralSequences(_et.getNodesNum());
	22	MDOUBLE totalLikelihoodOfReconstruction = 0;
	23	cout<<"doing position (joint): ";
	24	for (int pos=0; pos<_sc.seqLen(); ++pos) {
	25	cout<<pos+1<<" ";
	26	fillComputeUp(pos,ssc,sscJointNoGam);
	27	doubleRep likelihoodOfPos = 0;
	28
	29	vector<int> res =computeJointAncestralFromSSC(pos,ssc,sscJointNoGam,likelihoodOfPos);
	30	treeIterDownTopConst tIt(_et);
	31	for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
	32	if (mynode->isInternal()) {
	33	ancestralSequences[mynode->id()]+=_sc.getAlphabet()->fromInt(res[mynode->id()]);
	34	}
	35	}
	36	_jointLikelihoodOfPositions.push_back(likelihoodOfPos);
	37	}
	38	cout<<endl;
	39	fromJointReconstructionToSequenceContainer(ancestralSequences);
	40	}
	41
	42	void jointNoGamma::fillComputeUp(const int pos,
	43	suffStatGlobalHomPos& ssc,
	44	suffStatGlobalHomPosJointNoGamma& sscJointNoGam) {
	45	seqContainerTreeMap sctm(_sc,_et);
	46	ssc.allocatePlace(_et.getNodesNum(),_cpih.alphabetSize());
	47	treeIterDownTopConst tIt(_et);
	48	for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
	49	if (mynode->isLeaf()) {// leaf
	50	for(int letterInFather=0; letterInFather<_cpih.alphabetSize();letterInFather++) {
	51	const int seqID = sctm.seqIdOfNodeI(mynode->id());
	52	MDOUBLE totalVal = 0.0;
	53	for (int let=0; let<_cpih.alphabetSize();let++) {
	54	MDOUBLE val = _sc.getAlphabet()->relations(_sc[seqID][pos],let);
	55	if (val>0) {
	56	val*=_cpih.getPij(mynode->id(),letterInFather,let);
	57	totalVal +=val;
	58	}
	59	}
	60	//cerr<<"val =" << val <<" "; // REMOVE!
	61	//cerr<<"_pi->data(mynode->id(),pos)= "<<_pi->data(mynode->id(),pos)<<" ";//REMOVE
	62	ssc.set(mynode->id(),letterInFather,totalVal);
	63	sscJointNoGam.set(mynode->id(),letterInFather,_sc[seqID][pos]);
	64	}
	65	}
	66	else {
	67	for(int letterInFather=0; letterInFather<_cpih.alphabetSize();letterInFather++) {
	68	doubleRep maxProb=0.0;
	69	int bestLet = -1;
	70	for (int let=0; let<_cpih.alphabetSize();++let) {
	71	doubleRep tmpProb = 1;
	72	if (mynode->isRoot() == false) {
	73	tmpProb *= _cpih.getPij(mynode->id(),letterInFather,let);
	74	}
	75	for(int i=0; i < mynode->getNumberOfSons();++i){
	76	tmpProb *= ssc.get(mynode->getSon(i)->id(),let);
	77	}
	78	if (tmpProb>maxProb) {
	79	maxProb = tmpProb;
	80	bestLet = let;
	81	}
	82	}
	83	ssc.set(mynode->id(),letterInFather,maxProb);
	84	assert(bestLet>=0);
	85	assert(bestLet<_cpih.alphabetSize());
	86
	87	sscJointNoGam.set(mynode->id(),letterInFather,bestLet);
	88	if (mynode->isRoot()) break; // there's no meening to letterInFather in case of root.
	89	}
	90	}
	91	}
	92	}
	93
	94	vector<int> jointNoGamma::computeJointAncestralFromSSC(
	95	const int pos,
	96	const suffStatGlobalHomPos& ssc,
	97	const suffStatGlobalHomPosJointNoGamma& sscFASTML,
	98	doubleRep & likelihoodOfReconstruction) {
	99	treeIterTopDownConst tIt(_et);
	100	vector<int> res(_et.getNodesNum());
	101	for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
	102	if (mynode->isRoot() == false) {
	103	int letterInFather = res[mynode->father()->id()];
	104	int tmp = sscFASTML.get(mynode->id(),letterInFather);
	105	res[mynode->id()] = tmp;
	106	} else {//special case of the root
	107	MDOUBLE maxL = VERYSMALL;
	108	int bestCharInRoot = sscFASTML.get(mynode->id(),0);
	109	likelihoodOfReconstruction = ssc.get(mynode->id(),0)*_sp.freq(bestCharInRoot);;
	110	res[mynode->id()] = bestCharInRoot;
	111	}
	112	}
	113	return res;
	114	}
	115
	116	void jointNoGamma::fromJointReconstructionToSequenceContainer(const vector<string> & ancestralSequences){
	117	_resultSec = _sc;
	118	treeIterDownTopConst tIt2(_et);
	119	for (tree::nodeP mynode = tIt2.first(); mynode != tIt2.end(); mynode = tIt2.next()) {
	120	if (mynode->isInternal()) {
	121	sequence tmp(ancestralSequences[mynode->id()],mynode->name(),"joint reconstruction",_resultSec.numberOfSeqs(),_sc.getAlphabet());
	122	_resultSec.add(tmp);
	123	}
	124	}
	125	}
	126
	127	void jointNoGamma::outputTheJointProbAtEachSite(const string & outputFileProbJoint) {
	128	ofstream jointProbOutput(outputFileProbJoint.c_str());
	129	MDOUBLE totalLogLikelihood =0;
	130	for (int j=0; j < _jointLikelihoodOfPositions.size(); ++j) {
	131	totalLogLikelihood+=log(_jointLikelihoodOfPositions[j]);
	132	jointProbOutput<<"Joint log likelihood of position "<<j+1;// j+1 so that positions start from 1, and not from 0.
	133	jointProbOutput<<": "<<log(_jointLikelihoodOfPositions[j])<<endl;
	134	}
	135	jointProbOutput<<"total log likelihood of joint reconstruction: "<<totalLogLikelihood<<endl;
	136
	137	jointProbOutput<<endl<<"++++++++++++++++++++++++ joing log likelihood +++++++++++++++++++++++++++++++"<<endl<<endl;
	138	for (int j=0; j < _jointLikelihoodOfPositions.size(); ++j) {
	139	jointProbOutput<<j+1<<",";// j+1 so that positions start from 1, and not from 0.
	140	jointProbOutput<<log(_jointLikelihoodOfPositions[j])<<endl;
	141	}
	142
	143	jointProbOutput<<endl<<"++++++++++++++++++++++++ joint probs +++++++++++++++++++++++++++++++"<<endl<<endl;
	144	for (int j=0; j < _jointLikelihoodOfPositions.size(); ++j) {
	145	jointProbOutput<<j+1<<",";// j+1 so that positions start from 1, and not from 0.
	146	jointProbOutput<<_jointLikelihoodOfPositions[j]<<endl;
	147	}
	148
	149	jointProbOutput.close();
	150	}
	151
	152

+44

-0

programs/fastml/jointNoGamma.h less more

	0	#ifndef ___JOINT_NO_GAMMA
	1	#define ___JOINT_NO_GAMMA
	2
	3	#include "definitions.h"
	4	#include "tree.h"
	5	#include "stochasticProcess.h"
	6	#include "sequenceContainer.h"
	7	#include "computePijComponent.h"
	8	#include "suffStatComponent.h"
	9	#include "suffStatComponentJointNoGamma.h"
	10
	11	class jointNoGamma {
	12	public:
	13	explicit jointNoGamma(
	14	const tree& et,
	15	const stochasticProcess& sp,
	16	const sequenceContainer& sc);
	17
	18	void compute();
	19	void outputTheJointProbAtEachSite(const string & outputFileProbJoint);
	20	sequenceContainer getTheJointReconstruction() const {return _resultSec;}
	21
	22	private:
	23	void fillComputeUp(const int pos,
	24	suffStatGlobalHomPos& ssc,
	25	suffStatGlobalHomPosJointNoGamma& sscJointNoGam);
	26	vector<int> computeJointAncestralFromSSC(
	27	const int pos,
	28	const suffStatGlobalHomPos& ssc,
	29	const suffStatGlobalHomPosJointNoGamma& sscFASTML,
	30	doubleRep & likelihoodOfReconstruction);
	31	void fromJointReconstructionToSequenceContainer(const vector<string> & ancestralSequences);
	32
	33	const tree& _et;
	34	const stochasticProcess& _sp;
	35	const sequenceContainer& _sc;
	36	sequenceContainer _resultSec;
	37	computePijHom _cpih;
	38	vector<doubleRep> _jointLikelihoodOfPositions;
	39	};
	40
	41
	42
	43	#endif

+580

-0

programs/fastml/mainbb.cpp less more

	0	#include "mainbb.h"
	1
	2	#include "aaJC.h"
	3	#include "amino.h"
	4	#include "bbAlg.h"
	5	#include "bestAlpha.h"
	6	#include "bblEM.h"
	7	#include "chebyshevAccelerator.h"
	8	#include "clustalFormat.h"
	9	#include "computeMarginalReconstruction.h"
	10	#include "distanceTable.h"
	11	#include "fastaFormat.h"
	12	#include "gammaDistribution.h"
	13	#include "jointNoGamma.h"
	14	#include "likeDist.h"
	15	#include "logFile.h"
	16	#include "maseFormat.h"
	17	#include "molphyFormat.h"
	18	#include "nexusFormat.h"
	19	#include "nucleotide.h"
	20	#include "nucJC.h"
	21	#include "nj.h"
	22	#include "tamura92.h"
	23	#include "gtrModel.h"
	24	#include "hky.h"
	25	#include "phylipFormat.h"
	26	#include "readDatMatrix.h"
	27	#include "recognizeFormat.h"
	28	#include "trivialAccelerator.h"
	29	#include "uniDistribution.h"
	30	#include "bestGtrModelParams.h"
	31	#include "bestTamura92param.h"
	32	#include "bestHKYparam.h"
	33
	34	//For the codon part
	35	#include "bestAlphaAndK.h"
	36	#include "codonUtils.h"
	37
	38
	39	#include <fstream>
	40	#include <iostream>
	41	using namespace std;
	42
	43	mainbb::mainbb(int argc, char* argv[]) {
	44	fillOptionsParameters(argc,argv);
	45	myLog::setLog(_options->reportFile,10);
	46	printBBProjectInfo();
	47	printSearchParameters();
	48	getStartingSequenceData();
	49	getStartingStochasticProcess();
	50	getStartingEvolTreeTopology();
	51	//_et.rootToUnrootedTree();
	52	//_et.createFlatLengthMatrix(0.001); // TO BE USED FOR TESTING ONLY.
	53	if (_options->modelName == bb_options::nyCodon)
	54	getStartingBLAndModelParam(); //for NY codon Models
	55	else
	56	getStartingBranchLengthsAndAlpha();
	57	printOutputTree();
	58	if (_options->doJoint) {
	59	if (_options->distributionName == bb_options::gam) {
	60	findAncestralSequencesGammaJoint();
	61	} else {
	62	findAncestralSequencesHomJoint();
	63	}
	64	}
	65	getMarginalReconstruction();
	66	myLog::endLog();
	67	}
	68
	69	void mainbb::printAncestralSequencesGammaJoint() {
	70	replaceSequences(_resulutingJointReconstruction,_originSc);
	71	ofstream out(_options->outFile_seq_joint.c_str());
	72	// out<<"sequences of the joint reconstruction, model: "<<_options->modelNameStr()<<endl;
	73	switch (_options->seqOutputFormat){
	74	case (bb_options::mase) : maseFormat::write(out,_resulutingJointReconstruction); break;
	75	case (bb_options::fasta) : fastaFormat::write(out,_resulutingJointReconstruction); break;
	76	case (bb_options::clustal): clustalFormat::write(out,_resulutingJointReconstruction); break;
	77	case (bb_options::phylip) : phylipFormat::write(out,_resulutingJointReconstruction); break;
	78	case (bb_options::molphy) : molphyFormat::write(out,_resulutingJointReconstruction); break;
	79	case (bb_options::nexus) : nexusFormat::write(out,_resulutingJointReconstruction); break;
	80	}
	81	out.close();
	82	}
	83
	84	mainbb::~mainbb() {
	85	if (_alph) delete _alph;
	86	if (_options) delete _options;
	87	}
	88
	89	void mainbb::getStartingEvolTreeTopology(){
	90	if (_options->treefile=="") {
	91	getStartingNJtreeNjMLdis();
	92	}
	93	else getStartingTreeFromTreeFile();
	94	}
	95
	96
	97
	98	void mainbb::getStartingNJtreeNjMLdis() {
	99	// note that here ALWAYS, the ML distances are computed using
	100	// an homogenous rate distribution.
	101	uniDistribution lUni;
	102	// const pijAccelerator* lpijAcc = _sp->getPijAccelerator();// note this is just a copy of the pointer.
	103	const pijAccelerator* lpijAcc = _spVec[0].getPijAccelerator();// note this is just a copy of the pointer.
	104	stochasticProcess lsp(&lUni,lpijAcc);
	105
	106	likeDist pd1(lsp,0.01);
	107	VVdouble disTab;
	108	vector<string> vNames;
	109	giveDistanceTable(&pd1,
	110	_sc,
	111	disTab,
	112	vNames);
	113	getStartingTreeNJ_fromDistances(disTab,vNames);
	114	}
	115
	116	void mainbb::getStartingTreeNJ_fromDistances(const VVdouble& disTab,
	117	const vector<string>& vNames) {
	118	NJalg nj1;
	119	_et= nj1.computeTree(disTab,vNames);
	120
	121	}
	122
	123	void mainbb::getStartingTreeFromTreeFile(){
	124	_et= tree(_options->treefile);
	125	if (!_et.withBranchLength()) {
	126	_et.createFlatLengthMatrix(0.05);
	127	_options->optimizeBrLenOnStartingTree = true;
	128	}
	129	}
	130
	131	void mainbb::getStartingBranchLengthsAndAlpha(){
	132	if (_options->distributionName == bb_options::hom) {
	133	if (_options->optimizeBrLenOnStartingTree == true) {
	134	cout<<"Optimizing branch lengths & Model parametrs (Homogenuos model)..."<<endl;
	135	if (_options->modelName ==bb_options::hky){
	136	bestHkyParamAndBBL bestHkyParamAndBBL1(_et,_sc,_spVec[0],NULL);
	137	cout<<"Optimized HKY model & bb"<<"like = "<<bestHkyParamAndBBL1.getBestL()<<endl;
	138	}
	139	else if (_options->modelName == bb_options::tamura92){
	140	bestTamura92ParamAndBBL bestTamura92ParamAndBBL1(_et,_sc,_spVec[0],NULL);
	141	cout<<"Optimized tamura92 model & bb"<<endl;
	142	}
	143	else if (_options->modelName == bb_options::nucgtr){
	144	bestGtrModel bestGtrModel1(_et,_sc,_spVec[0],NULL,5,0.05,0.01,5,true,false);
	145	cout<<"Optimized nucgtr model & bb"<<endl;
	146	}
	147	else {
	148	cout<<"No models parametrs need to be optimaized"<<endl;
	149	bblEM bblem1(_et,_sc,_spVec[0],NULL);
	150	}
	151	//bblEM bblem1(_et,_sc,*_sp,NULL);
	152	//brLenOptEM::optimizeBranchLength1G_EM(_et,_sc,*_sp,NULL);
	153	}
	154	else // optimize only models parametrs if is needed
	155	{
	156	cout<<"Optimizing Model parametrs no branch lengths (Homogenuos model)..."<<endl;
	157	if (_options->modelName ==bb_options::hky){
	158	bestHkyParamFixedTree bestHkyParamFixedTree1(_et,_sc,_spVec[0],NULL);
	159	cout<<"Optimized HKY model"<<endl;
	160	}
	161	else if (_options->modelName == bb_options::tamura92){
	162	bestTamura92ParamFixedTree bestTamura92ParamFixedTree1(_et,_sc,_spVec[0],NULL);
	163	cout<<"Optimized tamura92 model"<<endl;
	164	}
	165	else if (_options->modelName == bb_options::nucgtr){
	166	bestGtrModel bestGtrModel1(_et,_sc,_spVec[0],NULL,5,0.05,0.01,5,false,false); // 2nd last parameter : const bool optimizeTree = false
	167	cout<<"Optimized nucgtr model"<<endl;
	168	}
	169	else{
	170	cout<<"No models parametrs need to be optimaized"<<endl;
	171	}
	172	}
	173	}
	174	else { // GAMMA MODEL!
	175	// Here we want to optimize branch lengths with a gamma model.
	176	// there are three options:
	177	//(1) User provides the alpha and no bbl.
	178	//(2) User provides the alpha and bbl
	179	//(3) Alpha is optimized from the data and bbl.
	180
	181
	182	// User provides the alpha and bbl
	183	if ((_options->userProvideAlpha == true) && (_options->optimizeBrLenOnStartingTree == true)) {
	184	cout<<"Optimizing branch lengths (Gamma model, user alpha)..."<<endl;
	185	MDOUBLE intitalAlpha = _options->gammaPar;
	186	static_cast<gammaDistribution*>(_spVec[0].distr())->setAlpha(intitalAlpha);
	187	if (_options->modelName ==bb_options::hky){
	188	bestHkyParamAndBBL bestHkyParamAndBBL1(_et,_sc,_spVec[0],NULL);
	189	cout<<"Optimized HKY model & bb"<<endl;
	190	}
	191	else if (_options->modelName == bb_options::tamura92){
	192	bestTamura92ParamAndBBL bestTamura92ParamAndBBL1(_et,_sc,_spVec[0],NULL);
	193	cout<<"Optimized tamura92 model & bb"<<endl;
	194	}
	195	else if (_options->modelName == bb_options::nucgtr){
	196	bestGtrModel bestGtrModel1(_et,_sc,_spVec[0],NULL,5,0.05,0.01,5,true,false);
	197	cout<<"Optimized nucgtr model & bb"<<endl;
	198	}
	199	else {
	200	cout<<"No models parametrs were needed to be optimaized"<<endl;
	201	bblEM bblem1(_et,_sc,_spVec[0],NULL);
	202	}
	203
	204	}
	205	else if ((_options->userProvideAlpha == true) && (_options->optimizeBrLenOnStartingTree == false)) { // User provides the alpha and no bbl.
	206	cout<<"No Optimizing branch lengths (Gamma model, user alpha)..."<<endl;
	207	if (_options->modelName ==bb_options::hky){
	208	bestHkyParamFixedTree bestHkyParamFixedTree1(_et,_sc,_spVec[0],NULL);
	209	cout<<"Optimized HKY model"<<endl;
	210	}
	211	else if (_options->modelName == bb_options::tamura92){
	212	bestTamura92ParamFixedTree bestTamura92ParamFixedTree1(_et,_sc,_spVec[0],NULL);
	213	cout<<"Optimized tamura92 model"<<endl;
	214	}
	215	else if (_options->modelName == bb_options::nucgtr){
	216	bestGtrModel bestGtrModel1(_et,_sc,_spVec[0],NULL,5,0.05,0.01,5,false,false); // 2nd last parameter : const bool optimizeTree = false
	217	cout<<"Optimized nucgtr model"<<endl;
	218	}
	219	else{
	220	cout<<"No models parametrs were needed to be optimaized"<<endl;
	221	return;
	222	}
	223	}
	224	else if (_options->userProvideAlpha == false) { //Alpha is optimized from the data and bbl.
	225	cout<<"Optimizing branch lengths and alpha (Gamma model) ..."<<endl;
	226	if (_options->modelName ==bb_options::hky){
	227	bestHkyParamAlphaAndBBL bestHkyParamAlphaAndBBL1(_et,_sc,_spVec[0],NULL);
	228	cout<<"Optimized HKY model & bbl & alpha"<<endl;
	229	}
	230	else if (_options->modelName == bb_options::tamura92){
	231	bestTamura92ParamAlphaAndBBL bestTamura92ParamAlphaAndBBL1(_et,_sc,_spVec[0],NULL);
	232	cout<<"Optimized tamura92 model & bbl & alpha"<<endl;
	233	}
	234	else if (_options->modelName == bb_options::nucgtr){
	235	bestGtrModel bestGtrModel1(_et,_sc,_spVec[0]);
	236	cout<<"Optimized nucgtr model & bbl & alpha"<<endl;
	237	}
	238	else {
	239	bestAlphaAndBBL bbl2(_et,_sc,_spVec[0]);
	240	cout<<"Optimized bbl & alpha no model parametrs need to be optimaized"<<endl;
	241	}
	242	}
	243	}
	244	}
	245
	246	void mainbb::getStartingStochasticProcess() {
	247	int numberOfCategories = _options->gammaCategies;
	248	MDOUBLE alpha = _options->gammaPar;
	249	if (_options->distributionName == bb_options::hom) {
	250	numberOfCategories = 1; // forcing homogenous model.
	251	alpha = 1.0;
	252	cout<<"Using homogenous model (no among site rate variation)"<<endl;
	253	} else {
	254	cout<<"Using a Gamma model with: "<<numberOfCategories<<" discrete categories "<<endl;
	255	}
	256	distribution *dist = new gammaDistribution(alpha,numberOfCategories);
	257	replacementModel *probMod=NULL;
	258	pijAccelerator *pijAcc=NULL;
	259	MDOUBLE initTrTv = 1;
	260	MDOUBLE initTamura92Theta = 0.5;
	261	switch (_options->modelName){
	262	case (bb_options::day):
	263	probMod=new pupAll(datMatrixHolder::dayhoff);
	264	if (_options->useChebyshev == true) {
	265	pijAcc = new chebyshevAccelerator(probMod);
	266	} else {
	267	pijAcc = new trivialAccelerator(probMod);
	268	}
	269	cout<<"Amino acid replacement matrix is Dayhoff"<<endl;
	270	break;
	271	case (bb_options::jtt):
	272	probMod=new pupAll(datMatrixHolder::jones);
	273	if (_options->useChebyshev == true) {
	274	pijAcc = new chebyshevAccelerator(probMod);
	275	} else {
	276	pijAcc = new trivialAccelerator(probMod);
	277	}
	278	cout<<"Amino acid replacement matrix is JTT"<<endl;
	279	break;
	280	case (bb_options::lg):
	281	probMod=new pupAll(datMatrixHolder::lg);
	282	if (_options->useChebyshev == true) {
	283	pijAcc = new chebyshevAccelerator(probMod);
	284	} else {
	285	pijAcc = new trivialAccelerator(probMod);
	286	}
	287	cout<<"Amino acid replacement matrix is LG"<<endl;
	288	break;
	289	case (bb_options::rev):
	290	probMod=new pupAll(datMatrixHolder::mtREV24);
	291	if (_options->useChebyshev == true) {
	292	pijAcc = new chebyshevAccelerator(probMod);
	293	} else {
	294	pijAcc = new trivialAccelerator(probMod);
	295	}
	296	cout<<"Amino acid replacement matrix is mtREV24"<<endl;
	297	break;
	298	case (bb_options::wag):
	299	probMod=new pupAll(datMatrixHolder::wag);
	300	if (_options->useChebyshev == true) {
	301	pijAcc = new chebyshevAccelerator(probMod);
	302	} else {
	303	pijAcc = new trivialAccelerator(probMod);
	304	}
	305	cout<<"Amino acid replacement matrix is WAG"<<endl;
	306	break;
	307	case (bb_options::cprev):
	308	probMod=new pupAll(datMatrixHolder::cpREV45);
	309	if (_options->useChebyshev == true) {
	310	pijAcc = new chebyshevAccelerator(probMod);
	311	} else {
	312	pijAcc = new trivialAccelerator(probMod);
	313	}
	314	cout<<"Amino acid replacement matrix is cpREV45"<<endl;
	315	break;
	316	case (bb_options::empiriCodon):
	317	probMod=new pupAll(datMatrixHolder::empiriCodon,61);
	318	if (_options->useChebyshev == true) {
	319	pijAcc = new chebyshevAccelerator(probMod,61);
	320	} else {
	321	pijAcc = new trivialAccelerator(probMod);
	322	}
	323	cout<<"Codon replacement matrix is empiriCodon of adrian"<<endl;
	324	break;
	325	case (bb_options::nucjc):
	326	probMod=new nucJC;
	327	pijAcc = new trivialAccelerator(probMod);
	328	cout<<"Nucleotide substitution model is Jukes and Cantor"<<endl;
	329	break;
	330	case (bb_options::hky):
	331	probMod=new hky(evaluateCharacterFreq(_sc),initTrTv);
	332	pijAcc = new trivialAccelerator(probMod);
	333	break;
	334	case (bb_options::tamura92):
	335	probMod=new tamura92(initTamura92Theta,initTrTv);
	336	pijAcc = new trivialAccelerator(probMod);
	337	break;
	338	case (bb_options::nucgtr):
	339	probMod=new gtrModel(evaluateCharacterFreq(_sc));
	340	pijAcc = new trivialAccelerator(probMod);
	341	break;
	342	case (bb_options::aajc):
	343	probMod=new aaJC; pijAcc = new trivialAccelerator(probMod);
	344	cout<<"Amino acid replacement matrix is Jukes and Cantor"<<endl;
	345	break;
	346	//this part for the codon model c & w init as with no selection
	347	case (bb_options::nyCodon):
	348	{
	349	codon codonAlph;
	350	Vdouble freq = computeFreq(codonAlph);
	351	probMod = new wYangModel(1.0,1.0,freq, 0, &codonAlph);
	352	pijAcc = new trivialAccelerator(probMod);
	353	cout<<"Codon replacement matrix is NY model"<<endl;
	354	}
	355	break;
	356	default:
	357	errorMsg::reportError("this probablistic model is not yet available");
	358	}
	359	stochasticProcess sp(dist, pijAcc);
	360	_spVec.push_back(sp);
	361	if (probMod) delete probMod;
	362	if (pijAcc) delete pijAcc;
	363	if (dist) delete dist;
	364	}
	365
	366	void mainbb::printOutputTree() {
	367	ofstream f;
	368	string fileName1=_options->outTreeFileNewick;
	369	f.open(fileName1.c_str());
	370	_et.output(f,tree::PHYLIP,true);
	371	//_et.output(f,tree::PHYLIP,false);
	372	f.close();
	373	cout<<"The tree in 'Newick tree format' (with the internal nodes labeled)\nwas written to a file name called "<<fileName1<<endl;
	374	fileName1 = _options->outTreeFileAncestor;
	375	f.open(fileName1.c_str());
	376	_et.output(f,tree::ANCESTOR);
	377	f.close();
	378	cout<<"The tree in 'ANCESTOR tree format' was written to a file name called "<<fileName1<<endl;
	379	}
	380
	381	void mainbb::fillOptionsParameters(int argc, char* argv[]) {
	382	_options = new bb_options(argc, argv);
	383	}
	384
	385	void mainbb::getStartingSequenceData(){
	386	if (_options->alphabet_size==4) _alph = new nucleotide;
	387	else if (_options->alphabet_size == 20) _alph = new amino;
	388	else if (_options->alphabet_size == 61) _alph = new codon;
	389	else errorMsg::reportError("no such alphabet in function rate4site::getStartingSequenceData");
	390
	391	ifstream fstream1(_options->seqfile.c_str());
	392	_sc = recognizeFormat::read(fstream1,_alph);
	393	_originSc = _sc;
	394	_sc.changeGaps2MissingData();
	395	}
	396
	397	void mainbb::printSearchParameters() {
	398	if (_options->verbose) {
	399	LOG(1,<<"\nBB parameters: "<<endl);
	400	LOG(1,<<endl);
	401	LOG(1,<<"-------------------------------------------------------------------------------"<<endl);
	402	LOG(1,<<endl);
	403	if (_options->treefile.size()>0) {LOG(1,<<"Tree file is: "<<_options->treefile<<endl)}
	404	else LOG(1,<<"Starting tree is the NJ tree "<<endl);
	405	if (_options->seqfile.size()>0) LOG(1,<<"Sequence file is: "<<_options->seqfile<<endl);
	406	}
	407	}
	408
	409	void mainbb::printBBProjectInfo() {
	410	LOG(1,<<"*******************************************************************************"<<endl);
	411	LOG(1,<<"B&B: A Branch and Bound algorithm for Ancestral Sequence Reconstruction. "<<endl);
	412	LOG(1,<<"For information, please send email to Tal Pupko: talp@post.tau.ac.il "<<endl);
	413	LOG(1,<<"Ref: Pupko, T., Pe'er, I., Graur, D. Hasegawa, M., and Friedman N. 2002. "<<endl);
	414	LOG(1,<<"A branch-and-bound algorithm for the inference of ancestral amino-acid "<<endl);
	415	LOG(1,<<"sequences when the replacement rate varies among sites: Application to the "<<endl);
	416	LOG(1,<<"evolution of five gene families. Bioinformatics 18: 1116-1123. "<<endl);
	417	LOG(1,<<"*******************************************************************************"<<endl);
	418	LOG(1,<<endl);
	419	}
	420
	421	void mainbb::findAncestralSequencesGammaJoint() {
	422	bbAlg::boundMethod bm;
	423	if (_options->boundMethod == bb_options::max) bm=bbAlg::max;
	424	else if (_options->boundMethod == bb_options::sum) bm=bbAlg::sum;
	425	else if (_options->boundMethod == bb_options::both) bm=bbAlg::both;
	426
	427	bbAlg bbAlg1(_et,_spVec,_sc,bm,_options->reportFile,_options->computeAgainExactTreshold,_forceDistr);
	428	cout<<"after bbAlg in findAncestralSequencesGammaJoint()"<<endl;
	429	//bbAlg bbAlg1(_et,*_sp,_sc,bm,_options->reportFile,_options->computeAgainExactTreshold);
	430	MDOUBLE res = bbAlg1.bbReconstructAllPositions(_resulutingJointReconstruction);
	431	cout<<" the likelihood of this reconstruction is: "<<res<<endl;
	432	bbAlg1.outputTheJointProbAtEachSite(_options->outFile_prob_joint);
	433	printAncestralSequencesGammaJoint();
	434	}
	435
	436	void mainbb::findAncestralSequencesHomJoint() {
	437	//jointNoGamma jng(_et,*_sp,_sc);
	438	jointNoGamma jng(_et,_spVec[0],_sc);
	439	jng.compute();
	440	jng.outputTheJointProbAtEachSite(_options->outFile_prob_joint);
	441	sequenceContainer withAncestral = jng.getTheJointReconstruction();
	442	replaceSequences(withAncestral,_originSc);
	443	ofstream jointNoGammaReconstructionOutputFile(_options->outFile_seq_joint.c_str());
	444	// jointNoGammaReconstructionOutputFile<<"sequences of the joint reconstruction, model (hom): "<<_options->modelNameStr()<<endl;
	445	switch (_options->seqOutputFormat) {
	446	case bb_options::mase:
	447	maseFormat::write(jointNoGammaReconstructionOutputFile,withAncestral);
	448	break;
	449	case bb_options::molphy:
	450	molphyFormat::write(jointNoGammaReconstructionOutputFile,withAncestral);
	451	break;
	452	case bb_options::clustal:
	453	clustalFormat::write(jointNoGammaReconstructionOutputFile,withAncestral);
	454	break;
	455	case bb_options::fasta:
	456	fastaFormat::write(jointNoGammaReconstructionOutputFile,withAncestral);
	457	break;
	458	case bb_options::phylip:
	459	phylipFormat::write(jointNoGammaReconstructionOutputFile,withAncestral);
	460	break;
	461	case bb_options::nexus:
	462	nexusFormat::write(jointNoGammaReconstructionOutputFile,withAncestral);
	463	break;
	464	default: errorMsg::reportError(" format not implemented yet in this version... ",1);
	465	}
	466	}
	467
	468
	469	void mainbb::getMarginalReconstruction(){
	470	//computeMarginalReconstruction cmr(_et,*_sp,_sc);
	471	computeMarginalReconstruction cmr(_et,_spVec,_sc);
	472	cmr.compute(_forceDistr);
	473	//cmr.compute();
	474	cmr.outputTheMarginalProbForEachCharForEachNode(_options->outFile_prob_marginal);
	475	sequenceContainer withAncestral = cmr.getResultingMarginalReconstruction();
	476	replaceSequences(withAncestral,_originSc);
	477	ofstream marginalReconstructionOutputFile(_options->outFile_seq_marginal.c_str());
	478	// marginalReconstructionOutputFile<<"sequences of the marginal reconstruction, model: "<<_options->modelNameStr()<<endl;
	479	switch (_options->seqOutputFormat) {
	480	case bb_options::mase:
	481	maseFormat::write(marginalReconstructionOutputFile,withAncestral);
	482	break;
	483	case bb_options::molphy:
	484	molphyFormat::write(marginalReconstructionOutputFile,withAncestral);
	485	break;
	486	case bb_options::clustal:
	487	clustalFormat::write(marginalReconstructionOutputFile,withAncestral);
	488	break;
	489	case bb_options::fasta:
	490	fastaFormat::write(marginalReconstructionOutputFile,withAncestral);
	491	break;
	492	case bb_options::phylip:
	493	phylipFormat::write(marginalReconstructionOutputFile,withAncestral);
	494	break;
	495	case bb_options::nexus:
	496	nexusFormat::write(marginalReconstructionOutputFile,withAncestral);
	497	break;
	498	default: errorMsg::reportError(" format not implemented yet in this version... ",1);
	499	}
	500	marginalReconstructionOutputFile.close();
	501	}
	502
	503
	504	//This part for NY codon model
	505	//for optomize the w yang model under gamma model and BBL
	506	void mainbb::getStartingBLAndModelParam()
	507	{
	508	// GAMMA MODEL FOR W Yang Model
	509	// Here we want to optimize branch lengths with a gamma model.
	510	// there are three options:
	511	//(1) User provides the alpha and no bbl.
	512	//(2) User provides the alpha and bbl
	513	//(3) Alpha is optimized from the data and bbl.
	514	cout<<"Optimization of NY model with gamma - M5 in PAML"<<endl<<endl;
	515	createStochasticProcessVec();
	516	if ((_options->userProvideAlpha == true) && (_options->optimizeBrLenOnStartingTree == true)) {
	517	cout<<"Optimizing branch lengths & parametrs model: beta + k (Gamma model, user alpha)..."<<endl;
	518	optimizeSelectonParameters bestParams(_et,_sc,_spVec,_forceDistr,true,true,false,false,false,true,false,3,3,0.01,0.01,0.1,20,20);
	519	}
	520
	521	else if ((_options->userProvideAlpha == true) && (_options->optimizeBrLenOnStartingTree == false)) {
	522	cout<<"Optimizing parametrs model: k + beta (Gamma model, user alpha, user branch lengths)..."<<endl;
	523	optimizeSelectonParameters bestParams(_et,_sc,_spVec,_forceDistr,0,1,0,0,0,1,0);
	524
	525	}
	526	else if (_options->userProvideAlpha == false) {
	527	cout<<"Optimizing branch lengths and model parametrs alpha + beta +k (Gamma model) ... "<<endl;
	528	optimizeSelectonParameters bestParams(_et,_sc,_spVec,_forceDistr,1,1,0,0,0,0,0);
	529	}
	530	}
	531
	532
	533	void mainbb::createStochasticProcessVec()
	534	{
	535	wYangModel * baseModel = static_cast<wYangModel*>(_spVec[0].getPijAccelerator()->getReplacementModel());
	536	wYangModel tmp(*baseModel);
	537	_forceDistr = new generalGammaDistribution(_options->gammaPar,_options->gammaPar,_options->gammaCategies);
	538	_spVec.resize(_forceDistr->categories());
	539	uniDistribution dist;
	540	for (int categor=0; categor<_forceDistr->categories();categor++){
	541	wYangModel tmpModel(tmp);
	542	tmpModel.setW(_forceDistr->rates(categor));
	543	trivialAccelerator pijAcc(&tmpModel);
	544	stochasticProcess tmpSp(&dist,&pijAcc);
	545	_spVec[categor] = tmpSp;
	546	}
	547	normalizeMatrices(_spVec,_forceDistr);
	548
	549	}
	550
	551	Vdouble mainbb::computeFreq(codon &codonAlph){
	552	Vdouble pi;
	553	nucleotide alph;
	554	sequenceContainer nucSc;
	555	ifstream in(_options->seqfile.c_str());
	556	nucSc = recognizeFormat::readUnAligned(in, &alph);
	557	nucSc.changeGaps2MissingData();
	558	in.close();
	559	pi = freqCodonF3x4(nucSc,&codonAlph);
	560	makeSureNoZeroFreqs(pi);
	561	return pi;
	562	}
	563
	564	void mainbb::replaceSequences(sequenceContainer &sc2change,sequenceContainer &originSc)
	565	{
	566	for (int s = 0; s < originSc.numberOfSeqs();s++)
	567	{
	568	string name = originSc[s].name();
	569	for ( int i = 0;i<sc2change.numberOfSeqs(); i++)
	570	{
	571	if (sc2change[i].name() == name)
	572	{
	573	sc2change[i] = originSc[s];
	574	break;
	575	}
	576	}
	577
	578	}
	579	}

+71

-0

programs/fastml/mainbb.h less more

	0	#ifndef ___BB__MAIN__FILE
	1	#define ___BB__MAIN__FILE
	2
	3	#include "bb_options.h"
	4	#include "sequenceContainer.h"
	5	#include "stochasticProcess.h"
	6	#include "tree.h"
	7	#include "codon.h"
	8
	9	#include "suffStatComponent.h"
	10
	11	#include <vector>
	12	using namespace std;
	13
	14
// Holds the state of one reconstruction run (options, sequence data, tree,
// stochastic processes) and declares the private steps that load the data,
// prepare the tree and model, and perform the joint and marginal
// reconstructions.
class mainbb {
public:
	explicit mainbb(int argc, char* argv[]);
	virtual ~mainbb();

private:
	const bb_options* _options; // allocated in fillOptionsParameters()
	sequenceContainer _sc; // input sequences, gaps changed to missing data
	sequenceContainer _originSc; // copy of the input sequences taken before the gaps are changed
	tree _et;
	vector<stochasticProcess> _spVec; // the stochastic process(es);
	// for the codon Yang model with gamma, createStochasticProcessVec()
	// resizes it to one process per category of _forceDistr
	distribution *_forceDistr; // the w distribution of the Yang codon model (allocated in createStochasticProcessVec())

	alphabet* _alph; // allocated in getStartingSequenceData() according to the alphabet size
	sequenceContainer _resulutingJointReconstruction; // filled by the joint reconstruction with gamma

	void getStartingStochasticProcess();
	void createStochasticProcessVec();
	Vdouble computeFreq(codon &codonAlph);

	// get starting tree
	void getStartingEvolTreeTopology();
	void getStartingNJtreeNjMLdis();
	void getStartingTreeNJ_fromDistances(const VVdouble& disTab,const vector<string>& vNames);
	void getStartingTreeFromTreeFile();
	void getStartingBranchLengthsAndAlpha();
	void printOutputTree();

	// get starting tree and codon model
	void getStartingBLAndModelParam();

	// JOINT WITH GAMMA
	void printAncestralSequencesGammaJoint();
	void findAncestralSequencesGammaJoint();

	// JOINT WITHOUT GAMMA
	void findAncestralSequencesHomJoint();

	// MARGINAL RECONSTRUCTION:
	void getMarginalReconstruction();


	void fillOptionsParameters(int argc, char* argv[]);
	void getStartingSequenceData();
	void printSearchParameters();
	void printBBProjectInfo();
	// overwrites the sequences of sc2change with the same-named sequences of originSc
	void replaceSequences(sequenceContainer &sc2change,sequenceContainer &originSc);


};
	67
	68
	69	#endif
	70

+49

-0

programs/fastml/sequenceDataDiff.cpp less more

	0	#include "sequenceDataDiff.h"
	1	#include <iostream>
	2	using namespace std;
	3
	4	void sequenceDataDiff::computeDifferences(){
	5	for (int i=0;i<_sc1.numberOfSeqs();++i) {
	6	string name1 = _sc1[i].name();
	7	int idOf1in2 = _sc2.getId(name1,false);//return -1 if not found...
	8	if (idOf1in2==-1) {
	9	string x = "sequence does not exist ";
	10	x+=name1;
	11	unitDiff ud(x);
	12	_differences.push_back(ud);
	13	continue;
	14	}
	15	const sequence& sequence1 = _sc1[i];
	16	const sequence& sequence2 = _sc2[i];
	17	if (sequence1.seqLen() != sequence1.seqLen()) {
	18	string x = "sequences don't have the same length ";
	19	x+=name1;
	20	unitDiff ud(x);
	21	_differences.push_back(ud);
	22	continue;
	23	}
	24
	25	for (int j=0; j < sequence1.seqLen(); ++j) {
	26	if (sequence1[j] != sequence2[j]) {
	27	unitDiff ud(name1,j,sequence1.toString(j),sequence2.toString(j));
	28	_differences.push_back(ud);
	29	}
	30	}
	31	}
	32	}
	33
	34
	35	void sequenceDataDiff::printDiff(ostream& out) {
	36	for (int i=0; i < _differences.size(); ++i) {
	37	out<<_differences[i]._seqName;
	38	out<<" ";
	39	out<<_differences[i]._pos;
	40	out<<" ";
	41	out<<_differences[i]._letInSd1;
	42	out<<" ";
	43	out<<_differences[i]._letInSd2;
	44	out<<endl;
	45	}
	46	}
	47
	48

+45

-0

programs/fastml/sequenceDataDiff.h less more

	0	#ifndef ___SEQ__DATA__DIF
	1	#define ___SEQ__DATA__DIF
	2
	3	#include "sequenceContainer.h"
	4
	5	#include <fstream>
	6	#include <iostream>
	7	#include <string>
	8	using namespace std;
	9
	10	// this class represents a single difference between a pair of sequences.
	11	// I.e., it is used here, to show a difference between two approaches for ancestral sequence
	12	// reconstruction, for example, Joint vs. Marginal, or With and Without Gamma.
	13
	14	class unitDiff{
	15	friend class sequenceDataDiff;
	16	public:
	17	explicit unitDiff(const string& seqName,const int pos, const string letInSd1,const string letInSd2) {
	18	_seqName = seqName; _pos = pos; _letInSd1 = letInSd1; _letInSd2 = letInSd2;
	19	}
	20	explicit unitDiff(const string& seqName) { // in case one seq is only in one
	21	_seqName = seqName; _pos = -1; _letInSd1 = '?'; _letInSd2 = '?';
	22	}
	23	private:
	24	string _seqName;
	25	int _pos;
	26	string _letInSd1;
	27	string _letInSd2;
	28	};
	29
	30	// This class prints differences between two reconstructions (or in general, between any two sequence conatiners)
	31
// Collects and prints the differences between two sequence containers.
// The containers are stored by reference (not copied), so both must outlive
// this object.
class sequenceDataDiff {
public:
	sequenceDataDiff(const sequenceContainer& sc1, const sequenceContainer& sc2) :_sc1(sc1) ,_sc2(sc2) {}
	// fills _differences by comparing the sequences of _sc1 with those of _sc2
	void computeDifferences();
	// writes the collected differences to out, one per line
	void printDiff(ostream& out);
private:
	vector<unitDiff> _differences; // filled by computeDifferences()
	const sequenceContainer& _sc1;
	const sequenceContainer& _sc2;
};
	42
	43	#endif
	44

+1

-0

programs/fastml/suffStatComponentJointNoGamma.cpp less more

0

#include "suffStatComponentJointNoGamma.h"

+50

-0

programs/fastml/suffStatComponentJointNoGamma.h less more

	0	#ifndef SUFF_STAT_COMPONENT_JOINT_NO_GAMMA_H___
	1	#define SUFF_STAT_COMPONENT_JOINT_NO_GAMMA_H___
	2
	3	#include "definitions.h"
	4	#include <vector>
	5	#include <cassert>
	6	using namespace std;
	7
	8	class suffStatSpecHomPosJointNoGamma{ // this is for a specific node.
	9	public:
	10	void set(const int letterInFather,const int val) {
	11	_V[letterInFather]=val;
	12	}
	13
	14	int get(const int letterInFather) const {
	15	return _V[letterInFather];
	16	}
	17
	18	void allocatePlace(const int alphabetSize) {
	19	_V.resize(alphabetSize);
	20	}
	21	bool isEmpty (){return (_V.empty());};
	22	size_t size() {return _V.size();}
	23	private:
	24	Vint _V;//size = alphabet size
	25	};
	26
	27	class suffStatGlobalHomPosJointNoGamma{ // this is for all nodes
	28	public:
	29	void set(const int nodeId,const int letterInFather,const int val) {
	30	_V[nodeId].set(letterInFather,val);
	31	}
	32
	33	int get(const int nodeId,const int letterInFather) const {
	34	return _V[nodeId].get(letterInFather);
	35	}
	36
	37	void allocatePlace(const int numOnNodes,const int alphabetSize) {
	38	_V.resize(numOnNodes);
	39	for (int i=0;i<_V.size();++i) {_V[i].allocatePlace(alphabetSize);}
	40	}
	41	bool isEmpty (){return (_V.empty());}
	42	size_t size() {return _V.size();}
	43
	44	private:
	45	vector<suffStatSpecHomPosJointNoGamma> _V;//size = letter
	46	};
	47
	48
	49	#endif

+24

-0

programs/gainLoss/BBLEM.algorithmicFlow.txt less more

	0	Class::bblEM (with variation: bblEMfixRoot, bblEM2codon)
	1	compute_bblEM
	2	allocatePlace (one more level for fixRoot - in computeDownAlg and countsTableVec)
	3	bblEM_it (called at each iteration of BBL)
	4	foreach pos{
	5	computeDown (use variants for fix root - fillComputeDownNonReversible
	6	vector<suffStatGlobalGamPos> _cdown; //_cdown[categ][letter@root][nodeid][letter][prob])
	7	addCounts
	8	addCountsFixedRoot (based on computeUp and computeDown... fill _computeCountsV)
	9	use class::computeCounts (but no duplicated class!!!)
	10	}
	11	optimizeBranches
	12	foreach node{
	13	class::fromCountTableComponentToDistance (with variation: ...fixRoot, ...2Codon)
	14	computeDistance() + set - based on
	15	class::likeDist (with variation: ...fixRoot, ...2Codon)
	16	giveDistance()
	17	giveDistanceBrent()
	18	C_evallikeDist and C_evallikeDist_d
	19	.... computation based on counts{alph1,alph2, root, rate(sp)}: sumL+= _ctc.getCounts(alph1,alph2,rateCategor)(log( _sp.Pij_t(alph1,alph2,distrate) )-log(_sp.freq(alph2)))
	20
	21
	22	}
	23

+47

-0

programs/gainLoss/LOG chages.txt less more

	0	17.06.08
	1	4.85 - postExp - allow only branch multiply by 10-7 to avoid "non-prob" values.
	2
	3	1 "computeEB_EXP_siteSpecificGL_zero" - not going to use it...
	4	2 P11forgain into gainLossUtils
	5	3 printLofPosBothModels()
	6	4 simulations()
	7	5 printGainLossProbabilityPerPosPerBranch(...outCount)
	8	6 modified printGainLossBayes
	9	7 rate2multiply = max(rateVal,minimumRate) @gainLoss::computePosteriorExpectationOfChangePerSite
	10	8 if (res > 1 + 1e-10) - res = 1;
	11
	12	18.06.08
	13	5.0 - Ancestral Reconstruct
	14	1. added class and functions - ancestralReconstructStates.cpp
	15	2. fix the printTreeStatesAsBPValues function
	16	3. clean code
	17
	18	rate4SiteGL
	19	o 6.0 - encapsulate into class... (operate for each simulated seq. )
	20	o 6.1 - add GAMMA_PLUS_INV
	21	o 6.2
	22	+ Delete un-needed lines //NO NEED to update since the _spVVec is sent byRef to be optimized and updated in optimizeGainLossModelVV
	23	+ ErrorMSG at gainLossOptions for incompatible options
	24	+ "previousL" instead of "changed=false"
	25	+ C_eval is computing adhoc "_plogLforMissingData" so it is not sent
	26	o 6.21 - manyStarts as default, defaults:_userAlphaRate =0.7; _userGain =0.5; _userLoss =2.0;
	27
	28	gain - freq(1)_Q * r_Q
	29	o 7.0 - with "_gainLossRateAreFreq" - 0<gain<1 and loss = 1-gain.
	30	This formulation should be more accurate
	31	o 7.1 - simulations with different models
	32	+ basic "non-model" - GENERAL_GAMMA_FIXED_CATEGORIES (lib changed)
	33	+ switch changed to dynamic cast at get/set ALpha/Beta - @gainLossUtil
	34
	35
	36	classes: gainLoss4site, computeCountsGL, coEvol
	37	o 8.0 - encapsulate into class, enable coEvol
	38	+ new modified functions at gainLoss.cpp - startGainLoss4Site,startComputePosteriorExpectationOfChange,
	39	startSimulateSequences,simulateSequences,
	40	+ gainLossAlphabet.cpp at phyLib
	41	+ startZeroSequenceContainerGL from sequenceContainer methods
	42	+ all bblEM functionality with _logLforMissingData is tested
	43	o 8.1 - modify the gainLossDist (spVVec) - the purpose: the value of gain4site will be calculated as the multiplication of (general)rate and gain.
	44
	45
	46	⏎

+45

-0

programs/gainLoss/LpostPerCat.PerSp.txt less more

	0	_LpostPerCat
	1	------------
	2	1. produce it
	3	gainLoss::startRate4Site
	4	rate4siteGL::computeRate4site
	5	computeEB_EXP_siteSpecificRate(_rates,_BayesianSTD,_BayesianLowerBound,_BayesianUpperBound,_sc,*_sp,_tr,_alphaConf,&_LpostPerCat,_unObservableData_p);
	6	separate:
	7	LofPos_givenRateCat = likelihoodComputation::getLofPos(pos,et,sc,cpg[cat],sp);
	8	pGivenR[cat] = LofPos_givenRateCat * sp.ratesProb(cat);
	9	Assign:
	10	if (LpostPerCat){
	11	(*LpostPerCat)[j][pos]= convert(pGivenR[j]);
	12
	13	2. get it
	14	_LpostPerCat = r4s.getLpostPerCat();
	15
	16	3. use it
	17	startComputePosteriorExpectationOfChange(_sc,_tr,_sp,gainLossOptions::_outDir,_LpostPerCat);
	18	if(LpostPerCat.size()==0 )
	19	{
	20	resizeMatrix(LpostPerCat,sp->categories(),sc.seqLen()) ;
	21	if(sp->categories()>1){ // to fill LpostPerCat - run computeRate4site()
	22	rate4siteGL r4s(sc,tr,sp,outDir, _unObservableData_p);
	23	r4s.run();
	24	LpostPerCat = r4s.getLpostPerCat();
	25	}
	26	else{
	27	oneMatrix(LpostPerCat);
	28	}
	29	}
	30	computeCountsGL countsGL(sc,tr,sp,outDir,LpostPerCat);
	31
	32	_expV01[pos]+=exp01*_LpostPerCat[rateIndex][pos];
	33	_expV10[pos]+=exp10*_LpostPerCat[rateIndex][pos];
	34
	35	_probV01[pos]+=prob01*_LpostPerCat[rateIndex][pos];
	36	_probV10[pos]+=prob10*_LpostPerCat[rateIndex][pos];
	37
	38	_probChanges_PosNodeXY[pos][i][j][k] += probChangesForBranchPerRateCategoryPerPos[i][j][k]*_LpostPerCat[rateIndex][pos];
	39	_expChanges_PosNodeXY[pos][i][j][k] += expChangesForBranchPerRateCategoryPerPos[i][j][k]*_LpostPerCat[rateIndex][pos];
	40
	41
	42
	43
	44	⏎

+18

-0

programs/gainLoss/Makefile less more

	0	#! /usr/local/bin/gmake
	1	# $Id: Makefile cohenofi $
	2
	3	# In order to compile with doubleRep run make like this: make doubleRep
	4
	5	Libsources= gainLossOptions.cpp gainLoss.cpp gainLossUtils.cpp optimizeGainLossModel.cpp optimizeGainLossModelVV.cpp likelihoodComputationGL.cpp gainLossModel.cpp siteSpecificGL.cpp computePosteriorExpectationOfChange.cpp gainLossProject.cpp gainLossOptimizer.cpp ancestralReconstructStates.cpp rate4siteGL.cpp computeCountsGL.cpp computeCorrelations.cpp gainLoss4site.cpp simulateChangesAlongTree.cpp simulateOnePos.cpp bblLS.cpp sankoffReconstructGL.cpp
	6
	7	#Libsources=
	8	LIBNAME = gainLoss
	9
	10	# LibCsources= cmdline.c
	11	# LibCsources += getopt.c getopt1.c
	12
	13	EXEC = gainLoss
	14
	15
	16
	17	include ../Makefile.generic

+18

-0

programs/gainLoss/RootFreq.and.Reversibility.MixModels.txt less more

	0	Previously, The model was defined by terms from DNA and Protein Alphabet.
	1	There were either "reversible" or "Non-reversible" models, where in the first:
	2	_Q[1][0] = _Q[0][1] * _freq[0] / _freq[1]; // stated that we find lossRate from gainRate and stationary freq.
	3
	4	The freq value is also used as the Root freq in each likelihood computation,
	5	so using the "reversible" model also means that the stationary freq = Root freq, whereas if lossRate is estimated independently
	6	(the "Non-reversible" model), then _freq serves only as the Root freq.
	7
	8	The models free parameters:
	9	M1 - gainRate, freq (both stat and root)
	10	M2 - gainRate, LossRate, freq (only root)
	11
	12	In the royal society the model was re-written equivalently as:
	13	M1 - r_Q, freq (both stat and root) [where gainRate = r_Q*freq[1]]
	14	M2 - r_Q, freq_Stat, and freq_Root [where freq_Stat was separated since it was not used in _Q[1][0] = _Q[0][1] * _freq[0] / _freq[1];]
	15
	16
	17	⏎

+220

-0

programs/gainLoss/ancestralReconstructStates.cpp less more

	0	/*
	1	Copyright (C) 2011 Tal Pupko TalP@tauex.tau.ac.il.
	2
	3	This program is free software: you can redistribute it and/or modify
	4	it under the terms of the GNU General Public License as published by
	5	the Free Software Foundation, either version 3 of the License, or
	6	(at your option) any later version.
	7
	8	This program is distributed in the hope that it will be useful,
	9	but WITHOUT ANY WARRANTY; without even the implied warranty of
	10	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	11	GNU General Public License for more details.
	12
	13	You should have received a copy of the GNU General Public License
	14	along with this program. If not, see <http://www.gnu.org/licenses/>.
	15	*/
	16	#include "ancestralReconstructStates.h"
	17	#include <cmath>
	18
	19	using namespace std;
	20
	21	/********************************************************************************************
	22	ancestralReconstructStates
	23	*********************************************************************************************/
	24	ancestralReconstructStates::ancestralReconstructStates(const tree &tr, const sequenceContainer &sc, stochasticProcess *sp):
	25	_tr(tr), _sc(sc){
	26	if(!sp){
	27	errorMsg::reportError("error in the constructor ancestralReconstructStates sp argument is NULL");
	28	}
	29	else{
	30	_sp = sp;
	31	}
	32	_statesV.resize(_sc.seqLen());
	33	for (int pos = 0; pos <_sc.seqLen(); ++pos){
	34	initializeStatesVector(pos);
	35	}
	36	}
	37	void ancestralReconstructStates::initializeStatesVector(int pos){
	38	_statesV[pos].resize(_tr.getNodesNum(),-1000);
	39	checkThatNamesInTreeAreSameAsNamesInSequenceContainer(_tr,_sc);
	40	seqContainerTreeMap scTreeMap(_sc,_tr);
	41	vector <tree::nodeP> leaves;
	42	_tr.getAllLeaves(leaves,_tr.getRoot());
	43	for (unsigned int i=0; i< leaves.size();i++){
	44	int myleafId = (leaves[i])->id();
	45	int mySeqId = scTreeMap.seqIdOfNodeI(myleafId);
	46	_statesV[pos][myleafId] = _sc[mySeqId][pos];
	47	}
	48	}
	49	/********************************************************************************************
	50	upL[node][letter] = max(letter_here){P(letter->letter_here)upL[son1][letter_here]upL[son2][letter_here]} for letter at father node.
	51	backtrack[node][letter] = argmax of above
	52	*********************************************************************************************/
	53	void ancestralReconstructStates::traverseUpML(VVVdouble &upL, VVVint &backtrack){ // input as empty vector to be filled
	54	LOGnOUT(4,<<"traverseUpML..."<<endl);
	55	upL.resize(_sc.seqLen());
	56	backtrack.resize(_sc.seqLen());
	57	for (int pos = 0; pos <_sc.seqLen(); ++pos){
	58	traverseUpML(upL[pos], backtrack[pos], pos);
	59	}
	60	}
	61	/********************************************************************************************
	62	*********************************************************************************************/
	63	void ancestralReconstructStates::traverseUpML(VVdouble &upL, VVint &backtrack, int pos){ // input as empty vector to be filled
	64	computePijGam pi;
	65	pi.fillPij(_tr,*_sp);
	66	upL.resize(_tr.getNodesNum());
	67	for (unsigned int i = 0; i < upL.size(); i++)
	68	upL[i].resize(_sp->alphabetSize());
	69	backtrack.resize(_tr.getNodesNum());
	70	for (unsigned int i = 0; i < backtrack.size(); i++)
	71	backtrack[i].resize(_sp->alphabetSize());
	72	treeIterDownTopConst tIt(_tr);
	73	for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
	74	int father_state = 0;
	75	if (mynode->isLeaf()) {
	76	for (father_state=0; father_state<_sp->alphabetSize();father_state++){ // looping over states at father
	77	int myState = _statesV[pos][mynode->id()];
	78	if(myState == _sc.getAlphabet()->unknown()){
	79	myState = father_state; // same as relations=1, for missing data
	80	}
	81	for (int i=0; i < _sp->categories();++i) {
	82	upL[mynode->id()][father_state] += pi.getPij(i,mynode->id(),father_state,myState)*_sp->ratesProb(i);
	83	}
	84	backtrack[mynode->id()][father_state]=myState;
	85	}
	86	}
	87	else if (!(mynode->isRoot())) {
	88	for (father_state=0; father_state<_sp->alphabetSize();father_state++){ // looping over states at father
	89	MDOUBLE myMax = -1;
	90	int myArgMax=-1;
	91	for (int my_state=0;my_state<_sp->alphabetSize();my_state++){ // loop to find max over current node
	92	//MDOUBLE val=_sp->Pij_t(father_state,my_state,mynode->dis2father());
	93	MDOUBLE val=0;
	94	for (int i=0; i < _sp->categories();++i) {
	95	val += pi.getPij(i,mynode->id(),father_state,my_state)*_sp->ratesProb(i);
	96	}
	97	for (int son=0;son<mynode->getNumberOfSons();son++)
	98	val*=upL[mynode->getSon(son)->id()][my_state];
	99	if (val>myMax){
	100	myMax=val;
	101	myArgMax=my_state;
	102	}
	103	}
	104	if ((myMax<0) \|\| (myArgMax<0))
	105	errorMsg::reportError("Error in traverseUpML: cannot find maximum");
	106	upL[mynode->id()][father_state]=myMax;
	107	backtrack[mynode->id()][father_state]=myArgMax;
	108	}
	109	}
	110	else {// root
	111	for (int root_state=0; root_state<_sp->alphabetSize();root_state++){
	112	MDOUBLE val=_sp->freq(root_state);
	113	for (int son=0;son<mynode->getNumberOfSons();son++)
	114	val*=upL[mynode->getSon(son)->id()][root_state];
	115	upL[mynode->id()][root_state]=val;
	116	}
	117	}
	118	}
	119	}
	120
	121	/********************************************************************************************
	122	return likelihood of max joint reconstruction
	123	*********************************************************************************************/
	124	Vdouble ancestralReconstructStates::traverseDownML(VVVdouble &upL, VVVint &backtrack,VVVint &transitionTypeCount) { // input as already filled vector
	125	LOGnOUT(4,<<"traverseDownML..."<<endl);
	126	Vdouble LofJointV;
	127	LofJointV.resize(_sc.seqLen());
	128	transitionTypeCount.resize(_sc.seqLen());
	129
	130	for (int pos = 0; pos <_sc.seqLen(); ++pos){
	131	LofJointV[pos] = traverseDownML(upL[pos], backtrack[pos],transitionTypeCount[pos], pos);
	132	}
	133	return LofJointV;
	134	}
	135
	136	/********************************************************************************************
	137	fill _statesV, transitionTypeCount
	138	*********************************************************************************************/
	139	MDOUBLE ancestralReconstructStates::traverseDownML(VVdouble &upL, VVint &backtrack,VVint &transitionTypeCount, int pos) { // input as already filled vector
	140	if (backtrack.size() == 0)
	141	errorMsg::reportError("error in ancestralReconstruct::traverseDownML, input vector backtrack must be filled (call traverseUpML() first)");
	142	MDOUBLE LofJoint;
	143	int stateOfRoot;
	144	findMaxInVector(upL[(_tr.getRoot())->id()], LofJoint, stateOfRoot);
	145	_statesV[pos][(_tr.getRoot())->id()] = stateOfRoot;
	146	transitionTypeCount.resize(_sp->alphabetSize());
	147	for (unsigned int i = 0; i < transitionTypeCount.size(); i++)
	148	transitionTypeCount[i].resize(_sp->alphabetSize(),0);
	149	treeIterTopDownConst tIt(_tr);
	150	for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
	151	if (mynode->isRoot()) continue;
	152	int myId = mynode->id();
	153	int stateAtFather = _statesV[pos][mynode->father()->id()];
	154	int myState = _statesV[pos][mynode->id()];
	155	if(myState == _sc.getAlphabet()->unknown()){
	156	myState = stateAtFather; // same as relations=1, for missing data
	157	}
	158	if (mynode->isLeaf()) {
	159	transitionTypeCount[stateAtFather][myState]++;
	160	if ((_statesV[pos][mynode->id()]!=stateAtFather))
	161	LOG(7,<<"switch from "<<mynode->father()->name()<<"("<<stateAtFather<<") to "<<mynode->name()<<"("<<_statesV[pos][mynode->id()]<<")"<<endl);
	162	continue;
	163	}
	164	if(_statesV[pos][mynode->id()] == -2)
	165	cout<<_statesV[pos][mynode->id()]<<" unKnown at pos="<<pos<<" node="<<mynode->id()<<endl;
	166	_statesV[pos][mynode->id()]=backtrack[myId][stateAtFather];
	167	transitionTypeCount[stateAtFather][_statesV[pos][mynode->id()]]++;
	168	}
	169	return log(LofJoint);
	170	}
	171
	172
	173
	174	/********************************************************************************************
	175	compute Prob(letter at Node N is x\|Data): the posterior probabilities at ancestral states
	176	Use the pre-calculated joint posterior probability P(N=x, father(N)=y\|D) and just sum over these probs:
	177	Prob(N=x\|Data) = sum{fatherState}[P(N=x, father(N)=y\|D)]}
	178	stores results in member VVVdouble[pos][node][state] _ancestralProbs
	179	use VVVVdouble _probChanges_PosNodeXY == jointPost[pos][nodeID][fatherLetter][letter]- after computePosteriorOfChangeGivenTerminals
	180	*********************************************************************************************/
	181	void ancestralReconstructStates::computeAncestralPosterior(const VVVVdouble& jointPost)
	182	{
	183	LOGnOUT(4,<<"computeAncestralPosterior (take into acount joint probabilty)..."<<endl);
	184	int numNodes = _tr.getNodesNum();
	185	int alphabetSize = _sp->alphabetSize();
	186	//int alphabetSizeForProbsSize = alphabetSize;
	187	//bool isThereMissingData = _sc.getAlphabetDistribution(true)[2]>0;
	188	//if(isThereMissingData)
	189	// alphabetSizeForProbsSize++; // resize for one more
	190
	191	_ancestralProbs.resize(_sc.seqLen());
	192	for (int pos = 0; pos <_sc.seqLen(); ++pos){
	193	resizeMatrix(_ancestralProbs[pos], numNodes, alphabetSize);
	194	treeIterTopDownConst tIt(_tr);
	195	int letter;
	196	for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
	197	if (mynode->isRoot()) {
	198	//for(letter = 0; letter<alphabetSize; ++letter) // itay's version - the computed vals are all 0
	199	// _ancestralProbs[pos][mynode->id()][letter] = jointPost[pos][mynode->id()][0][letter];
	200	for(letter = 0; letter < alphabetSize; ++letter) {
	201	MDOUBLE sum = 0.0;
	202	for(int sonLetter = 0; sonLetter < alphabetSize; ++sonLetter) {
	203	sum += jointPost[pos][mynode->getSon(0)->id()][letter][sonLetter]; // sum over the son joint prob (instead of father)
	204	}
	205	_ancestralProbs[pos][mynode->id()][letter] = sum;
	206	}
	207	continue;
	208	}
	209	for(letter = 0; letter < alphabetSize; ++letter) {
	210	MDOUBLE sum = 0.0;
	211	for(int fatherLetter = 0; fatherLetter < alphabetSize; ++fatherLetter) {
	212	sum += jointPost[pos][mynode->id()][fatherLetter][letter];
	213	}
	214	_ancestralProbs[pos][mynode->id()][letter] = sum;
	215	}
	216	}
	217	}
	218	}
	219

+62

-0

programs/gainLoss/ancestralReconstructStates.h less more

	0	/*
	1	Copyright (C) 2011 Tal Pupko TalP@tauex.tau.ac.il.
	2
	3	This program is free software: you can redistribute it and/or modify
	4	it under the terms of the GNU General Public License as published by
	5	the Free Software Foundation, either version 3 of the License, or
	6	(at your option) any later version.
	7
	8	This program is distributed in the hope that it will be useful,
	9	but WITHOUT ANY WARRANTY; without even the implied warranty of
	10	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	11	GNU General Public License for more details.
	12
	13	You should have received a copy of the GNU General Public License
	14	along with this program. If not, see <http://www.gnu.org/licenses/>.
	15	*/
	16
	17
	18	#ifndef ___ANCESTRAL_RECONSTRUCT_STATES_
	19	#define ___ANCESTRAL_RECONSTRUCT_STATES_
	20
	21	#include "computeDownAlg.h"
	22	#include "definitions.h"
	23	#include "gainLossAlphabet.h"
	24	#include "matrixUtils.h"
	25	#include "seqContainerTreeMap.h"
	26	#include "sequence.h"
	27	#include "someUtil.h"
	28	#include "treeIt.h"
	29	#include "trivialAccelerator.h"
	30	#include "logFile.h"
	31
	32
	33	class ancestralReconstructStates {
	34
	35	public:
	36	explicit ancestralReconstructStates(const tree &tr, const sequenceContainer &sc, stochasticProcess *sp);
	37	virtual ~ancestralReconstructStates(){};
	38
	39	void traverseUpML(VVVdouble &upL, VVVint &backtrack); // input as empty vector to be filled
	40	void traverseUpML(VVdouble &upL, VVint &backtrack, int pos);
	41
	42	Vdouble traverseDownML(VVVdouble &upL, VVVint &backtrack,VVVint &transitionTypeCount); // input as already filled vector
	43	MDOUBLE traverseDownML(VVdouble &upL, VVint &backtrack,VVint &transitionTypeCount, int pos);
	44	VVint getStates() {return _statesV;}
	45
	46	void computeAncestralPosterior(const VVVVdouble& jointPost); // posterior (marginal) reconstruction
	47	VVVdouble getAncestralProbs() {return _ancestralProbs;}
	48
	49
	50	private:
	51	void initializeStatesVector(int pos);
	52
	53
	54	const tree &_tr;
	55	const sequenceContainer &_sc;
	56	stochasticProcess *_sp;
	57	VVint _statesV;
	58	VVVdouble _ancestralProbs; // VVVdouble[pos][node][state] _ancestralProbs
	59	};
	60
	61	#endif

+218

-0

programs/gainLoss/bblLS.cpp less more

	0	/*
	1	Copyright (C) 2011 Tal Pupko TalP@tauex.tau.ac.il.
	2
	3	This program is free software: you can redistribute it and/or modify
	4	it under the terms of the GNU General Public License as published by
	5	the Free Software Foundation, either version 3 of the License, or
	6	(at your option) any later version.
	7
	8	This program is distributed in the hope that it will be useful,
	9	but WITHOUT ANY WARRANTY; without even the implied warranty of
	10	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	11	GNU General Public License for more details.
	12
	13	You should have received a copy of the GNU General Public License
	14	along with this program. If not, see <http://www.gnu.org/licenses/>.
	15	*/
	16	#include "bblLS.h"
	17	#include "numRec.h"
	18	#include "likelihoodComputation.h"
	19	#include "likelihoodComputationGL.h"
	20	#include "gainLossOptions.h"
	21	#include <cmath>
	22
	23
	24	bblLS::bblLS()
	25	{}
	26
	27
	28	MDOUBLE bblLS::optimizeBranches(tree& tr, stochasticProcess* sp, const sequenceContainer &sc, Vdouble* weights, unObservableData* unObservableData_p,
	29	const int outerIter,
	30	const MDOUBLE epsilonOptimizationBranch, const int numIterations,
	31	MDOUBLE curL)
	32	{
	33	_weights = weights;
	34	MDOUBLE prevIterL = VERYSMALL;
	35	if (curL == NULL)
	36	_treeLikelihood = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(tr,sc,*sp,_weights,unObservableData_p);
	37
	38	else
	39	_treeLikelihood = curL;
	40	LOGnOUT(4,<<"============================="<<endl;);
	41	LOGnOUT(4,<<"ll before bbl = "<<_treeLikelihood<<endl;);
	42	vector<tree::nodeP> nodesV;
	43	tr.getAllNodes(nodesV,tr.getRoot());
	44	int numberOfBranchs = nodesV.size();
	45	MDOUBLE epsilonOptimizationIterFactor = numberOfBranchs/1.5; // (was 2) for 100 branches (~50 species) the epsilon for the entire iter is 50 times the one for branch
	46	epsilonOptimizationIterFactor = max(5.0,epsilonOptimizationIterFactor);
	47	MDOUBLE epsilonOptimizationIter = epsilonOptimizationBranch*epsilonOptimizationIterFactor; // for eBranch=0.2 next iteration only for 10 logL points
	48	LOGnOUT(4,<<"BBL starts with epsilon branch= "<<epsilonOptimizationBranch<<" and epsilon iter="<<epsilonOptimizationIter<<endl;);
	49	int iter;
	50	for (iter = 1; iter <= numIterations; ++iter)
	51	{
	52	if (_treeLikelihood < prevIterL + epsilonOptimizationIter){
	53	LOGnOUT(3,<<" BBL optimization converged. Iter= "<<iter<<" Likelihood="<<_treeLikelihood<<endl);
	54	return _treeLikelihood; //likelihood converged
	55	}
	56	prevIterL = _treeLikelihood;
	57	LOG(4,<<"---- BBL iteration: "<<iter<<endl;);
	58	MDOUBLE paramFound;
	59	MDOUBLE oldBl;
	60	MDOUBLE newL;
	61	for (int i=0; i<nodesV.size(); i++)
	62	{
	63	if (nodesV[i]->isRoot())
	64	continue;
	65	oldBl = nodesV[i]->dis2father();
	66	if(gainLossOptions::_isBblForceFactorCorrection){
	67	newL = -brent((oldBl+gainLossOptions::_minBranchLength)/gainLossOptions::_BblFactorCorrection,
	68	oldBl,
	69	(oldBl+gainLossOptions::_minBranchLength)*gainLossOptions::_BblFactorCorrection,
	70	evalBranch(nodesV[i],&tr, sc, sp,_weights,unObservableData_p), epsilonOptimizationBranch, &paramFound);
	71	}
	72	else{
	73	newL = -brent(gainLossOptions::_minBranchLength, oldBl, gainLossOptions::_maxBranchLength, evalBranch(nodesV[i],&tr, sc, sp,_weights,unObservableData_p), epsilonOptimizationBranch, &paramFound);
	74	}
	75	if (newL >= _treeLikelihood)
	76	{
	77	_treeLikelihood = newL;
	78	nodesV[i]->setDisToFather(paramFound);
	79	if(unObservableData_p) unObservableData_p->setLforMissingData(tr,sp);
	80	LOGnOUT(4,<<"BL old... "<<oldBl<<" BL done... "<<nodesV[i]->dis2father()<<"...LL="<<_treeLikelihood<<"..."<<endl;);
	81	}
	82	else //likelihood went down!
	83	{
	84	nodesV[i]->setDisToFather(oldBl); //return to previous BL
	85	unObservableData_p->setLforMissingData(tr,sp);
	86	LOGnOUT(4,<<"*** WARNING: L went down : "<<endl;);
	87	LOGnOUT(4,<<" BL Found... "<<paramFound<<"...LL="<<newL<<"...";);
	88	LOGnOUT(4,<<" BL old... "<<oldBl<<"...LL="<<_treeLikelihood<<"..."<<endl;);
	89	}
	90	}
	91	string treeINodes = gainLossOptions::_outDir + "//" + "TheTree.INodes.iter" +int2string(outerIter)+ ".Inner"+ int2string(iter) + ".ph";
	92	printTree (tr, treeINodes);
	93	LOGnOUT(3,<<"BBL iter "<<iter<<"...LL="<<_treeLikelihood<<"..."<<endl;);
	94
	95	}
	96	if (iter>numIterations)
	97	LOGnOUT(4,<<" Too many="<<iter-1<<" iterations in BBL. Last optimized tree is used."<<endl);
	98	return _treeLikelihood;
	99	}
	100	//////////////////////////////////////////////////////////////////////////
	101	MDOUBLE bblLS::optimizeBranches(tree& tr, vector<vector<stochasticProcess*> >& spVVec,
	102	const distribution * gainDist, const distribution * lossDist,
	103	const sequenceContainer &sc,
	104	Vdouble* weights, unObservableData* unObservableData_p,
	105	const int outerIter,
	106	const MDOUBLE epsilonOptimizationBranch , const int numIterations ,
	107	MDOUBLE curL)
	108	{
	109	_weights = weights;
	110	MDOUBLE prevIterL = VERYSMALL;
	111	if (curL == NULL)
	112	_treeLikelihood = likelihoodComputationGL::getTreeLikelihoodAllPosAlphTheSame(tr,sc,spVVec,gainDist,lossDist,weights,unObservableData_p);
	113	else
	114	_treeLikelihood = curL;
	115	LOGnOUT(4,<<"============================="<<endl;);
	116	LOGnOUT(4,<<"ll before bbl = "<<_treeLikelihood<<endl;);
	117	vector<tree::nodeP> nodesV;
	118	tr.getAllNodes(nodesV,tr.getRoot());
	119	int numberOfBranchs = nodesV.size();
	120	MDOUBLE epsilonOptimizationIterFactor = numberOfBranchs/2.0; // for 100 branches (~50 species) the epsilon for the entire iter is 50 times the one for branch
	121	epsilonOptimizationIterFactor = max(5.0,epsilonOptimizationIterFactor);
	122	MDOUBLE epsilonOptimizationIter = epsilonOptimizationBranch*epsilonOptimizationIterFactor; // for eBranch=0.2 next iteration only for 10 logL points
	123	LOGnOUT(4,<<"BBL starts with epsilon branch= "<<epsilonOptimizationBranch<<" and epsilon iter="<<epsilonOptimizationIter<<endl;);
	124	int iter;
	125	for (iter = 1; iter <= numIterations; ++iter)
	126	{
	127	if (_treeLikelihood < prevIterL + epsilonOptimizationIter){
	128	LOGnOUT(3,<<" BBL optimization converged. Iter= "<<iter<<" Likelihood="<<_treeLikelihood<<endl);
	129	return _treeLikelihood; //likelihood converged
	130	}
	131	prevIterL = _treeLikelihood;
	132	LOG(4,<<"---- BBL iteration: "<<iter<<endl;);
	133	MDOUBLE paramFound;
	134	MDOUBLE oldBl;
	135	MDOUBLE newL;
	136	for (int i=0; i<numberOfBranchs; i++)
	137	{
	138	if (nodesV[i]->isRoot())
	139	continue;
	140	oldBl = nodesV[i]->dis2father();
	141	if(gainLossOptions::_isBblForceFactorCorrection){
	142	newL = -brent((oldBl+gainLossOptions::_minBranchLength)/gainLossOptions::_BblFactorCorrection,
	143	oldBl,
	144	(oldBl+gainLossOptions::_minBranchLength)*gainLossOptions::_BblFactorCorrection, evalBranchSPvv(nodesV[i],&tr, sc, spVVec,gainDist,lossDist,weights,unObservableData_p), epsilonOptimizationBranch, &paramFound);
	145	}
	146	else{
	147	newL = -brent(gainLossOptions::_minBranchLength, oldBl, gainLossOptions::_maxBranchLength, evalBranchSPvv(nodesV[i],&tr, sc, spVVec,gainDist,lossDist,weights,unObservableData_p), epsilonOptimizationBranch, &paramFound);
	148	}
	149	if (newL >= _treeLikelihood)
	150	{
	151	_treeLikelihood = newL;
	152	nodesV[i]->setDisToFather(paramFound);
	153	if(unObservableData_p) unObservableData_p->setLforMissingData(tr,spVVec,gainDist,lossDist);
	154	LOGnOUT(4,<<"BL old... "<<oldBl<<" BL done... "<<nodesV[i]->dis2father()<<"...LL="<<_treeLikelihood<<"..."<<endl;);
	155	}
	156	else //likelihood went down!
	157	{
	158	nodesV[i]->setDisToFather(oldBl); //return to previous BL
	159	if(unObservableData_p) unObservableData_p->setLforMissingData(tr,spVVec,gainDist,lossDist);
	160	LOGnOUT(4,<<"*** WARNING: L went down: "<<endl;);
	161	LOGnOUT(4,<<" BL Found... "<<paramFound<<"...LL="<<newL<<"...";);
	162	LOGnOUT(4,<<" BL old... "<<oldBl<<"...LL="<<_treeLikelihood<<"..."<<endl;);
	163	}
	164	}
	165	string treeINodes = gainLossOptions::_outDir + "//" + "TheTree.INodes.iter" +int2string(outerIter)+ ".Inner"+ int2string(iter) + ".ph";
	166	printTree (tr, treeINodes);
	167	LOGnOUT(3,<<"BBL iter "<<iter<<"...LL="<<_treeLikelihood<<"..."<<endl;);
	168	}
	169	if (iter>numIterations)
	170	LOGnOUT(4,<<" Too many="<<iter-1<<" iterations in BBL. Last optimized tree is used."<<endl);
	171	return _treeLikelihood;
	172	}
	173
	174
	175	//////////////////////////////////////////////////////////////////////////
	176	MDOUBLE evalBranch::operator()(MDOUBLE x)
	177	{
	178	_pNode->setDisToFather(x);
	179	if(_unObservableData_p)_unObservableData_p->setLforMissingData(*_tr,_sp);
	180	MDOUBLE LL = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(_tr,_sc,_sp,_weights,_unObservableData_p);
	181	return -LL;
	182	}
	183
	184	//////////////////////////////////////////////////////////////////////////
	185	MDOUBLE evalBranchSPvv::operator()(MDOUBLE x)
	186	{
	187	_pNode->setDisToFather(x);
	188	if(_unObservableData_p) _unObservableData_p->setLforMissingData(*_tr,_spVVec,_gainDist,_lossDist);
	189	MDOUBLE LL = likelihoodComputationGL::getTreeLikelihoodAllPosAlphTheSame(*_tr,_sc,_spVVec,_gainDist,_lossDist,_weights,_unObservableData_p);
	190	return -LL;
	191	}
	192	//////////////////////////////////////////////////////////////////////////
	193	MDOUBLE evalBranchProportionExponent::operator()(MDOUBLE x)
	194	{
	195
	196	MDOUBLE factorBL = pow(10,x);
	197	_tr->multipleAllBranchesByFactor(factorBL);
	198	if(_unObservableData_p)_unObservableData_p->setLforMissingData(*_tr,_sp);
	199	MDOUBLE LL = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(_tr,_sc,_sp,_weights,_unObservableData_p);
	200	_tr->multipleAllBranchesByFactor(1/factorBL);
	201	LOG(5,<<"Branch factor val = "<<factorBL<<" logL = "<<LL<<endl);
	202	return -LL;
	203	}
	204
	205	//////////////////////////////////////////////////////////////////////////
	206	MDOUBLE evalBranchProportionExponentSPvv::operator()(MDOUBLE x)
	207	{
	208	MDOUBLE factorBL = pow(10,x);
	209	_tr->multipleAllBranchesByFactor(factorBL);
	210	if(_unObservableData_p) _unObservableData_p->setLforMissingData(*_tr,_spVVec,_gainDist,_lossDist);
	211	MDOUBLE LL = likelihoodComputationGL::getTreeLikelihoodAllPosAlphTheSame(*_tr,_sc,_spVVec,_gainDist,_lossDist,_weights,_unObservableData_p);
	212	LOG(5,<<"Branch factor val = "<<factorBL<<" logL = "<<LL<<endl);
	213	_tr->multipleAllBranchesByFactor(1/factorBL);
	214	return -LL;
	215	}
	216
	217

+180

-0

programs/gainLoss/bblLS.h less more

	0	/*
	1	Copyright (C) 2011 Tal Pupko TalP@tauex.tau.ac.il.
	2
	3	This program is free software: you can redistribute it and/or modify
	4	it under the terms of the GNU General Public License as published by
	5	the Free Software Foundation, either version 3 of the License, or
	6	(at your option) any later version.
	7
	8	This program is distributed in the hope that it will be useful,
	9	but WITHOUT ANY WARRANTY; without even the implied warranty of
	10	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	11	GNU General Public License for more details.
	12
	13	You should have received a copy of the GNU General Public License
	14	along with this program. If not, see <http://www.gnu.org/licenses/>.
	15	*/
	16
	17
	18	#ifndef ___BBL_LS__
	19	#define ___BBL_LS__
	20
	21	#include "definitions.h"
	22	#include "tree.h"
	23	#include "sequenceContainer.h"
	24	#include "stochasticProcess.h"
	25	#include "unObservableData.h"
	26	#include "gainLossUtils.h"
	27
	28	using namespace std;
	29
	30	//#define MAX_BRANCH_LENGTH 50.0 //20.0
	31
	32	/*
	33	This class optimize the branches using "naive" line search methodology.
	34	go over each branch and optimize it using brent.
	35	In one iteration it optimze seperatly all branches.
	36	This procedure continues until convergence is reached or until the maximum number of iteration is reached.
	37	*/
	38	class bblLS {
	39	public:
	40
	41	explicit bblLS();
	42	~bblLS() {};
	43	MDOUBLE getTreeLikelihood() const {return _treeLikelihood;}
	44
	45
	46	MDOUBLE optimizeBranches(tree& tr, stochasticProcess* sp, const sequenceContainer &sc, Vdouble* weights, unObservableData* unObservableData_p,
	47	const int outerIter,
	48	const MDOUBLE epsilonOptimization =0.1, const int numIterations =10,
	49	MDOUBLE curL =NULL);
	50
	51	MDOUBLE optimizeBranches(tree& tr, vector<vector<stochasticProcess*> >& spVVec,
	52	const distribution * gainDist, const distribution * lossDist,
	53	const sequenceContainer &sc,
	54	Vdouble* weights, unObservableData* unObservableData_p,
	55	const int outerIter,
	56	const MDOUBLE epsilonOptimization =0.1, const int numIterations =10,
	57	MDOUBLE curL =NULL);
	58
	59
	60	private:
	61	Vdouble* _weights;
	62	MDOUBLE _treeLikelihood;
	63	};
	64
	65	//////////////////////////////////////////////////////////////////////////
	66	class evalBranch{
	67	public:
	68	explicit evalBranch(tree::nodeP pNode, tree* tr, const sequenceContainer &sc, stochasticProcess* sp, Vdouble* weights, unObservableData* unObservableData_p )
	69	:_pNode(pNode),_tr(tr), _sc(sc), _sp(sp),_weights(weights)
	70	{
	71	if(unObservableData_p)
	72	_unObservableData_p = unObservableData_p->clone();
	73	else
	74	_unObservableData_p = NULL;
	75
	76	};
	77	virtual ~evalBranch(){
	78	if(_unObservableData_p) delete _unObservableData_p;
	79	}
	80
	81	MDOUBLE operator() (MDOUBLE x);
	82
	83	private:
	84	tree::nodeP _pNode;
	85	tree* _tr;
	86	const sequenceContainer& _sc;
	87	const stochasticProcess* _sp;
	88	Vdouble* _weights;
	89	unObservableData* _unObservableData_p;
	90	};
	91
	92	//////////////////////////////////////////////////////////////////////////
	93	class evalBranchSPvv{
	94	public:
	95	explicit evalBranchSPvv(tree::nodeP pNode, tree* tr, const sequenceContainer &sc, vector<vector<stochasticProcess*> >& spVVec,
	96	const distribution * gainDist, const distribution * lossDist,
	97	Vdouble* weights, unObservableData* unObservableData_p)
	98	:_pNode(pNode),_tr(tr),_sc(sc),_spVVec(spVVec), _gainDist(gainDist), _lossDist(lossDist),_unObservableData_p(unObservableData_p),_weights(weights)
	99	{
	100	if(unObservableData_p)
	101	_unObservableData_p = unObservableData_p->clone();
	102	else
	103	_unObservableData_p = NULL;
	104	};
	105	virtual ~evalBranchSPvv(){
	106	if(_unObservableData_p) delete _unObservableData_p;
	107	}
	108	MDOUBLE operator() (MDOUBLE x);
	109
	110	private:
	111	tree::nodeP _pNode;
	112	tree* _tr;
	113	const sequenceContainer& _sc;
	114	const vector<vector<stochasticProcess*> >& _spVVec;
	115	const distribution * _gainDist;
	116	const distribution * _lossDist;
	117	Vdouble* _weights;
	118	unObservableData* _unObservableData_p;
	119	};
	120
	121
	122	//////////////////////////////////////////////////////////////////////////
	123	class evalBranchProportionExponent{
	124	public:
	125	explicit evalBranchProportionExponent(tree* tr, const sequenceContainer &sc, stochasticProcess* sp, Vdouble* weights, unObservableData* unObservableData_p )
	126	:_tr(tr), _sc(sc), _sp(sp),_weights(weights)
	127	{
	128	if(unObservableData_p)
	129	_unObservableData_p = unObservableData_p->clone();
	130	else
	131	_unObservableData_p = NULL;
	132
	133	};
	134	virtual ~evalBranchProportionExponent(){
	135	if(_unObservableData_p) delete _unObservableData_p;
	136	}
	137
	138	MDOUBLE operator() (MDOUBLE x);
	139
	140	private:
	141	tree* _tr;
	142	const sequenceContainer& _sc;
	143	const stochasticProcess* _sp;
	144	Vdouble* _weights;
	145	unObservableData* _unObservableData_p;
	146	};
	147
	148	//////////////////////////////////////////////////////////////////////////
	149	class evalBranchProportionExponentSPvv{
	150	public:
	151	explicit evalBranchProportionExponentSPvv(tree* tr, const sequenceContainer &sc, vector<vector<stochasticProcess*> >& spVVec,
	152	const distribution * gainDist, const distribution * lossDist,
	153	Vdouble* weights, unObservableData* unObservableData_p)
	154	:_tr(tr),_sc(sc),_spVVec(spVVec), _gainDist(gainDist), _lossDist(lossDist),_unObservableData_p(unObservableData_p),_weights(weights)
	155	{
	156	if(unObservableData_p)
	157	_unObservableData_p = unObservableData_p->clone();
	158	else
	159	_unObservableData_p = NULL;
	160	};
	161	virtual ~evalBranchProportionExponentSPvv(){
	162	if(_unObservableData_p) delete _unObservableData_p;
	163	}
	164	MDOUBLE operator() (MDOUBLE x);
	165
	166	private:
	167	tree* _tr;
	168	const sequenceContainer& _sc;
	169	const vector<vector<stochasticProcess*> >& _spVVec;
	170	const distribution * _gainDist;
	171	const distribution * _lossDist;
	172	Vdouble* _weights;
	173	unObservableData* _unObservableData_p;
	174	};
	175
	176
	177
	178
	179	#endif

+25

-0

programs/gainLoss/classesInherit.costurs.clone.assignment.txt less more

	0	Example: distributions
	1
	2
	3	class gammaDistribution : public generalGammaDistribution {
	4	class generalGammaDistribution : public distribution { // This is a virtual class from which all types of distribution classes inherit from.
	5
	6
	7
	8	All the constructors are "explicit":
	9	e.g.,
	10	explicit gammaDistribution() {} // empty constructor
	11	explicit gammaDistribution(MDOUBLE alpha,int in_number_of_categories); // init constructor
	12	explicit gammaDistribution(const gammaDistribution& other); // copy constructor
	13	All the methods are "virtual":
	14	e.g.,
	15	virtual ~gammaDistribution() {}
	16	virtual distribution* clone() const { return new gammaDistribution(*this); }
	17
	18	In the .cpp, some methods of the derived class simply delegate to the same-named method of the parent class
	19	e.g.,
	20	void gammaDistribution::setGammaParameters(int in_number_of_categories, MDOUBLE in_alpha) {
	21	generalGammaDistribution::setGammaParameters(in_number_of_categories,in_alpha,in_alpha);
	22	}
	23
	24

+1442

-0

programs/gainLoss/computeCorrelations.cpp less more

	0	/*
	1	Copyright (C) 2011 Tal Pupko TalP@tauex.tau.ac.il.
	2
	3	This program is free software: you can redistribute it and/or modify
	4	it under the terms of the GNU General Public License as published by
	5	the Free Software Foundation, either version 3 of the License, or
	6	(at your option) any later version.
	7
	8	This program is distributed in the hope that it will be useful,
	9	but WITHOUT ANY WARRANTY; without even the implied warranty of
	10	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	11	GNU General Public License for more details.
	12
	13	You should have received a copy of the GNU General Public License
	14	along with this program. If not, see <http://www.gnu.org/licenses/>.
	15	*/
	16	#include "computeCorrelations.h"
	17	#include "gainLossUtils.h"
	18	#include "gainLossAlphabet.h"
	19
	20
	21	/********************************************************************************************
	22	computeCorrelations
	23	Input: _expChanges_PosNodeXY - required,
	24	if _expChanges_PosNodeXY_B not NULL, compute correlation against this data
	25
	26	1. Compute correlation before simulations, based only on real dataset
	27	(R is computed for each pair in real data)
	28	startComputeAmongSitesCorrelations()
	29	correl->runComputeCorrelations() // with Real data
	30
	31	Perform several iteration of simulations:
	32	startParametricBootstapCorrelation()
	33	Foreach iteration of simulations:
	34	1.1. simulated data with same model
	35
	36	2. Compute correlation of simulated data
	37	computeCoEvolutionScoresBasedOnSimulatedDataCoMap()
	38	2.1 fill LpostPerCat using rate4site or GL4site
	39	2.2 fill expChanges_PosNodeXY_Sim stochastic mapping using computeCountsGL
	40	2.3 new computeCorrel object with both real and simulated data used:
	41	2.3.1 runComputeCorrelations
	42	2.3.2 sort - produceSortedVectorsOfAllCorrelations
	43	2.3.3 bins - produceSortedVectorsOfCorrelationsBinedByRate
	44	2.3.4 pVal - computedCorrelationsPValBasedOnSimulatedDataCoMapBins
	45	2.3.5 FDR pVals2qVals
	46	2.3.6 printComputedCorrelationsData (smart print of map values)
	47
	48	*********************************************************************************************/
	49	computeCorrelations::computeCorrelations(tree& tr, string& outDir, VVVVdouble* expChanges_PosNodeXY, VVVVdouble* expChanges_PosNodeXY_B):
	50	_tr(tr),_outDir(outDir)
	51	{
	52	_expChanges_PosNodeXY = *expChanges_PosNodeXY;
	53
	54	// Type of correlation - assume _EventTypes =(gain, loss, both) and if less options, the last ones are missing
	55
	56	if(gainLossOptions::_isCorrelateWithPearson)
	57	_isPearson.push_back(true);
	58	if(gainLossOptions::_isCorrelateWithSpearman)
	59	_isPearson.push_back(false);
	60	if(_isPearson.size()==0){
	61	_isPearson.push_back(true);
	62	LOGnOUT(4,<<"Pearson correlation is compted since no option is selected"<<endl);
	63	}
	64
	65	if(gainLossOptions::_isOnlyCorrelateWithBoth){
	66	_EventTypes.push_back("gain");
	67	_EventTypes.push_back("loss");
	68	_EventTypes.push_back("both");
	69	}
	70	else{
	71	_EventTypes.push_back("gain");
	72	if(gainLossOptions::_isAlsoCorrelateWithLoss)
	73	_EventTypes.push_back("loss");
	74	if(gainLossOptions::_isAlsoCorrelateWithBoth)
	75	_EventTypes.push_back("both");
	76	}
	77	for (int i = 0; i <_EventTypes.size(); ++i){
	78	_EventTypesMap[_EventTypes[i]]=i;
	79	map<string, int> FromTo;
	80	if(_EventTypes[i] == "gain"){
	81	FromTo["from"]=0;
	82	FromTo["to"]=1; }
	83	else if(_EventTypes[i] == "loss"){
	84	FromTo["from"]=1;
	85	FromTo["to"]=0;
	86	}else if(_EventTypes[i] == "both"){
	87	LOGnOUT(4,<<"Event _EventTypesFromTo is not applicable for "<<_EventTypes[i]<<" both 0->1 and 1->0 are computed"<<endl);
	88	break;
	89	}
	90	_EventTypesFromTo[_EventTypes[i]] = FromTo;
	91	LOGnOUT(4,<<"Event Type="<<_EventTypes[i]<<endl);
	92	}
	93
	94	if(expChanges_PosNodeXY_B){
	95	_expChanges_PosNodeXY_B = *expChanges_PosNodeXY_B;
	96	_isTwoSetsOfInputForCorrelation = true;
	97	}else{
	98	_isTwoSetsOfInputForCorrelation = false;
	99	}
	100	_numOfSamplesInLowRateFirstBin = (int)min(100.0, (double)(_expChanges_PosNodeXY.size()/10.0)); // thus, the best p-value for low Rate 0.01
	101	if(_numOfSamplesInLowRateFirstBin<1)
	102	_numOfSamplesInLowRateFirstBin = 1;
	103	LOGnOUT(4,<<"Lowest pVal for correlation with rate below simulations is "<<1.0/_numOfSamplesInLowRateFirstBin<<endl);
	104	}
	105
	106	/********************************************************************************************
	107	*********************************************************************************************/
	108	computeCorrelations::~computeCorrelations(){
	109	//clearVVVV(_jointProb_PosNodeXY);
	110	}
	111
	112	/********************************************************************************************
	113	*********************************************************************************************/
	114	computeCorrelations& computeCorrelations::operator=(const computeCorrelations &other){
	115	if (this != &other) { // Check for self-assignment
	116	}
	117	return *this;
	118	}
	119
	120	/********************************************************************************************
	121	Compute the Pearson / Spearman correlation among sites.
	122	*********************************************************************************************/
	123	void computeCorrelations::runComputeCorrelations(const Vint& selectedPositions, const Vint& numOfGapsTillSite, const bool isNormalizeForBranch)
	124	{
	125	LOGnOUT(4,<<endl<<"runComputeCorrelations..."<<endl);
	126	time_t t1,t2;
	127	time(&t1);
	128
	129	int numOfbranches = _tr.getNodesNum()-1; // was -1, minus the root node
	130	int numOfSitesSelected = selectedPositions.size();
	131	int numOfpositionsIn_A = _expChanges_PosNodeXY.size();
	132	int numOfpositionsIn_B;
	133	if(_isTwoSetsOfInputForCorrelation)
	134	numOfpositionsIn_B = _expChanges_PosNodeXY_B.size();
	135	else
	136	numOfpositionsIn_B = _expChanges_PosNodeXY.size(); // if B is not given, it's copy
	137
	138	if(_isTwoSetsOfInputForCorrelation)
	139	LOGnOUT(3, <<"NOTE: Two seperate dataset input.\n Compute correl for selectedSites="<<numOfSitesSelected<<" subset of A="<<numOfpositionsIn_A<<" against B="<<numOfpositionsIn_B<<endl);
	140
	141	//// Mapping vectors
	142	LOGnOUT(4, <<"Fill events vectors..."<<endl);
	143	// Expectation, keep the duplicated code. Maybe update later
	144	_expPerPosPerBranchVec.resize(_EventTypes.size());
	145	_expPerPosPerBranchVec_B.resize(_EventTypes.size());
	146	for (vector<string>::iterator evnt=_EventTypes.begin() ; evnt < _EventTypes.end(); evnt++ ){
	147	if(evnt == "gain" \|\| evnt == "loss")
	148	fillMapValPerPosPerBranch(_expPerPosPerBranchVec[_EventTypesMap[evnt]],evnt,_expChanges_PosNodeXY,isNormalizeForBranch); // fill _expPerPosPerBranchVec
	149	if(*evnt == "both"){
	150	if(_EventTypes.size()<3)
	151	errorMsg::reportError("Error: correlation for _EventTypes=both with less than 3 options assume:(gain, loss, both)");
	152	_expPerPosPerBranchVec[_EventTypesMap[*evnt]] = _expPerPosPerBranchVec[_EventTypesMap["gain"]]; // gain and loss appended (double size vector)
	153	appendVectors(_expPerPosPerBranchVec[_EventTypesMap[*evnt]], _expPerPosPerBranchVec[_EventTypesMap["loss"]]);
	154	}
	155	if(_isTwoSetsOfInputForCorrelation){
	156	if(evnt == "gain" \|\| evnt == "loss")
	157	fillMapValPerPosPerBranch(_expPerPosPerBranchVec_B[_EventTypesMap[evnt]],evnt,_expChanges_PosNodeXY_B,isNormalizeForBranch); //
	158	if(*evnt == "both"){
	159	_expPerPosPerBranchVec_B[_EventTypesMap[*evnt]] = _expPerPosPerBranchVec_B[_EventTypesMap["gain"]]; // gain and loss appended (double size vector)
	160	appendVectors(_expPerPosPerBranchVec_B[_EventTypesMap[*evnt]], _expPerPosPerBranchVec_B[_EventTypesMap["loss"]]);
	161	}
	162	}else{
	163	_expPerPosPerBranchVec_B = _expPerPosPerBranchVec;
	164	}
	165	}
	166
	167	if(gainLossOptions::_isOnlyCorrelateWithBoth){ // if "both", gain and loss were used only for the fill-up.
	168	while(_EventTypes.begin() == "gain" \|\| _EventTypes.begin() == "loss")
	169	_EventTypes.erase (_EventTypes.begin());
	170	}
	171
	172	//// correlation vectors, filled below
	173	LOGnOUT(6, <<"Resize correlation vectors vectors"<<endl);
	174	int numberOfCorrelations = _isPearson.size()*_EventTypes.size();
	175	_correlationsPerSitePerPosVec.resize(numberOfCorrelations);
	176	for (int typeC = 0; typeC <numberOfCorrelations; ++typeC)
	177	resizeMatrix(_correlationsPerSitePerPosVec[typeC], numOfSitesSelected, numOfpositionsIn_B);
	178
	179	//for (vector<bool>::iterator it=_isPearson.begin() ; it < _isPearson.end(); it++ ){
	180	// for (vector<string>::iterator evnt=_EventTypes.begin() ; evnt < _EventTypes.end(); evnt++ ){ // could be done with int
	181	// LOGnOUT(4, <<vecIndex<<" - Compute correl isSpearman="<<it<<" with type="<<evnt<<endl);
	182	// vecIndex++;
	183	// }
	184	//}
	185
	186	int vecIndex=0;
	187	for (vector<bool>::iterator it=_isPearson.begin() ; it < _isPearson.end(); it++ ){
	188	//int typeIndex=0;
	189	for (vector<string>::iterator evnt=_EventTypes.begin() ; evnt < _EventTypes.end(); evnt++ ){ // could be done with int
	190	Vdouble correlationVecAve; // per correlation type, each item is the Mean for a selected position again all
	191	Vdouble correlationVecMedian; // per correlation type, each item is the Median for a selected position again all
	192	LOGnOUT(4, <<"Compute correlation isPearson="<<it<<" with type="<<evnt<<endl);
	193	for (int selectedSiteIndex = 0; selectedSiteIndex <numOfSitesSelected; ++selectedSiteIndex){
	194	if(selectedSiteIndex%100==0)
	195	cout<<"*";
	196	int selectedSite = selectedPositions[selectedSiteIndex];
	197	int selectedSiteRemovedGaps = selectedSite- numOfGapsTillSite[selectedSiteIndex];
	198	fillCorrPerSelectedSites(_correlationsPerSitePerPosVec[vecIndex][selectedSiteIndex],_expPerPosPerBranchVec[_EventTypesMap[evnt]],_expPerPosPerBranchVec_B[_EventTypesMap[evnt]],selectedSiteRemovedGaps,(*it)); // expPerPosPerBranchVec still have gain,loss,both
	199
	200	correlationVecAve.push_back(computeAverage((_correlationsPerSitePerPosVec[vecIndex][selectedSiteIndex])));
	201	correlationVecMedian.push_back(computeMedian((_correlationsPerSitePerPosVec[vecIndex][selectedSiteIndex])));
	202	}
	203	cout<<"\n"; // end of "*" for this correlation type
	204	if(gainLossOptions::_selectedSitesForCorrelation=="")
	205	LOGnOUT(4, <<"Correlation coefficient (mean of Val=Mean/Median per selected) Mean="<<computeAverage(correlationVecAve)<<" Median="<<computeAverage(correlationVecMedian)<<endl);
	206	//typeIndex++;
	207	vecIndex++;
	208	}
	209	}
	210	time(&t2);
	211	LOGnOUT(4,<<"TIME = "<<(t2-t1)/60.0<<" minutes"<<endl);
	212	}
	213
	214	/********************************************************************************************
	215	*********************************************************************************************/
	216	MDOUBLE computeCorrelations::computeNminPerPair(const int site_A, const int site_B, const int typeIndex, const VVVdouble& exp_PosXY){
	217	MDOUBLE NminVal = 0;
	218	MDOUBLE siteA_Rate;
	219	MDOUBLE siteB_Rate;
	220
	221	if(typeIndex == 2 \|\| gainLossOptions::_isOnlyCorrelateWithBoth){ // both
	222	MDOUBLE siteA_Gain = exp_PosXY[site_A][0][1];
	223	MDOUBLE siteA_Loss = exp_PosXY[site_A][1][0];
	224	siteA_Rate = computeNminRforCorrelWithGainAndLoss(siteA_Gain,siteA_Loss);
	225
	226	MDOUBLE siteB_Gain = exp_PosXY[site_B][0][1];
	227	MDOUBLE siteB_Loss = exp_PosXY[site_B][1][0];
	228	siteB_Rate = computeNminRforCorrelWithGainAndLoss(siteB_Gain,siteB_Loss);
	229	}else{
	230	string type = _EventTypes[typeIndex];
	231	int from = _EventTypesFromTo[type]["from"];
	232	int to = _EventTypesFromTo[type]["to"];
	233	siteA_Rate = exp_PosXY[site_A][from][to];
	234	siteB_Rate = exp_PosXY[site_B][from][to];
	235	}
	236
	237	NminVal = min(siteA_Rate, siteB_Rate);
	238	return NminVal;
	239
	240
	241	}
	242
	243
	244
	245	/********************************************************************************************
	246	*********************************************************************************************/
	247	void computeCorrelations::produceSortedVectorsOfAllCorrelations(Vdouble& rate4siteSim){
	248	LOGnOUT(4,<<endl<<"produceSortedVectorsOfAllCorrelations for simulated data (sort rates and correlations (paired))..."<<endl);
	249	time_t t1,t2;
	250	time(&t1);
	251
	252	if(rate4siteSim.size()==0) // no Rate4site input is provided
	253	computeRateValPerPos(_expChanges_PosNodeXY,_exp_PosXY);
	254	int numberOfcorrelationVec = _correlationsPerSitePerPosVec.size();
	255	int numOfSites_A = _correlationsPerSitePerPosVec[0].size();
	256	int numOfSites_B = _correlationsPerSitePerPosVec[0][0].size();
	257	_pairWiseCorrelationsAndNminSim.resize(numberOfcorrelationVec);
	258	_NminSortedSim.resize(numberOfcorrelationVec);
	259
	260	for (int corIndex = 0; corIndex <numberOfcorrelationVec; ++corIndex){
	261	LOGnOUT(4,<<" *** corIndex="<<corIndex<<endl);
	262	int typeIndex = corIndex % _EventTypes.size(); // in case both Spearman and pearson are used
	263	int indexAll = 0;
	264	Vdouble correlations;
	265	Vdouble Nmins;
	266	for (int site_A = 0; site_A <numOfSites_A; ++site_A){
	267	for (int site_B = site_A; site_B <numOfSites_B; ++site_B){
	268	if(site_A == site_B)
	269	continue;
	270	MDOUBLE correlVal = _correlationsPerSitePerPosVec[corIndex][site_A][site_B];
	271	correlations.push_back(correlVal);
	272	MDOUBLE NminVal=0;
	273	if(rate4siteSim.size()==0)
	274	NminVal = computeNminPerPair(site_A, site_B, typeIndex, _exp_PosXY);
	275	else
	276	NminVal = min(rate4siteSim[site_A],rate4siteSim[site_B]);
	277
	278	Nmins.push_back(NminVal);
	279	indexAll++;
	280	}
	281	}
	282	vector< vecElem<MDOUBLE> > orderVecNmin;
	283	orderVec(Nmins, orderVecNmin);
	284
	285	//resizeMatrix(_pairWiseCorrelationsAndNminSim[corIndex], 2 ,indexAll); // pairWiseCorrelationsAndNmin[corrIndex][pairIndex][0/1][val]
	286	resizeMatrix(_pairWiseCorrelationsAndNminSim[corIndex], 1 ,indexAll); // pairWiseCorrelationsAndNmin[corrIndex][pairIndex][0/1][val]
	287	for (int i = 0; i <indexAll; ++i){
	288	//_pairWiseCorrelationsAndNminSim[corIndex][0][i] = orderVecNmin[i].getValue();
	289	//_pairWiseCorrelationsAndNminSim[corIndex][1][i] = correlations[orderVecNmin[i].getPlace()];
	290	_pairWiseCorrelationsAndNminSim[corIndex][0][i] = correlations[orderVecNmin[i].getPlace()];
	291	}
	292	_NminSortedSim[corIndex] = Nmins; // vector copy no resize?
	293	sort( _NminSortedSim[corIndex].begin(),_NminSortedSim[corIndex].end() );
	294
	295	LOGnOUT(4,<<"\nSimulated Data correlations frequencies:"<<endl);
	296	LOGnOUT(4,<<"num of correlations="<<correlations.size()<<endl);
	297	sort(correlations.begin(),correlations.end());
	298	printCorrelationsFrequencies(correlations);
	299
	300	LOGnOUT(4,<<"Finish sorting "<<indexAll<<" pairs of correlating sites"<<endl);
	301	LOGnOUT(4,<<"Minimal rate (Nmin)="<<(_NminSortedSim[corIndex].begin() )<<" max="<<(_NminSortedSim[corIndex].end()-1)<<endl);
	302	}
	303	time(&t2);
	304	LOGnOUT(4,<<"TIME = "<<(t2-t1)/60.0<<" minutes (sort vectors)"<<endl);
	305	}
	306
	307
	308	/********************************************************************************************
	309	For each bin, the index within the sorted Nmin (rate) vector is advanced (+++)
	310	if _isSortVectorOfCorrelationsBinsByLowerRateBound
	311	1. => the binLimit = LowLimit = f(index), (computed before index +++)
	312	UpLimit is maxLimit for all bins
	313	else
	314	2. => the binLimit = UpLimit = f(index), (computed after index +++)
	315	LowLimit is the UpLimit of previous bin
	316
	317	Note: Two versions exist for the _isSortVectorOfCorrelationsBinsByLowerRateBound (which is not the default)
	318	The assumption (which appears correct in Corr~1) is that the probability of high correlation by chance is smaller with higher rate.
	319
	320	Thus, in the modified one (16/05/12) higher rates had more simulations to compare with (compare with all sim. with lower rate)
	321	(compared with all those below.)
	322
	323	In previous version, higher rates had less simulations to compare with (compare with all sim. with higher rate), but for high Obs. rate,
	324	comparison with "lower rate bins" was allowed to avoid the paradox of smaller pVal of pairs with low rate (with "while" mechanism)
	325
	326	*********************************************************************************************/
	327	int computeCorrelations::produceSortedVectorsOfCorrelationsBinedByRate(MDOUBLE medianNminOfRealData, ofstream* simCorrelStream){
	328	LOGnOUT(4,<<endl<<"produceSortedVectorsOfCorrelationsBinedByRate for simulated data..."<<endl);
	329	time_t t1,t2;
	330	time(&t1);
	331
	332	int numberOfHighCorrInSimulationOfMedianNminBin = 0;
	333	int numOfBins = gainLossOptions::_numOfBinsInParametricBootstrapSimulations;
	334
	335	pair<vector<double>::iterator,vector<double>::iterator> bounds;
	336
	337	int numberOfcorrelationVec = _correlationsPerSitePerPosVec.size();
	338	_correlationSubSetsNminLimitValues.resize(numberOfcorrelationVec);
	339	_correlationsSubSets.resize(numberOfcorrelationVec);
	340	_extremeValDistributions.resize(numberOfcorrelationVec);
	341
	342	int numOfSimulatedTotalPairs = _NminSortedSim[0].size(); // same for all CorrTypes
	343	LOGnOUT(4,<<"Num of pairs in simulations="<<numOfSimulatedTotalPairs<<endl);
	344
	345	for (int corIndex = 0; corIndex <numberOfcorrelationVec; ++corIndex){
	346	LOGnOUT(4,<<"For corIndex="<<corIndex<<endl);
	347	int typeIndex = corIndex % _EventTypes.size(); // in case both Spearman and pearson are used
	348	MDOUBLE Nmin_min = 0;
	349	if(Parameters::getFloat("_minExpThresholdForPValComputationForCorrelatingPair")>0){
	350	Nmin_min = Parameters::getFloat("_minExpThresholdForPValComputationForCorrelatingPair");
	351	LOGnOUT(4,<<"Nmin_min by threshold="<<Nmin_min<<endl);
	352	}
	353	else
	354	Nmin_min = *_NminSortedSim[corIndex].begin();
	355
	356	bounds = equal_range (_NminSortedSim[corIndex].begin(), _NminSortedSim[corIndex].end(), Nmin_min);
	357	int IndexOfValAboveNmin_min = int(bounds.first - _NminSortedSim[corIndex].begin());
	358	LOGnOUT(4,<<"Nmin_min in dataset="<<*(_NminSortedSim[corIndex].begin()+IndexOfValAboveNmin_min)<<endl);
	359
	360	int numOfSimulationPairs = _NminSortedSim[corIndex].size()-IndexOfValAboveNmin_min;
	361	if(numOfSimulationPairs==0)
	362	errorMsg::reportError("_minExpThresholdForPValComputationForCorrelatingPair is too high, no simulations above that value");
	363
	364	LOGnOUT(4,<<"Num of pairs above threshold (valid)="<<numOfSimulationPairs<<endl<<endl);
	365	int numOfSamplesInBin;
	366	bool randomOverlapPerIteration = true;
	367	MDOUBLE overlap = gainLossOptions::_relativeSizeOfOverLappedBins;
	368	if(randomOverlapPerIteration)
	369	overlap = gainLossOptions::_relativeSizeOfOverLappedBins + talRandom::giveRandomNumberBetweenTwoPoints(-0.1, 0.1);
	370
	371
	372	LOGnOUT(4,<<"Size of overlapped bins ="<<overlap<<endl<<endl);
	373
	374	if(gainLossOptions::_isSortVectorOfCorrelationsBinsByMidRateBound)
	375	numOfSamplesInBin = (int)(numOfSimulationPairs * overlap);
	376	else
	377	numOfSamplesInBin = (int)(numOfSimulationPairs/numOfBins);
	378
	379
	380	MDOUBLE Nmin_max = *(_NminSortedSim[corIndex].end()-1);
	381	MDOUBLE w_range = (Nmin_max-Nmin_min)/numOfBins;
	382
	383	int numOfSamplesInCurrBin = 0;
	384	MDOUBLE Nmin_lower = Nmin_min;
	385	MDOUBLE Nmin_upper = Nmin_min;
	386	MDOUBLE Nmin_mid = Nmin_min; // use with Mid Boundary with overlap
	387
	388	MDOUBLE NminPerBin = 0;
	389
	390	int indexOfSamplesForBin = IndexOfValAboveNmin_min;
	391	int indexOfSamplesForBinPrev = IndexOfValAboveNmin_min;
	392	int indexOfSamplesForBinUpper = IndexOfValAboveNmin_min;
	393	int indexOfSamplesForBinMid = IndexOfValAboveNmin_min;
	394
	395	_correlationsSubSets[corIndex].resize(numOfBins+1); // the actual size may be smaller, if break
	396	//_correlationSubSetsNminLimitValues[corIndex].resize(numOfBins+1); // to Zero bin
	397
	398	vector<MDOUBLE>::iterator it = _pairWiseCorrelationsAndNminSim[corIndex][0].begin(); // correlation part of vector
	399
	400
	401	// elevate Nmin Threshold if: (A) freqOfHighCorr was too high (B) freqOfHighCorr is reduced consistently with higher Nmin (C) new Nmin is lower than medianNminOfRealData
	402	MDOUBLE minExpTBeforeChange = (double)Parameters::getFloat("_minExpThresholdForPValComputationForCorrelatingPair");
	403	MDOUBLE freqOfHighCorr = 0;
	404	MDOUBLE freqOfHighCorrPrev = 0;
	405	MDOUBLE expextedFreq;
	406	if(gainLossOptions::_isUpdateMinExpThresholdGivenHighFractionOfHighCorrel){
	407	int isHigherNminReducedFreqOfHighCorr = false;
	408	int numOfBranches = _expChanges_PosNodeXY[0].size();
	409	int numOfBranches99 = (int)(numOfBranches*0.99);
	410	int combo99 = BinomialCoeff(numOfBranches,numOfBranches99);
	411	expextedFreq = 0.01 / (double)combo99;
	412	LOGnOUT(3,<<"Allowed fraction of high correlation. Computed with number of branch "<<numOfBranches<<" is "<<expextedFreq<<endl);
	413	}
	414
	415	for (int binIndex = 0; binIndex < numOfBins; ++binIndex){
	416	int indexOfIncrementation;
	417	int numOfSamplesToDivideAmondBins = numOfSimulationPairs-numOfSamplesInBin;
	418
	419	if(gainLossOptions::_isSortVectorOfCorrelationsBinsByMidRateBound){
	420	indexOfIncrementation = (int)(numOfSamplesToDivideAmondBins*binIndex/(numOfBins-1) );
	421	indexOfSamplesForBinPrev = indexOfIncrementation +IndexOfValAboveNmin_min;
	422	}
	423	else
	424	indexOfSamplesForBinPrev = indexOfSamplesForBin;
	425
	426	if(gainLossOptions::_isSortVectorOfCorrelationsBinsByMidRateBound)
	427	Nmin_lower = *(_NminSortedSim[corIndex].begin()+indexOfSamplesForBinPrev); // Low is computed before bin-related ++ of index
	428	else
	429	Nmin_lower = Nmin_upper;
	430
	431	// +++ index for bin
	432	if(gainLossOptions::_isDivideBinsByRange){
	433	NminPerBin = Nmin_min +(w_range*binIndex);
	434	bounds = equal_range (_NminSortedSim[corIndex].begin(), _NminSortedSim[corIndex].end(), NminPerBin); // assume sorted,
	435	indexOfSamplesForBin = int(bounds.first - _NminSortedSim[corIndex].begin())-1; // Nmin_endIndex = int(bounds.second - _NminSortedSim[corIndex].begin());
	436	}
	437	else if(gainLossOptions::_isSortVectorOfCorrelationsBinsByMidRateBound)
	438	indexOfSamplesForBin = indexOfIncrementation +numOfSamplesInBin +IndexOfValAboveNmin_min -1; //
	439	else
	440	indexOfSamplesForBin = (int)(numOfSimulationPairs*(binIndex+1)/numOfBins) +IndexOfValAboveNmin_min -1; // is -1 for bin=0
	441
	442	if(gainLossOptions::_isSortVectorOfCorrelationsBinsByLowerRateBound)
	443	Nmin_lower = *(_NminSortedSim[corIndex].begin()+indexOfSamplesForBin);
	444
	445
	446	// compute numOfSamples per Bin
	447	if(gainLossOptions::_isSortVectorOfCorrelationsBinsByLowerRateBound){
	448	numOfSamplesInCurrBin = indexOfSamplesForBin; //_NminSortedSim[corIndex].size()-indexOfSamplesForBinPrev;
	449	Nmin_upper = Nmin_max; // UpperIsFixedAtMax
	450	indexOfSamplesForBinUpper = indexOfSamplesForBin; //_NminSortedSim[corIndex].size()-1;
	451	}
	452	else{
	453	numOfSamplesInCurrBin = indexOfSamplesForBin-indexOfSamplesForBinPrev;
	454	Nmin_upper = *(_NminSortedSim[corIndex].begin()+indexOfSamplesForBin); // Up is computed after bin-related ++ of index;
	455	indexOfSamplesForBinUpper = indexOfSamplesForBin;
	456	}
	457
	458	if(numOfSamplesInCurrBin<10) // no samples in this range, for median
	459	break;
	460	if(gainLossOptions::_isSortVectorOfCorrelationsBinsByLowerRateBound && numOfSamplesInCurrBin<numOfSimulationPairs*0.05 ) // at least 5% of simulation to start new bin, otherwise previous was last
	461	break;
	462
	463	if(gainLossOptions::_isSortVectorOfCorrelationsBinsByMidRateBound){
	464	indexOfSamplesForBinMid = (int)(indexOfSamplesForBinUpper+indexOfSamplesForBinPrev)/2;
	465	Nmin_mid = *(_NminSortedSim[corIndex].begin()+indexOfSamplesForBinMid);
	466	}
	467
	468	// assign limits utility vector
	469	if(gainLossOptions::_isSortVectorOfCorrelationsBinsByLowerRateBound)
	470	_correlationSubSetsNminLimitValues[corIndex].push_back(Nmin_lower); // UpperIsFixedAtMax
	471	else if(gainLossOptions::_isSortVectorOfCorrelationsBinsByMidRateBound)
	472	_correlationSubSetsNminLimitValues[corIndex].push_back(Nmin_mid);
	473	else
	474	_correlationSubSetsNminLimitValues[corIndex].push_back(Nmin_upper);
	475
	476	_correlationsSubSets[corIndex][binIndex].resize(numOfSamplesInCurrBin);
	477	copy(it+indexOfSamplesForBinPrev, it+indexOfSamplesForBinUpper ,_correlationsSubSets[corIndex][binIndex].begin());
	478	sort(_correlationsSubSets[corIndex][binIndex].begin(),_correlationsSubSets[corIndex][binIndex].end());
	479
	480
	481	extremeValDistribution distr;
	482	MDOUBLE averageCorr = computeAverage(_correlationsSubSets[corIndex][binIndex]);
	483	MDOUBLE stdCorr = computeStd(_correlationsSubSets[corIndex][binIndex]);
	484	distr.fitParametersFromMoments(averageCorr, stdCorr);
	485	_extremeValDistributions[corIndex].push_back(distr);
	486
	487	pair<vector<double>::iterator,vector<double>::iterator> boundsOne;
	488	boundsOne = equal_range (_correlationsSubSets[corIndex][binIndex].begin(),_correlationsSubSets[corIndex][binIndex].end(), 0.99999);
	489	int indexOfpairEq1_first = int(boundsOne.first - _correlationsSubSets[corIndex][binIndex].begin());
	490	int numOfpairWithCorrEq1 = numOfSamplesInCurrBin - indexOfpairEq1_first;
	491
	492	boundsOne = equal_range (_correlationsSubSets[corIndex][binIndex].begin(),_correlationsSubSets[corIndex][binIndex].end(), 0.99);
	493	int indexOfpairEq99_first = int(boundsOne.first - _correlationsSubSets[corIndex][binIndex].begin());
	494	int numOfpairWithCorrEq99 = numOfSamplesInCurrBin - indexOfpairEq99_first;
	495
	496	boundsOne = equal_range (_correlationsSubSets[corIndex][binIndex].begin(),_correlationsSubSets[corIndex][binIndex].end(), 0.9);
	497	int indexOfpairEq9_first = int(boundsOne.first - _correlationsSubSets[corIndex][binIndex].begin());
	498	int numOfpairWithCorrEq9 = numOfSamplesInCurrBin - indexOfpairEq9_first;
	499
	500	// elevate Nmin Threshold if: (A) freqOfHighCorr was too high (B) freqOfHighCorr is reduced consistently with higher Nmin (C) new Nmin is lower than medianNminOfRealData
	501	if(gainLossOptions::_isUpdateMinExpThresholdGivenHighFractionOfHighCorrel){
	502	freqOfHighCorrPrev = freqOfHighCorr;
	503	freqOfHighCorr = (double)numOfpairWithCorrEq99/numOfSamplesInCurrBin;
	504	if(freqOfHighCorr>expextedFreq && freqOfHighCorr<freqOfHighCorrPrev && Nmin_lower < medianNminOfRealData){
	505	LOGnOUT(3,<<"Fraction of high (0.99) correlation prev="<<freqOfHighCorrPrev<<" reduced to "<<freqOfHighCorr<<endl);
	506	LOGnOUT(3,<<" Update MinExpThreshold Given highCorrlation in previous Nmin to "<<Nmin_lower<<endl);
	507	Parameters::updateParameter("_minExpThresholdForPValComputationForCorrelatingPair",double2string(Nmin_lower).c_str());
	508	}
	509	if(freqOfHighCorr>freqOfHighCorrPrev){ // revert back
	510	LOGnOUT(3,<<"Fraction of high (0.99) correlation prev="<<freqOfHighCorrPrev<<" elevated to "<<freqOfHighCorr<<endl);
	511	LOGnOUT(3,<<" Revert to "<<minExpTBeforeChange<<endl);
	512	Parameters::updateParameter("_minExpThresholdForPValComputationForCorrelatingPair",double2string(minExpTBeforeChange).c_str());
	513	}
	514	}
	515
	516	if(Nmin_lower>=medianNminOfRealData)
	517	numberOfHighCorrInSimulationOfMedianNminBin = max((double)numberOfHighCorrInSimulationOfMedianNminBin,(double)numOfpairWithCorrEq1);
	518
	519	*simCorrelStream<<"Bin = "<< binIndex+1 <<"\n";
	520	printCorrelationsFrequencies(_correlationsSubSets[corIndex][binIndex], simCorrelStream);
	521
	522	LOGnOUT(4,<<binIndex+1<<" Bin.\t#samples=\t"<<numOfSamplesInCurrBin<<".\tFrom rate:\t"<<Nmin_lower<<"\t-\t"<<Nmin_upper
	523	<<".\tis with corr:\t"<<_correlationsSubSets[corIndex][binIndex].begin()<<"\t-\t"<<(_correlationsSubSets[corIndex][binIndex].end()-1)
	524	<<"\tAve=\t"<<computeAverage(_correlationsSubSets[corIndex][binIndex])<<"\tMedian=\t"<< computeMedian(_correlationsSubSets[corIndex][binIndex])
	525	<<"\tratioOfpairWithCorrEq1=\t"<<(double)numOfpairWithCorrEq1/numOfSamplesInCurrBin<<" ("<<numOfpairWithCorrEq1<<")"
	526	<<"\tratioOfpairWithCorrEq0.99=\t"<<(double)numOfpairWithCorrEq99/numOfSamplesInCurrBin<<" ("<<numOfpairWithCorrEq99<<")"
	527	<<"\tratioOfpairWithCorrEq0.9=\t"<<(double)numOfpairWithCorrEq9/numOfSamplesInCurrBin<<" ("<<numOfpairWithCorrEq9<<")"<<endl);
	528
	529	if(gainLossOptions::_isSortVectorOfCorrelationsBinsByMidRateBound)
	530	LOGnOUT(4,<<" Mid rate= "<<Nmin_mid<<endl);
	531
	532	// Util
	533	//bool isPrintCorrListForEachBin = false;
	534	//if(isPrintCorrListForEachBin){
	535	// string debugS = _outDir + "//"+int2string(corIndex)+int2string(binIndex)+ "Rofbins.txt"; // D
	536	// ofstream debugSStream(debugS.c_str()); // D
	537	// debugSStream<<" Bin "<<binIndex<<" from rate: "<<Nmin_lower<<" to "<<Nmin_max<<endl;
	538	// for(vector<double>::iterator it = _correlationsSubSets[corIndex][binIndex].begin(); it<_correlationsSubSets[corIndex][binIndex].end();++it){
	539	// debugSStream<<*it<<"\n";
	540	// }
	541	//}
	542	}
	543	}
	544	_pairWiseCorrelationsAndNminSim.clear(); // clear huge vector when not required
	545	time(&t2);
	546	LOGnOUT(4,<<"TIME = "<<(t2-t1)/60.0<<" minutes"<<endl);
	547	return numberOfHighCorrInSimulationOfMedianNminBin;
	548	}
	549
	550	/********************************************************************************************
	551	*********************************************************************************************/
	552	void computeCorrelations::printCorrelationsFrequencies(Vdouble& correlationsVecSorted, ofstream* simCorrelStream){
	553
	554	float valsToCheck [] = {0.95,0.99,0.999,0.999999999}; // NOTE - if change size => change in loop!
	555
	556	int numOfCorrelations = correlationsVecSorted.size();
	557	pair<vector<double>::iterator,vector<double>::iterator> bounds;
	558	if(!simCorrelStream==NULL)
	559	*simCorrelStream<<"Corr eq/above\tratioOfCorAbove\tnumAboveEq\n";
	560	else
	561	LOGnOUT(4,<<"Corr eq/above\tratioOfCorAbove\tnumAboveEq"<< endl);
	562	for (MDOUBLE val=-0.9; val<=0.9; val+=0.1){
	563	bounds = equal_range (correlationsVecSorted.begin(), correlationsVecSorted.end(), val);
	564	int lastIndexWithPValBiggerThanThreshold = int(bounds.first - correlationsVecSorted.begin());
	565	int numAboveEq = numOfCorrelations-lastIndexWithPValBiggerThanThreshold;
	566	MDOUBLE ratioOfCorAbove = double(numAboveEq)/numOfCorrelations;
	567	MDOUBLE rounded = floorf(val * pow(10.0,4) + 0.5) / pow(10.0,4); // if not rounded, perfect correlations may return 1.000002, for example
	568	if(!simCorrelStream==NULL)
	569	*simCorrelStream<<rounded<<"\t"<<ratioOfCorAbove<<"\t("<<numAboveEq<<")\n";
	570	else
	571	LOGnOUT(4,<<rounded<<"\t"<<ratioOfCorAbove<<"\t("<<numAboveEq<<")"<< endl);
	572	}
	573	for (int i=0; i<4; ++i){
	574	bounds = equal_range (correlationsVecSorted.begin(), correlationsVecSorted.end(), valsToCheck[i]);
	575	int lastIndexWithPValBiggerThanThreshold = int(bounds.first - correlationsVecSorted.begin());
	576	int numAboveEq = numOfCorrelations-lastIndexWithPValBiggerThanThreshold;
	577	MDOUBLE ratioOfCorAbove = double(numAboveEq)/numOfCorrelations;
	578	if(!simCorrelStream==NULL)
	579	*simCorrelStream<<valsToCheck[i]<<"\t"<<ratioOfCorAbove<<"\t("<<numAboveEq<<")\n";
	580	else
	581	LOGnOUT(4,<<valsToCheck[i]<<"\t"<<ratioOfCorAbove<<"\t("<<numAboveEq<<")"<< endl);
	582	}
	583	if(!simCorrelStream==NULL)
	584	*simCorrelStream<<"\n";
	585	else
	586	LOGnOUT(4,<< endl);
	587	}
	588
	589
	590
	591
	592	/********************************************************************************************
	593	*********************************************************************************************/
	594	int computeCorrelations::computedCorrelationsPValBasedOnSimulatedDataCoMapBins(VVVdouble& correlationPerSitePerPosReal,vector<vector<bool> >& isComputePairWithRateAboveNim,VVVVdouble& expChanges_PosXYReal, VVVdouble& correlationPerSitePerPos_Pval
	595	,map<int, map<int, map<string, map<string, MDOUBLE > > > >& correlationsData, Vdouble& rate4siteReal, Vint& selectedSites, Vint& numOfGapsTillSite, Vint& evolvingSites, bool isLastIteration){
	596	LOGnOUT(4,<<endl<<"computedCorrelationsPValBasedOnSimulatedDataCoMapBins..."<<endl);
	597	time_t t1,t2;
	598	time(&t1);
	599
	600	int numOfpairsWithRateAboveMinRequiredExp = 0;
	601	string pairWiseCorrelationsAndNmin = _outDir + "//" + "pairWiseCorrelationsAndNmin.txt";
	602	ofstream corrSigStream(pairWiseCorrelationsAndNmin.c_str());
	603	corrSigStream<<"site_A"<<"\t"<<"site_B"<<"\t"<<"Nmin_obs"<<"\t"<<"Corr_obs"<<"\n";
	604
	605	int numberOfcorrelationVec = correlationPerSitePerPosReal.size();
	606	int numOfSites_A = correlationPerSitePerPosReal[0].size();
	607	int numOfSites_B = correlationPerSitePerPosReal[0][0].size();
	608	_corrVector.resize(numberOfcorrelationVec);
	609
	610	VVVdouble map_PosXY;
	611	if(rate4siteReal.size()==0)
	612	computeRateValPerPos(expChanges_PosXYReal,map_PosXY);
	613
	614	for (int corIndex = 0; corIndex <numberOfcorrelationVec; ++corIndex){
	615	LOGnOUT(4,<<" *** corIndex="<<corIndex<<endl);
	616	int typeIndex = corIndex % _EventTypes.size(); // in case both Spearman and pearson are used
	617	int numOfpairsWithRateBelowSimulation = 0;
	618	int numOfpairsWithRateAboveSimulation = 0;
	619	int numOfpairsWithRateBelowMinRequiredExp = 0;
	620	int pairNum = 0;
	621	bool computePValForPairWithNminAboveMin = true;
	622	MDOUBLE minExpThresholdForPValComputationForCorrelatingPair;
	623	minExpThresholdForPValComputationForCorrelatingPair = Parameters::getFloat("_minExpThresholdForPValComputationForCorrelatingPair");
	624
	625	for (int site_A = 0; site_A <numOfSites_A; ++site_A){
	626	int site_A_original = selectedSites[site_A];
	627	int site_A_RemovedGaps = site_A_original- numOfGapsTillSite[site_A];
	628	if((site_A)%100==0)
	629	cout<<"*";
	630	for (int site_B = site_A; site_B <numOfSites_B; ++site_B){
	631	int site_B_original = evolvingSites[site_B];
	632	computePValForPairWithNminAboveMin = true; // reset as new for each pair
	633	if(site_A_original == site_B_original){
	634	correlationPerSitePerPos_Pval[corIndex][site_A][site_B] = 0;
	635	continue;
	636	}
	637	pairNum++;
	638	MDOUBLE Corr_obs = correlationPerSitePerPosReal[corIndex][site_A][site_B]; // Real correlation from Input variable
	639	_corrVector[corIndex].push_back(Corr_obs);
	640	MDOUBLE Nmin_obs = 0;
	641
	642	if(rate4siteReal.size()==0)
	643	Nmin_obs = computeNminPerPair(site_A_RemovedGaps, site_B, typeIndex, map_PosXY);
	644	else
	645	Nmin_obs = min(rate4siteReal[site_A_original] , rate4siteReal[site_B_original]);
	646
	647	if(gainLossOptions::_isPrintpairWiseCorrelationsAndNmin)
	648	corrSigStream<<site_A_original+1<<"\t"<<site_B_original+1<<"\t"<<Nmin_obs<<"\t"<<Corr_obs<<"\n";
	649
	650	// find the bin with highest simulated Rate suitable of obsNmin
	651	int binForNmin_obs = 0;
	652	while(Nmin_obs>=_correlationSubSetsNminLimitValues[corIndex][binForNmin_obs] && binForNmin_obs<_correlationSubSetsNminLimitValues[corIndex].size()-1)
	653	binForNmin_obs++;
	654
	655	if(Nmin_obs < minExpThresholdForPValComputationForCorrelatingPair){
	656	computePValForPairWithNminAboveMin = false;
	657	numOfpairsWithRateBelowMinRequiredExp++;
	658	}
	659	if(Nmin_obs<*(_NminSortedSim[corIndex].begin())){
	660	LOGnOUT(7,<<"WARN: low Nmin_obs="<<Nmin_obs<<" Since no simulation support this rate, pVal computed as "<<1.0/_numOfSamplesInLowRateFirstBin<<" for site_A="<<site_A_original<<" and site_B="<<site_B_original<<" with corr="<<Corr_obs<<endl);
	661	//computePValForPairWithNminAboveMin = false;
	662	if(corIndex == 0) // done for only one type of correlation
	663	numOfpairsWithRateBelowSimulation++;
	664	}
	665	if(Nmin_obs> *(_NminSortedSim[corIndex].end()-1)){
	666	LOGnOUT(7,<<"WARN: high Nmin_obs="<<Nmin_obs<<" pVal is computed with lower Nmin simulations as referece for site_A="<<site_A_original<<" and site_B="<<site_B_original<<" with corr="<<Corr_obs<<endl);
	667	if(corIndex == 0) // done for only one type of correlation
	668	numOfpairsWithRateAboveSimulation++;
	669	}
	670
	671	MDOUBLE pVal = 0.99;
	672	MDOUBLE pValEVD = 0.99;
	673	int NumberOfSimulationsInRange = 0;
	674	int NumberOfSimulationPointsMoreExtremeOrEqToCorr = 0;
	675	int prevNumberOfSimulationsInRange = 1;
	676	int prevNumberOfSimulationPointsMoreExtremeOrEqToCorr = 1;
	677
	678	int numOfBinsWithLowerSignificance = 0; // allow 2 "bin-iteration" even with lower significance to mitigate chance of "missing" higher significance in lower bin
	679	bool isNextLowerBinAllowed = true;
	680	isComputePairWithRateAboveNim[site_A][site_B] = computePValForPairWithNminAboveMin;
	681	if(computePValForPairWithNminAboveMin){
	682	//MDOUBLE pVal_prev = 1;
	683	//while(binForNmin_obs>=0 && isNextLowerBinAllowed ){
	684	//pVal_prev = pVal;
	685	prevNumberOfSimulationsInRange = NumberOfSimulationsInRange;
	686	prevNumberOfSimulationPointsMoreExtremeOrEqToCorr = NumberOfSimulationPointsMoreExtremeOrEqToCorr;
	687	NumberOfSimulationsInRange = _correlationsSubSets[corIndex][binForNmin_obs].size();
	688	NumberOfSimulationPointsMoreExtremeOrEqToCorr = 0;
	689	pair<vector<double>::iterator,vector<double>::iterator> bounds;
	690	vector<double>::iterator startCorV = _correlationsSubSets[corIndex][binForNmin_obs].begin();
	691	vector<double>::iterator endCorV = _correlationsSubSets[corIndex][binForNmin_obs].end();
	692	bounds = equal_range (startCorV, endCorV, Corr_obs);
	693	//cout << "bounds at positions " << int(bounds.first - startCorV) << " and " << int(bounds.second - startCorV) << endl;
	694	NumberOfSimulationPointsMoreExtremeOrEqToCorr = NumberOfSimulationsInRange-int(bounds.first - startCorV);
	695
	696	if(gainLossOptions::_isConsiderNegativeCorrelations){
	697	int NumberOfSimulationPointsMoreExtremeOrEqToCorrNegative = int(bounds.second - startCorV);
	698	NumberOfSimulationPointsMoreExtremeOrEqToCorr = min(NumberOfSimulationPointsMoreExtremeOrEqToCorr, NumberOfSimulationPointsMoreExtremeOrEqToCorrNegative);
	699	pVal = (double(NumberOfSimulationPointsMoreExtremeOrEqToCorr+1)/(NumberOfSimulationsInRange+1)) *2; // multiplied by 2, since it's two-sided
	700	}else
	701	pVal = double(NumberOfSimulationPointsMoreExtremeOrEqToCorr+1)/(NumberOfSimulationsInRange+1);
	702
	703	if(gainLossOptions::_isCompExtremeValDistribution)
	704	pValEVD = 1- _extremeValDistributions[corIndex][binForNmin_obs].getCDF(Corr_obs);
	705
	706	//if(pVal_prev<pVal){
	707	// pVal= pVal_prev;
	708	// NumberOfSimulationsInRange = prevNumberOfSimulationsInRange;
	709	// NumberOfSimulationPointsMoreExtremeOrEqToCorr = prevNumberOfSimulationPointsMoreExtremeOrEqToCorr;
	710	// ++numOfBinsWithLowerSignificance; // the upper bin had more significant pVal. Count few such "steps down" and quite
	711	//}
	712	//binForNmin_obs--;
	713	//if(!gainLossOptions::_isSortVectorOfCorrelationsBinsByLowerRateBound \|\| (pVal<pVal_prev && numOfBinsWithLowerSignificance<3))
	714	// isNextLowerBinAllowed = false;
	715	//}
	716	}
	717	else
	718	pVal = 1; // value that is not possible with computation
	719
	720	// Only pairs with pVal < cuttoff are re-computed in iterations
	721	if(pVal<=gainLossOptions::_pValueCutOffForBootStrap \|\| gainLossOptions::_selectedSitesForCorrelation!="" ){ //TEMP for selected sites, fill map for all correlations
	722	//cout<<site_A<<" "<<site_B<<" "<<pVal<<" "<<Nmin_obs<<" "<<Corr_obs<<endl;
	723	correlationsData[site_A_original][site_B_original][int2string(corIndex)]["R"] = Corr_obs;
	724	correlationsData[site_A_original][site_B_original][int2string(corIndex)]["Rate"] = Nmin_obs;
	725
	726	correlationsData[site_A_original][site_B_original][int2string(corIndex)]["SimTotal"] += NumberOfSimulationsInRange;
	727	correlationsData[site_A_original][site_B_original][int2string(corIndex)]["SimExtreme"] += NumberOfSimulationPointsMoreExtremeOrEqToCorr;
	728	pVal = (correlationsData[site_A_original][site_B_original][int2string(corIndex)]["SimExtreme"]+1)/(correlationsData[site_A_original][site_B_original][int2string(corIndex)]["SimTotal"]+1);
	729	correlationsData[site_A_original][site_B_original][int2string(corIndex)]["pVal"] = pVal;
	730
	731	// take the higher pVal from all iterations
	732
	733	bool isFirstEstimation = false;
	734	if(correlationsData[site_A_original][site_B_original][int2string(corIndex)]["SimTotal"] == NumberOfSimulationsInRange)
	735	isFirstEstimation = true;
	736	if(gainLossOptions::_isCompExtremeValDistribution){
	737	if(!isFirstEstimation)
	738	pValEVD = max(pValEVD, correlationsData[site_A_original][site_B_original][int2string(corIndex)]["pValEVD"]);
	739	correlationsData[site_A_original][site_B_original][int2string(corIndex)]["pValEVD"] = pValEVD;
	740	}
	741
	742	}
	743	if(gainLossOptions::_selectedSitesForCorrelation==""){
	744	correlationPerSitePerPos_Pval[corIndex][site_A][site_B] = pVal;
	745	correlationPerSitePerPos_Pval[corIndex][site_B][site_A] = pVal;
	746	}
	747	}
	748	}
	749	cout<<"\n";
	750	numOfpairsWithRateAboveMinRequiredExp = pairNum-numOfpairsWithRateBelowMinRequiredExp;
	751	LOGnOUT(4,<<"numOfpairs With Rate - Below Simulation="<<numOfpairsWithRateBelowSimulation<<" - Above Simulation="<<numOfpairsWithRateAboveSimulation<<endl);
	752
	753	if(isLastIteration){
	754	LOGnOUT(4,<<"numOfpairs="<<pairNum<<endl);
	755	if(Parameters::getFloat("_minExpThresholdForPValComputationForCorrelatingPair") > 0){
	756	LOGnOUT(4,<<"numOfpairs With Rate below minimal Threshold="<<Parameters::getFloat("_minExpThresholdForPValComputationForCorrelatingPair")<<" are "<<numOfpairsWithRateBelowMinRequiredExp<<endl);
	757	}
	758	LOGnOUT(4,<<"\nReal Data correlations frequencies:"<<endl);
	759	LOGnOUT(4,<<"num of correlations="<<_corrVector[corIndex].size()<<endl);
	760	sort(_corrVector[corIndex].begin(),_corrVector[corIndex].end());
	761	printCorrelationsFrequencies(_corrVector[corIndex]);
	762	}
	763	}
	764	time(&t2);
	765	LOGnOUT(4,<<"TIME = "<<(t2-t1)/60.0<<" minutes"<<endl<<endl);
	766	return numOfpairsWithRateAboveMinRequiredExp;
	767	}
	768
	769
	770
	771
	772
	773	/********************************************************************************************
	774	*********************************************************************************************/
	775	void computeCorrelations::computedCorrelationsRankBasedOnSimulatedData(const Vint& selectedPositions, VVVdouble& correlationPerSitePerPos, VVVdouble& correlationPerSitePerPos_Simulations, VVVdouble& correlationPerSitePerPos_Pval){
	776	LOGnOUT(4,<<endl<<"computedCorrelationsRankBasedOnSimulatedData..."<<endl);
	777	time_t t1,t2;
	778	time(&t1);
	779
	780	int numberOfcorrelationVec = correlationPerSitePerPos.size();
	781	int numOfSites_A = correlationPerSitePerPos[0].size();
	782	int numOfSites_B = correlationPerSitePerPos[0][0].size();
	783	int numberOfSimulation = correlationPerSitePerPos_Simulations[0][0].size();
	784
	785	for (int corIndex = 0; corIndex <numberOfcorrelationVec; ++corIndex){
	786	for (int site_A = 0; site_A <numOfSites_A; ++site_A){
	787	int selectedSite = site_A; //??
	788	for (int site_B = 0; site_B <numOfSites_B; ++site_B){
	789	MDOUBLE rank = 0; //numberOfSimulation
	790	MDOUBLE correlVal = correlationPerSitePerPos[corIndex][selectedSite][site_B];
	791	for (int pos = 0; pos<numberOfSimulation; ++pos){
	792	MDOUBLE correlSim = correlationPerSitePerPos_Simulations[corIndex][selectedSite][pos];
	793	if((correlVal>correlSim && correlVal>=0 ) \|\| (correlVal<correlSim && correlVal<0 ))
	794	rank++; // --
	795	}
	796	//MDOUBLE pVal = double((rank+1)/numberOfSimulation);
	797	correlationPerSitePerPos_Pval[corIndex][selectedSite][site_B] = rank; // pVal
	798	}
	799	}
	800	}
	801	time(&t2);
	802	LOGnOUT(4,<<"TIME = "<<(t2-t1)/60.0<<" minutes"<<endl<<endl);
	803	}
	804
	805	/********************************************************************************************
	806	fill correlationPerSitePerPos_Pval,
	807	Compute pVal for correlationPerSitePerPos, taking into account expChanges_PosXY
	808	*********************************************************************************************/
	809	void computeCorrelations::computedCorrelationsPValBasedOnSimulatedDataCoMap(VVVdouble& correlationPerSitePerPosReal,VVVVdouble& expChanges_PosXYReal, VVVdouble& correlationPerSitePerPos_Pval){
	810	LOGnOUT(4,<<endl<<"computedCorrelationsRankBasedOnSimulatedDataCoMap..."<<endl);
	811	time_t t1,t2;
	812	time(&t1);
	813
	814	MDOUBLE theWfactor = 5;
	815	int numberOfcorrelationVec = correlationPerSitePerPosReal.size();
	816	int numOfSites_A = correlationPerSitePerPosReal[0].size();
	817	int numOfSites_B = correlationPerSitePerPosReal[0][0].size();
	818
	819	VVVdouble map_PosXY;
	820	computeRateValPerPos(expChanges_PosXYReal,map_PosXY);
	821
	822	for (int corIndex = 0; corIndex <numberOfcorrelationVec; ++corIndex){
	823	LOGnOUT(4,<<" *** corIndex="<<corIndex<<endl);
	824	MDOUBLE Nmin_min = *_NminSortedSim[corIndex].begin();
	825	MDOUBLE Nmin_max = *(_NminSortedSim[corIndex].end()-1);
	826	MDOUBLE w_range = (Nmin_max-Nmin_min)/theWfactor;
	827
	828	for (int site_A = 0; site_A <numOfSites_A; ++site_A){
	829	if((site_A)%100==0)
	830	cout<<"*";
	831	for (int site_B = site_A; site_B <numOfSites_B; ++site_B){
	832	if(site_A == site_B){
	833	correlationPerSitePerPos_Pval[corIndex][site_A][site_B] = 0;
	834	continue;
	835	}
	836	MDOUBLE Corr_obs = correlationPerSitePerPosReal[corIndex][site_A][site_B]; // Real correlation from Input variable
	837	int typeIndex = corIndex%_EventTypes.size();
	838	string type = _EventTypes[typeIndex];
	839	int from = _EventTypesFromTo[type]["from"];
	840	int to = _EventTypesFromTo[type]["to"];
	841	MDOUBLE Nmin_obs = min(map_PosXY[site_A][from][to],map_PosXY[site_B][from][to]); // Real Nmin from Input variable
	842
	843	MDOUBLE Nmin_lower = Nmin_obs-w_range/2;
	844	MDOUBLE Nmin_upper = Nmin_obs+w_range/2;
	845
	846	pair<vector<double>::iterator,vector<double>::iterator> bounds;
	847	bounds = equal_range (_NminSortedSim[corIndex].begin(), _NminSortedSim[corIndex].end(), Nmin_lower);
	848	int Nmin_startIndex = int(bounds.first - _NminSortedSim[corIndex].begin());
	849	bounds = equal_range (_NminSortedSim[corIndex].begin(), _NminSortedSim[corIndex].end(), Nmin_upper);
	850	int Nmin_endIndex = int(bounds.second - _NminSortedSim[corIndex].begin());
	851	//cout <<Nmin_obs<< " is rang is " << Nmin_startIndex<< " and " << Nmin_endIndex << endl;
	852
	853	int NumberOfSimulationsInRange = Nmin_endIndex-Nmin_startIndex+1;
	854
	855	//for (int i = Nmin_startIndex; i < Nmin_endIndex; ++i){
	856	// CorrelationsSubSet.push_back(_pairWiseCorrelationsAndNminSim[corIndex][1][i]); // simulations based data
	857	//}
	858	//sort(CorrelationsSubSet.begin(),CorrelationsSubSet.end());
	859	//bounds = equal_range (CorrelationsSubSet.begin(), CorrelationsSubSet.end(), Corr_obs);
	860	//int NumberOfSimulationPointsGreaterOrEqToCorr = NumberOfSimulationsInRange-int(bounds.first - CorrelationsSubSet.begin());
	861	//cout <<"corr= "<<Corr_obs<<" is ranked " << NumberOfSimulationPointsGreaterOrEqToCorr<< " out of " << NumberOfSimulationsInRange << endl;
	862
	863	int NumberOfSimulationPointsGreaterOrEqToCorr =0;
	864	for (int i = 0; i<NumberOfSimulationsInRange; ++i){
	865	MDOUBLE correlSim = _pairWiseCorrelationsAndNminSim[corIndex][0][i+Nmin_startIndex];
	866	if(Corr_obs<=correlSim && Corr_obs>=0 )
	867	NumberOfSimulationPointsGreaterOrEqToCorr++; // --
	868	if(gainLossOptions::_isConsiderNegativeCorrelations && Corr_obs<0 && Corr_obs>=correlSim )
	869	NumberOfSimulationPointsGreaterOrEqToCorr++;
	870	}
	871
	872	MDOUBLE pVal = double(NumberOfSimulationPointsGreaterOrEqToCorr+1)/(NumberOfSimulationsInRange+1);
	873	//cout << "pVal="<<pVal<<endl;
	874	correlationPerSitePerPos_Pval[corIndex][site_A][site_B] = pVal;
	875	correlationPerSitePerPos_Pval[corIndex][site_B][site_A] = pVal;
	876	}
	877	}
	878	cout<<"\n";
	879	}
	880	time(&t2);
	881	LOGnOUT(4,<<"TIME = "<<(t2-t1)/60.0<<" minutes"<<endl<<endl);
	882	}
	883
	884
	885
	886	/********************************************************************************************
	887	*********************************************************************************************/
	888	void computeCorrelations::produceSymeticMatrix(VVVdouble& correlationPerSitePerPos, bool isMin){
	889	LOGnOUT(4,<<endl<<"produceSymeticMatrix..."<<endl);
	890	int numberOfcorrelationVec = correlationPerSitePerPos.size();
	891	int numOfSites_A = correlationPerSitePerPos[0].size();
	892	int numOfSites_B = correlationPerSitePerPos[0][0].size();
	893	if(!numOfSites_A == numOfSites_B){
	894	LOGnOUT(6, <<"WARN dim not equal in produceSymeticMatrix "<<numOfSites_A<<" vs "<<numOfSites_B<<endl);
	895	return;
	896	}
	897	for (int corIndex = 0; corIndex <numberOfcorrelationVec; ++corIndex){
	898	for (int site_A = 0; site_A <numOfSites_A; ++site_A){
	899	int selectedSite = site_A; //??
	900	for (int site_B = 0; site_B <numOfSites_B; ++site_B){
	901	MDOUBLE minVal = correlationPerSitePerPos[corIndex][selectedSite][site_B];
	902	if(correlationPerSitePerPos[corIndex][site_B][selectedSite]<minVal)
	903	correlationPerSitePerPos[corIndex][selectedSite][site_B] = correlationPerSitePerPos[corIndex][site_B][selectedSite];
	904	}
	905	}
	906	}
	907	}
	908
	909	/********************************************************************************************
	910	PrintExpPerPosPerBranchMatrix (CoMap input)
	911	NOTE!!! this version only consist of gain or loss values
	912	Alternatively, (1) abs(gain+loss) (2) gain-loss (3) separate gain and loss matrices
	913	*********************************************************************************************/
	914	void computeCorrelations::printComputedCorrelationsData(const bool isNormalizeForBranch, const bool correlationForZscore
	915	,map<int, map<int, map<string, map<string, MDOUBLE > > > >& correlationsData, Vdouble& T_BH, bool isPairsAboveBH)
	916	{
	917	LOGnOUT(4,<<endl<<"print Correlation data All significant sites..."<<endl);
	918	int precisionCorr = 8;
	919	string pairsAboveBH = "";
	920	if(isPairsAboveBH)
	921	pairsAboveBH = ".pairsAboveBH";
	922
	923	string corrSigSites = _outDir + "//" + "significantCorrelations.isNormForBr."+int2string(isNormalizeForBranch)+pairsAboveBH + ".txt";
	924	ofstream corrSigStream(corrSigSites.c_str());
	925	corrSigStream.precision(precisionCorr);
	926
	927	// _correlationsData["i"]["j"]["type"]["R" / "pVal" / "qVal" / "Nmin"]
	928	typedef map<int,map<int, map<string, map<string, MDOUBLE> > > >::iterator it_A;
	929	typedef map<int, map<string, map<string, MDOUBLE> > >::iterator it_B;
	930	typedef map<string, map<string, MDOUBLE> >::iterator it_CorrT;
	931	typedef map<string, MDOUBLE>::iterator it_valT;
	932
	933	it_A it1 = correlationsData.begin(); // COG A
	934	it_B it2 = it1->second.begin(); // COG B
	935	it_CorrT it3 = it2->second.begin(); // corrType
	936	it_valT it4 = it3->second.begin(); // valType, val (["R" / "pVal" / "qVal" / "Nmin"])
	937
	938	map<int, map<int,bool> > isPairWithSignificantPValAfterBH;
	939
	940	//if(!isPairsAboveBH){
	941	for(it1 = correlationsData.begin(); it1 != correlationsData.end(); it1++) {
	942	for(it2 = it1->second.begin(); it2 != it1->second.end(); it2++) {
	943	if( gainLossOptions::_isAllCorrTypeReqruiedToBeSignificant)
	944	isPairWithSignificantPValAfterBH[it1->first][it2->first] = true;
	945	else
	946	isPairWithSignificantPValAfterBH[it1->first][it2->first] = false;
	947
	948	for(it3 = it2->second.begin(); it3 != it2->second.end(); it3++) {
	949	for(it4 = it3->second.begin(); it4 != it3->second.end(); it4++) {
	950	if( gainLossOptions::_isAllCorrTypeReqruiedToBeSignificant && it4->first == "pVal" && it4->second > T_BH[ string2double(it3->first)])
	951	isPairWithSignificantPValAfterBH[it1->first][it2->first] = false; // sufficient that one corType results with pVal>BH[corType] not to print
	952	else if (! gainLossOptions::_isAllCorrTypeReqruiedToBeSignificant && it4->first == "pVal" && it4->second<= T_BH[string2double(it3->first)])
	953	isPairWithSignificantPValAfterBH[it1->first][it2->first] = true; // sufficient that one corType results with pVal<=BH[corType] to print
	954	}
	955
	956	}
	957	}
	958	}
	959	//}
	960
	961
	962
	963	// Reset, before printing Header
	964	it1 = correlationsData.begin();
	965	it2 = it1->second.begin();
	966
	967	// print Header
	968	corrSigStream<<"posA"<<"\t"<<"posB"<<"\t";
	969	for(it3 = it2->second.begin(); it3 != it2->second.end(); it3++) {
	970	for(it4 = it3->second.begin(); it4 != it3->second.end(); it4++) { // iterate over all valTypes (["R" / "pVal" / "qVal" / "Nmin"])
	971	corrSigStream<<it3->first<<"_"<<it4->first<<"\t"; // the combination results with e.g., 0_R 0_pVal 1_R 1_pVal
	972	}
	973	}
	974	corrSigStream<<"\n";
	975
	976	// print pair-specific computations
	977	for(it1 = correlationsData.begin(); it1 != correlationsData.end(); it1++) {
	978	for(it2 = it1->second.begin(); it2 != it1->second.end(); it2++) {
	979	if(/isPairsAboveBH \|\|/ isPairWithSignificantPValAfterBH[it1->first][it2->first])
	980	corrSigStream<<it1->first+1<<"\t"<<it2->first+1<<"\t";
	981	for(it3 = it2->second.begin(); it3 != it2->second.end(); it3++) {
	982	for(it4 = it3->second.begin(); it4 != it3->second.end(); it4++) {
	983	if(/isPairsAboveBH \|\| /isPairWithSignificantPValAfterBH[it1->first][it2->first])
	984	corrSigStream<<it4->second<<"\t";
	985	}
	986	}
	987	if(/isPairsAboveBH \|\|/ isPairWithSignificantPValAfterBH[it1->first][it2->first])
	988	corrSigStream<<"\n";
	989	}
	990	}
	991	corrSigStream.close();
	992	}
	993
	994
	995
	996	/********************************************************************************************
	997	PrintExpPerPosPerBranchMatrix (CoMap input)
	998	NOTE!!! this version only consist of gain or loss values
	999	Alternatively, (1) abs(gain+loss) (2) gain-loss (3) separate gain and loss matrices
	1000	*********************************************************************************************/
	1001	void computeCorrelations::printComputedCorrelations(const Vint& selectedPositions,const Vint& evolvingSites, const bool isNormalizeForBranch, const bool correlationForZscore, VVVdouble* correlationsVec, string* valType)
	1002	{
	1003	// OLD version
	1004
	1005	bool isOldAllAgainstAllVersion = false;
	1006	bool isTransform = false;
	1007	bool isMinForPrint = true;
	1008	bool isPearson = false;
	1009	int precisionCorr = 8;
	1010	MDOUBLE minForPrint = 0.1; // max =1
	1011
	1012	string pVal = "";
	1013	if(valType)
	1014	pVal = *valType;
	1015	VVVdouble correlationsVec2print;
	1016	if(correlationsVec){
	1017	correlationsVec2print = *correlationsVec;
	1018	LOGnOUT(4, <<"Print correlation for external data"<<endl);
	1019	}
	1020	else
	1021	correlationsVec2print = _correlationsPerSitePerPosVec;
	1022
	1023	int numOfpositions = correlationsVec2print[0][0].size(); // assume all correlation vectors the same size
	1024	int numOfbranches = _tr.getNodesNum()-1; // was -1, minus the root node
	1025
	1026	//// Mapping vectors
	1027	LOGnOUT(6, <<"Copy events vectors"<<endl);
	1028	//////////////////////////////////////////////////////////////////////////
	1029	if(!gainLossOptions::_printComputedCorrelationsAllSites){
	1030	LOGnOUT(4,<<"print Correlations selected sites..."<<endl);
	1031	for (int selectedSiteIndex = 0; selectedSiteIndex <selectedPositions.size(); ++selectedSiteIndex){
	1032	int selectedSite = selectedPositions[selectedSiteIndex];
	1033	Vdouble MeansVal(_isPearson.size()*_EventTypes.size());
	1034	Vdouble SdVal(_isPearson.size()*_EventTypes.size());
	1035
	1036	// for each selectedSite a new file is created
	1037	LOGnOUT(4, <<"Correlations with site="<<selectedSite<<" With NormalizeForBranch "<<isNormalizeForBranch<<" With correlationForZscore "<<correlationForZscore<<endl);
	1038	string corrPerSite = _outDir + "//" + "selectedCorr.Site"+ int2string(selectedSite+1)+ ".isNormForBr."+int2string(isNormalizeForBranch)+pVal+/+ ".isCorrForZ."+int2string(correlationForZscore)/+ ".txt";
	1039
	1040	ofstream corrPerSiteStream(corrPerSite.c_str());
	1041	corrPerSiteStream.precision(precisionCorr);
	1042	corrPerSiteStream<<"# "<<selectedSite+1<<"\n";
	1043	int vecIndex=0;
	1044	for (vector<bool>::iterator it=_isPearson.begin() ; it < _isPearson.end(); it++ ){
	1045	int typeIndex=0;
	1046	for (vector<string>::iterator evnt=_EventTypes.begin() ; evnt < _EventTypes.end(); evnt++ ){ // could be done with int
	1047	LOGnOUT(6, <<"Compute correl isPearson="<<it<<" with type="<<evnt<<endl);
	1048	MeansVal[vecIndex] = computeAverage(correlationsVec2print[vecIndex][selectedSiteIndex]);
	1049	SdVal[vecIndex] = computeStd(correlationsVec2print[vecIndex][selectedSiteIndex]);
	1050	corrPerSiteStream<<"# Correlation isSpearman="<<it<<" with type="<<evnt<<" Mean="<<MeansVal[vecIndex]<<" Sd="<<SdVal[vecIndex]<<"\n";
	1051	typeIndex++;
	1052	vecIndex++;
	1053	}
	1054	}
	1055	corrPerSiteStream<<"pos";
	1056	vecIndex=0;
	1057	for (vector<bool>::iterator it=_isPearson.begin() ; it < _isPearson.end(); it++ ){
	1058	for (vector<string>::iterator evnt=_EventTypes.begin() ; evnt < _EventTypes.end(); evnt++ ){ // could be done with int
	1059	corrPerSiteStream<<"\t"<<evnt+int2string(it);
	1060	vecIndex++;
	1061	}
	1062	}
	1063	corrPerSiteStream<<"\n";
	1064	for (int posIndex = 0; posIndex<numOfpositions; ++posIndex){
	1065	int evolvingSite = evolvingSites[posIndex];
	1066	if(selectedSite == evolvingSite) // since selectedSite starts from 1
	1067	continue;
	1068	bool isPosOneOfSelectedSites = false;
	1069	if(gainLossOptions::_isIgnoreCorrelationAmongSelectedSites){
	1070	for (int selectedSiteI = 0; selectedSiteI <selectedPositions.size(); ++selectedSiteI){
	1071	int selectedS = selectedPositions[selectedSiteI];
	1072	if(selectedS == evolvingSite){
	1073	isPosOneOfSelectedSites = true;
	1074	continue;
	1075	}
	1076	}
	1077	if(isPosOneOfSelectedSites)
	1078	continue;
	1079	}
	1080	corrPerSiteStream<<evolvingSite+1;
	1081	int vecIndex=0;
	1082	for (vector<bool>::iterator it=_isPearson.begin() ; it < _isPearson.end(); it++ ){
	1083	for (vector<string>::iterator evnt=_EventTypes.begin() ; evnt < _EventTypes.end(); evnt++ ){ // could be done with int
	1084	corrPerSiteStream<<"\t"<<correlationsVec2print[vecIndex][selectedSiteIndex][posIndex];
	1085	vecIndex++;
	1086	}
	1087	}
	1088	corrPerSiteStream<<"\n";
	1089	}
	1090	}
	1091	}
	1092	////////////////////////////////////////////////////////////////////////// All-against-all different format
	1093	else if(isOldAllAgainstAllVersion){
	1094	LOGnOUT(4,<<endl<<"print Correlations All sites (old version)..."<<endl);
	1095	string corrAllSites = _outDir + "//" + "allCorrelations.isNormForBr."+int2string(isNormalizeForBranch)+pVal+/* ".isCorrForZ."+int2string(correlationForZscore)+*/ ".txt";
	1096	ofstream corrAllStream(corrAllSites.c_str());
	1097	corrAllStream.precision(precisionCorr);
	1098	corrAllStream<<"#COGA"<<"\t"<<"COGB"<<"\t"<<"posGainGain"<<"\t"<<"posLossLoss"<<"\t"<<"negGainGain"<<"\t"<<"negLossLoss"<<"\n";
	1099	for (int selectedSiteIndex = 0; selectedSiteIndex <selectedPositions.size(); ++selectedSiteIndex){
	1100	int selectedSite = selectedPositions[selectedSiteIndex];
	1101
	1102	MDOUBLE meanCorrGainGain = computeAverage(_correlationsPerSitePerPosVec[0][selectedSiteIndex]);
	1103	MDOUBLE stdCorrGainGain = computeStd(_correlationsPerSitePerPosVec[0][selectedSiteIndex]);
	1104	MDOUBLE meanCorrLossLoss = computeAverage(_correlationsPerSitePerPosVec[1][selectedSiteIndex]);
	1105	MDOUBLE stdCorrLossLoss = computeStd(_correlationsPerSitePerPosVec[1][selectedSiteIndex]);
	1106
	1107	for (int posIndex = 0; posIndex<numOfpositions; ++posIndex){
	1108	int evolvingSite = evolvingSites[posIndex];
	1109	if(selectedSite == evolvingSite)
	1110	continue;
	1111	MDOUBLE correlationGainGain = _correlationsPerSitePerPosVec[0][selectedSiteIndex][posIndex];
	1112	MDOUBLE correlationLossLoss = _correlationsPerSitePerPosVec[1][selectedSiteIndex][posIndex];
	1113
	1114	if(correlationForZscore){
	1115	correlationGainGain = (correlationGainGain - meanCorrGainGain)/stdCorrGainGain;
	1116	correlationLossLoss = (correlationLossLoss - meanCorrLossLoss)/stdCorrLossLoss;
	1117	}
	1118	if(isMinForPrint && max(abs(correlationGainGain),abs(correlationLossLoss))<minForPrint)
	1119	continue;
	1120	MDOUBLE posCorrelationGainGain = (correlationGainGain >=0) ? correlationGainGain*1000-1 : 0;
	1121	MDOUBLE negCorrelationGainGain = (correlationGainGain < 0) ? correlationGainGain*1000-1 : 0;
	1122	MDOUBLE posCorrelationLossLoss = (correlationLossLoss >=0) ? correlationLossLoss*1000-1 : 0;
	1123	MDOUBLE negCorrelationLossLoss = (correlationLossLoss < 0) ? correlationLossLoss*1000-1 : 0;
	1124	if(isTransform){
	1125	posCorrelationGainGain = pow(posCorrelationGainGain/10,2)/10;
	1126	negCorrelationGainGain = pow(negCorrelationGainGain/10,2)/10;
	1127	posCorrelationLossLoss = pow(posCorrelationLossLoss/10,2)/10;
	1128	negCorrelationLossLoss = pow(negCorrelationLossLoss/10,2)/10;
	1129	}
	1130	corrAllStream<<selectedSiteIndex+1<<"\t"<<evolvingSite+1<<"\t"<<(int)posCorrelationGainGain<<"\t"<<(int)posCorrelationLossLoss<<"\t"<<(int)negCorrelationGainGain<<"\t"<<(int)negCorrelationLossLoss<<"\n";
	1131	}
	1132	}
	1133	}
	1134	else{
	1135	LOGnOUT(4,<<"print Correlations All sites ..."<<endl);
	1136	string corrAllSites = _outDir + "//" + "allCorrelations.isNormForBr."+int2string(isNormalizeForBranch)+pVal+ /".isCorrForZ."+int2string(correlationForZscore)+/ ".txt";
	1137	ofstream corrAllStream(corrAllSites.c_str());
	1138	corrAllStream.precision(precisionCorr);
	1139	corrAllStream<<"siteA"<<"\t"<<"siteB";
	1140	int vecIndex=0;
	1141	for (vector<bool>::iterator it=_isPearson.begin() ; it < _isPearson.end(); it++ ){
	1142	for (vector<string>::iterator evnt=_EventTypes.begin() ; evnt < _EventTypes.end(); evnt++ ){ // could be done with int
	1143	corrAllStream<<"\t"<<evnt+int2string(it);
	1144	vecIndex++;
	1145	}
	1146	}
	1147	corrAllStream<<"\n";
	1148
	1149	for (int selectedSiteIndex = 0; selectedSiteIndex <selectedPositions.size(); ++selectedSiteIndex){
	1150	int selectedSite = selectedPositions[selectedSiteIndex];
	1151	for (int posIndex = 0; posIndex<numOfpositions; ++posIndex){
	1152	int evolvingSite = evolvingSites[posIndex];
	1153	if(selectedSite == evolvingSite)
	1154	continue;
	1155	corrAllStream<<selectedSite+1<<"\t"<<evolvingSite+1;
	1156	int vecIndex=0;
	1157	for (vector<bool>::iterator it=_isPearson.begin() ; it < _isPearson.end(); it++ ){
	1158	for (vector<string>::iterator evnt=_EventTypes.begin() ; evnt < _EventTypes.end(); evnt++ ){ // could be done with int
	1159	corrAllStream<<"\t"<<correlationsVec2print[vecIndex][selectedSiteIndex][posIndex];
	1160	vecIndex++;
	1161	}
	1162	}
	1163	corrAllStream<<"\n";
	1164	}
	1165	}
	1166	}
	1167	}
	1168
	1169
	1170	/********************************************************************************************
	1171	*********************************************************************************************/
	1172	void computeCorrelations::fillCorrPerSelectedSites(Vdouble& correlationPerPos,VVdouble& expEventsPerPosPerBranch,VVdouble& expEventsPerPosPerBranch_B,const int selectedSite, const bool isPearson){
	1173	int numOfpositions = expEventsPerPosPerBranch_B.size();
	1174	//correlationPerPos.resize(numOfpositions);
	1175
	1176
	1177	for (int pos = 0; pos <numOfpositions; ++pos){
	1178	MDOUBLE correlation = 0;
	1179	if(isMinEQMaxInVector(expEventsPerPosPerBranch[selectedSite]) \|\| isMinEQMaxInVector(expEventsPerPosPerBranch_B[pos]))
	1180	correlationPerPos[pos]=-99; // can't compute correlation
	1181	else{
	1182	if(isPearson)
	1183	correlation = calcPearsonCorrelation(expEventsPerPosPerBranch[selectedSite], expEventsPerPosPerBranch_B[pos]);
	1184	else{
	1185	//correlation = calcRankCorrelation(expEventsPerPosPerBranch[selectedSite], expEventsPerPosPerBranch_B[pos]); // seems to be problematic, diffrent results from R, Matlab
	1186	correlation = calcRankCorrelation2(expEventsPerPosPerBranch[selectedSite], expEventsPerPosPerBranch_B[pos]);
	1187	}
	1188	correlationPerPos[pos]=correlation;
	1189	}
	1190	}
	1191	}
	1192
	1193
	1194
	1195	/********************************************************************************************
	1196	fill expEventsPerPosPerBranch
	1197	*********************************************************************************************/
	1198	void computeCorrelations::fillMapValPerPosPerBranch(VVdouble& expEventsPerPosPerBranch,const string type, VVVVdouble& expChanges_PosNodeXY
	1199	,const bool isNormalizeForBranch, MDOUBLE* cutOff_p){
	1200
	1201
	1202	int numOfpositions = expChanges_PosNodeXY.size();
	1203	int numOfbranches = _tr.getNodesNum()-1; // was -1, minus the root node
	1204
	1205	int from = _EventTypesFromTo[type]["from"];
	1206	int to = _EventTypesFromTo[type]["to"];
	1207	expEventsPerPosPerBranch.resize(numOfpositions);
	1208	treeIterTopDownConst tIt(_tr);
	1209	for (int pos = 0; pos <numOfpositions; ++pos){
	1210	for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next())
	1211	{
	1212	if(mynode->isRoot())
	1213	continue;
	1214	MDOUBLE val = 0;
	1215	MDOUBLE normalizationFactor = 1.0;
	1216	if(isNormalizeForBranch){
	1217	if(gainLossOptions::_isNormalizeByExpectationPerBranch){
	1218	if(_expChanges_NodeXY.size()==0)
	1219	sumExpectationPerBranch(expChanges_PosNodeXY, _expChanges_NodeXY); // filled once for both 0->1 and 1->0
	1220	normalizationFactor = _expChanges_NodeXY[mynode->id()][from][to]/numOfbranches; // mynode->dis2father()
	1221	}else
	1222	normalizationFactor = mynode->dis2father();
	1223	}
	1224	val = (expChanges_PosNodeXY[pos][mynode->id()][from][to] ) / normalizationFactor;
	1225	if(cutOff_p){
	1226	if(val>= *cutOff_p)
	1227	expEventsPerPosPerBranch[pos].push_back(1);
	1228	else
	1229	expEventsPerPosPerBranch[pos].push_back(0);
	1230	}
	1231	else
	1232	expEventsPerPosPerBranch[pos].push_back(val);
	1233	}
	1234	}
	1235	}
	1236
	1237	/********************************************************************************************
	1238	*********************************************************************************************/
	1239	void computeCorrelations::sumExpectationPerBranch(VVVVdouble& expChanges_PosNodeXY, VVVdouble& map_NodeXY){
	1240	int numOfPositions = expChanges_PosNodeXY.size();
	1241	int numOfBranches = expChanges_PosNodeXY[0].size();
	1242	int AlphSize = expChanges_PosNodeXY[0][0].size(); // =2
	1243
	1244	treeIterTopDownConst tIt(_tr);
	1245	resizeVVV(numOfBranches,AlphSize,AlphSize,map_NodeXY);
	1246	for (int pos = 0; pos <numOfPositions; ++pos){
	1247	//int i=0;
	1248	for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()){
	1249	//for(int i=0;i<numOfBranches;++i){
	1250	for(int j=0;j<AlphSize;++j){
	1251	for(int k=0;k<AlphSize;++k){
	1252	map_NodeXY[mynode->id()][j][k] += expChanges_PosNodeXY[pos][mynode->id()][j][k];
	1253	}
	1254	}
	1255	//cout<<i<<" "<<mynode->id()<<endl; // DEBUG
	1256	//++i;
	1257	}
	1258	}
	1259	}
	1260
	1261	/********************************************************************************************
	1262	//Compute p-values of each statistic: P1
	1263	//, P2
	1264	//, P3
	1265	//, ??? , PN
	1266	//• Sort these: P(1)
	1267	//? P(2)
	1268	//? P(3)
	1269	//? ??? ? P(N)
	1270	//{subscript
	1271	//()
	1272	//? sorted}
	1273	//• For k =1..N, q(k)
	1274	//= minm ? k
	1275	//[ N?P(m)
	1276	///m]
	1277	//? Easily computed from sorted p-values by looping
	1278	//downwards from k= N to k =1
	1279	*********************************************************************************************/
	1280	VVVdouble computeCorrelations::pVals2qVals(VVVdouble& pValVec, map<int, map<int, map<string, map<string, MDOUBLE > > > >& correlationsData
	1281	, vector<vector<bool> >& isComputePairWithRateAboveNim, Vdouble& T_BH, Vint& selectedSites, Vint& evolvingSites)
	1282	{
	1283	LOGnOUT(4,<<endl<<"pVals2qVals..."<<endl);
	1284	time_t t1,t2;
	1285	time(&t1);
	1286
	1287	VVVdouble qValsVec;
	1288	if(gainLossOptions::_isComputeQVals)
	1289	qValsVec = pValVec; // instead of re-size, not required if not computed
	1290
	1291	int numberOfcorrelationVec = pValVec.size();
	1292	int numOfSites_A = pValVec[0].size();
	1293	int numOfSites_B = pValVec[0][0].size();
	1294
	1295	typedef map<int,map<int, map<string, map<string, MDOUBLE> > > >::iterator it_A;
	1296	typedef map<int, map<string, map<string, MDOUBLE> > >::iterator it_B;
	1297
	1298	it_A it_siteA = correlationsData.begin();
	1299	it_B it_siteB = it_siteA->second.begin();
	1300
	1301	for (int corIndex = 0; corIndex <numberOfcorrelationVec; ++corIndex){
	1302	LOGnOUT(4,<<" *** corIndex="<<corIndex<<endl);
	1303
	1304	Vdouble pVals;
	1305	Vdouble qVals;
	1306
	1307	// get pVals
	1308	LOGnOUT(6,<<"get pVals..."<<endl);
	1309
	1310	for (int site_A = 0; site_A <numOfSites_A; ++site_A){
	1311	int site_A_original = selectedSites[site_A];
	1312	for (int site_B = site_A; site_B <numOfSites_B; ++site_B){
	1313	int site_B_original = evolvingSites[site_B];
	1314	if(site_A_original == site_B_original){
	1315	continue;
	1316	}
	1317	MDOUBLE pVal = 1;
	1318	if(isComputePairWithRateAboveNim[site_A][site_B]){
	1319	if(gainLossOptions::_selectedSitesForCorrelation!="" ){
	1320	// consider only pairs with min Rate
	1321	//if(correlationsData[site_A_original][site_B][int2string(corIndex)]["Rate"]>Parameters::getFloat("_minExpThresholdForPValComputationForCorrelatingPair") ){ //TEMP
	1322	pVal = correlationsData[site_A_original][site_B_original][int2string(corIndex)]["pVal"]; //
	1323	//if(pValVec[corIndex][site_A][site_B] != pVal)
	1324	// cout<<"ERRRRRRR diff pval\n";
	1325	//if(correlationsData[site_A_original][site_B][int2string(corIndex)]["Rate"]>Parameters::getFloat("_minExpThresholdForPValComputationForCorrelatingPair") && pVal > 1.99)
	1326	// cout<<"ERRRRRRR diff pval\n";
	1327	}
	1328	else
	1329	pVal = pValVec[corIndex][site_A][site_B]; // Real correlation from Input variable
	1330
	1331	pVals.push_back(pVal);
	1332	}
	1333	//if(!(pVal > 1)) // pair is with Nmin below T, and ignored, since it's removed from both simulations and real data no need to correct for this hypothesis
	1334	// pVals.push_back(pVal);
	1335
	1336	}
	1337	}
	1338	// sort pVal
	1339	vector< vecElem<MDOUBLE> > orderVecPVal;
	1340	orderVec(pVals, orderVecPVal);
	1341	qVals.resize(pVals.size(),1);
	1342
	1343	sort(pVals.begin(),pVals.end()); // faster than using the "getValue"
	1344	pair<vector<double>::iterator,vector<double>::iterator> bounds;
	1345
	1346	float pVals2checkBeforeFDR [] = {gainLossOptions::_pValueCutOffForBootStrap, 0.05, 0.01, 0.005, 0.001, 0.0001};
	1347
	1348	int lastIndexWithPVal2check;
	1349	for (int i=0; i<6; ++i){
	1350	bounds = equal_range (pVals.begin(), pVals.end(), pVals2checkBeforeFDR[i]);
	1351	if(i==0)
	1352	lastIndexWithPVal2check = int(bounds.second - pVals.begin());
	1353	int lastIndexWithPValBiggerThanThreshold = int(bounds.second - pVals.begin());
	1354	LOGnOUT(4,<<"Before FDR correction there are "<<lastIndexWithPValBiggerThanThreshold<<" pairs with significant pVal="<< pVals2checkBeforeFDR[i]<<endl);
	1355	}
	1356
	1357	LOGnOUT(4,<<"Compute BH threshold for number of multiple tests="<<pVals.size()<< " ..."<<endl);
	1358	T_BH[corIndex] = 0;
	1359	T_BH[corIndex] = computeFDRthreshold(pVals, gainLossOptions::_pValueCutOffForBootStrap, true);
	1360
	1361
	1362	//for (int i=0; i<pVals.size(); ++i){
	1363	// MDOUBLE correctedVal = (double)(i+1)/(double)indexAll *pValcutOff;
	1364	// if( pVals[i] <= correctedVal){
	1365	// T_BH[corIndex] = pVals[i];
	1366	// }
	1367	//}
	1368	bounds = equal_range (pVals.begin(), pVals.end(),T_BH[corIndex]);
	1369	if(T_BH[corIndex] > 0.0){
	1370	LOGnOUT(4,<<"For FDR level of "<<gainLossOptions::_pValueCutOffForBootStrap<<" BH threshold="<<T_BH[corIndex]<<" with "<<int(bounds.first - pVals.begin())<<" "<<int(bounds.second - pVals.begin())<<" significant values"<<endl);}
	1371	else{
	1372	LOGnOUT(4,<<"For FDR level of "<<gainLossOptions::_pValueCutOffForBootStrap<<" BH threshold="<<T_BH[corIndex]<<" with no significant values"<<endl<<endl);}
	1373
	1374	// additional BH thresholds
	1375	float additionalFDRlevels [] = {0.1, 0.05, 0.01, 0.001};
	1376	for (int i=0; i< 4 ; ++i){ // must be length of additionalFDRlevels
	1377	if(gainLossOptions::_pValueCutOffForBootStrap == additionalFDRlevels[i])
	1378	continue;
	1379	MDOUBLE BH = computeFDRthreshold(pVals,additionalFDRlevels[i], true);
	1380	LOGnOUT(4,<<"For FDR level of "<<additionalFDRlevels[i]<<" BH threshold is "<<BH<<endl);
	1381	}
	1382
	1383	// compute q-vals
	1384	VVVdouble qValsVec;
	1385	if(gainLossOptions::_isComputeQVals){
	1386	// produce qVals by FDR, assume the pVal vector is sorted P_1<=P_2<=...<=P_N
	1387	LOGnOUT(4,<<"Compute FDR correction - get qVals..."<<endl);
	1388	for (int k=1; k<=lastIndexWithPVal2check; ++k){
	1389	if(k%1000==0)
	1390	cout<<"*";
	1391	int m = k;
	1392	MDOUBLE pVal = pVals[k-1];
	1393	MDOUBLE qVal = pVal;
	1394	//cout<<"pVal "<<k<<" "<<orderVecPVal[m-1].getValue()<<endl; //DEB
	1395	//MDOUBLE qVal = (double)indexAll*pVal/(double)m; // only init
	1396	if(pVal < gainLossOptions::_pValueCutOffForBootStrap && qVal < gainLossOptions::_pValueCutOffForBootStrap){ // since pVals are sorted, if last qVal computation yielded >0.05, no need to compute
	1397	qVal = 1; // init, not corrected
	1398	for (m=k; m<= lastIndexWithPVal2check; ++m){
	1399	MDOUBLE pValtemp = pVals[m-1];
	1400	MDOUBLE qValtemp = (double)pVals.size()*pValtemp/(double)m;
	1401	if(qValtemp < qVal)
	1402	qVal = qValtemp;
	1403	}
	1404	}else{
	1405	break;
	1406	}
	1407	//cout<<"pVal="<<pVal<<" and qVal="<<qVal<<"\n"; //DEB
	1408	qVals[orderVecPVal[k-1].getPlace()] = qVal; // fill values, related to original order of pVals
	1409	}
	1410	cout<<"\n";
	1411
	1412	// assign qVals
	1413	LOGnOUT(4,<<"Assign qVals..."<<endl);
	1414	int ind = 0;
	1415	for (int site_A = 0; site_A <numOfSites_A; ++site_A){
	1416	for (int site_B = site_A; site_B <numOfSites_B; ++site_B){
	1417	if(site_A == site_B){
	1418	continue;
	1419	}
	1420	MDOUBLE qVal = qVals[ind]; // this works only if qVals order is the same as the original pVal
	1421	qValsVec[corIndex][site_A][site_B] = qVal;
	1422	qValsVec[corIndex][site_B][site_A] = qVal;
	1423	//map<string, Vdouble>::iterator iterTerm = _totalTerminals.find(nodeName);
	1424
	1425	it_A iterA = correlationsData.find(site_A);
	1426	it_B iterB = correlationsData[site_A].find(site_B);
	1427
	1428	if (!(iterA==correlationsData.end()) && !(iterB==correlationsData[site_A].end())){
	1429	//cout<<site_A<<" "<<site_B<<"\n";
	1430	correlationsData[site_A][site_B][int2string(corIndex)]["qVal"] = qVal;
	1431	}
	1432	ind++;
	1433	}
	1434	}
	1435	}
	1436
	1437	}
	1438	time(&t2);
	1439	LOGnOUT(4,<<"TIME = "<<(t2-t1)/60.0<<" minutes"<<endl<<endl);
	1440	return qValsVec;
	1441	}

+118

-0

programs/gainLoss/computeCorrelations.h less more

	0	/*
	1	Copyright (C) 2011 Tal Pupko TalP@tauex.tau.ac.il.
	2
	3	This program is free software: you can redistribute it and/or modify
	4	it under the terms of the GNU General Public License as published by
	5	the Free Software Foundation, either version 3 of the License, or
	6	(at your option) any later version.
	7
	8	This program is distributed in the hope that it will be useful,
	9	but WITHOUT ANY WARRANTY; without even the implied warranty of
	10	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	11	GNU General Public License for more details.
	12
	13	You should have received a copy of the GNU General Public License
	14	along with this program. If not, see <http://www.gnu.org/licenses/>.
	15	*/
	16
	17
	18	#ifndef ___computeCorrelations___GL
	19	#define ___computeCorrelations___GL
	20
	21	#include "definitions.h"
	22	#include "replacementModel.h"
	23	#include "Parameters.h"
	24	#include "gainLoss.h"
	25	#include "extremeValDistribution.h"
	26
	27	/********************************************************************************************
	28	computeCorrelations
	29	*********************************************************************************************/
	30	class computeCorrelations{
	31	public:
	32	explicit computeCorrelations(tree& tr, string& outDir, VVVVdouble* expChanges_PosNodeXY, VVVVdouble* expChanges_PosNodeXY_B=NULL);
	33	virtual ~computeCorrelations() ;
	34
	35	computeCorrelations(const computeCorrelations& other) {*this = other;}
	36	computeCorrelations& operator=(const computeCorrelations &other);
	37
	38	void runComputeCorrelations(const Vint& selectedPositions, const Vint& numOfGapsTillSite, const bool isNormalizeForBranch = false);
	39	void printComputedCorrelations(const Vint& selectedPositions,const Vint& evolvingSites, const bool isNormalizeForBranch = false, const bool correlationForZscore = false, VVVdouble* correlationsVec=NULL, string* valType=NULL);
	40	//void computeMeanAndSdPerBranch(Vdouble& meanEventsPerBranch01, Vdouble& meanEventsPerBranch10, Vdouble& sdEventsPerBranch01,Vdouble& sdEventsPerBranch10);
	41	void fillMapValPerPosPerBranch(VVdouble& expEventsPerPosPerBranch,const string type, VVVVdouble& expChanges_PosNodeXY,const bool isNormalizeForBranch = true, MDOUBLE* cutOff_p =NULL);
	42	void fillCorrPerSelectedSites(Vdouble& correlationPerPos,VVdouble& expEventsPerPosPerBranch,VVdouble& expEventsPerPosPerBranch_B,const int selectedSite, const bool isPearson=true);
	43	void sumExpectationPerBranch(VVVVdouble& expChanges_PosNodeXY, VVVdouble& map_NodeXY);
	44	MDOUBLE computeNminPerPair(const int site_A, const int site_B, const int typeIndex, const VVVdouble& exp_PosXY);
	45
	46
	47	void computedCorrelationsRankBasedOnSimulatedData(const Vint& selectedPositions, VVVdouble& correlationPerSitePerPos, VVVdouble& correlationPerSitePerPos_B, VVVdouble& correlationPerSitePerPos_Pval);
	48	void computedCorrelationsPValBasedOnSimulatedDataCoMap(VVVdouble& correlationPerSitePerPosReal,VVVVdouble& expChanges_PosXYReal, VVVdouble& correlationPerSitePerPos_Pval);
	49	int computedCorrelationsPValBasedOnSimulatedDataCoMapBins(VVVdouble& correlationPerSitePerPosReal,vector<vector<bool> >& isComputePairWithRateAboveNim,VVVVdouble& expChanges_PosXYReal, VVVdouble& correlationPerSitePerPos_Pval
	50	,map<int, map<int, map<string, map<string, MDOUBLE > > > >& correlationsData, Vdouble& rate4siteReal, Vint& selectedSites, Vint& numOfGapsTillSite, Vint& evolvingSites, bool isLastIteration);
	51	void printComputedCorrelationsData(const bool isNormalizeForBranch, const bool correlationForZscore
	52	,map<int, map<int, map<string, map<string, MDOUBLE > > > >& correlationsData, Vdouble& T_BH, bool isPairsAboveBH = false);
	53	void printCorrelationsFrequencies(Vdouble& correlationsVecSorted, ofstream* simCorrelStream=NULL);
	54
	55
	56	int produceSortedVectorsOfCorrelationsBinedByRate(MDOUBLE medianNminOfRealData, ofstream* simCorrelStream);
	57
	58	void produceSortedVectorsOfAllCorrelations(Vdouble& rate4siteSim);
	59
	60	VVVdouble pVals2qVals(VVVdouble& correlationsVec,map<int, map<int, map<string, map<string, MDOUBLE > > > >& correlationsData
	61	, vector<vector<bool> >& isComputePairWithRateAboveNim, Vdouble& T_BH, Vint& selectedSites, Vint& evolvingSites);
	62
	63	void produceSymeticMatrix(VVVdouble& correlationPerSitePerPos_Pval, bool isMin=true);
	64	void produceSortedVectorsOfAllCorrelations(const VVVdouble& correlationPerSitePerPos, Vdouble& pairWiseCorrelations, Vdouble& NminForPairsInPairWiseCorrelations);
	65
	66	VVVdouble getcorrelationPerSitePerPosVec(){return _correlationsPerSitePerPosVec;};
	67
	68
	69	protected:
	70	//members
	71	int _alphabetSize;
	72	tree _tr;
	73	//sequenceContainer _sc;
	74
	75	sequence* _refSeq; // the reference sequence
	76	string _outDir;
	77	bool _isSilent;
	78
	79
	80	VVVVdouble _expChanges_PosNodeXY; // Input, expChanges_PosNodeXY[pos][nodeID][fatherState][sonState] - after simulations and postProb
	81	VVVdouble _expChanges_NodeXY; // Summed from _expChanges_PosNodeXY - to expChanges_NodeXY[nodeID][fatherState][sonState]
	82	VVVdouble _exp_PosXY; // Summed from _expChanges_PosNodeXY - to expChanges_PosXY[Pos][fatherState][sonState]
	83
	84	bool _isTwoSetsOfInputForCorrelation; // when B is given
	85	VVVVdouble _expChanges_PosNodeXY_B; // Input B (optional), expChanges_PosNodeXY[pos][nodeID][fatherState][sonState] - after simulations and postProb
	86	VVVdouble _expChanges_NodeXY_B; // Summed from _expChanges_PosNodeXY - to expChanges_NodeXY[nodeID][fatherState][sonState]
	87
	88	// V required for correlation analysis
	89	VVVdouble _expPerPosPerBranchVec; // expChanges_PosNodeXY[type][pos][nodeID], for specific type of event (from, to), may be adjusted for branch expectation
	90	VVVdouble _expPerPosPerBranchVec_B;
	91
	92	// correlation vectors
	93	VVVdouble _correlationsPerSitePerPosVec;
	94	vector<bool> _isPearson; // [true, false]
	95	vector<string> _EventTypes; // ['gain', 'loss', 'both']
	96	map<string, int> _EventTypesMap;
	97	map<string, map<string, int> > _EventTypesFromTo;
	98
	99	//vector< vector< map<string, MDOUBLE> > > _pairWiseCorrelationsAndNminSim; // pairWiseCorrelationsAndNmin[corrIndex][pairIndex][CorOrNmin][val], if CorOrNmin=0, val=correlation, if =1, val=Nmin
	100	VVVdouble _pairWiseCorrelationsAndNminSim; // pairWiseCorrelationsAndNmin[corrIndex][0/1][pairIndex][val], if CorOrNmin=0, val=correlation, if =1, val=Nmin
	101	VVdouble _corrVector;
	102	VVdouble _NminSortedSim; // _NminSortedSim[CorType][], vector of all Nmins = Rates
	103	vector<vector<extremeValDistribution > > _extremeValDistributions; // _NminSortedSim[CorType][], vector of all distributions, per bin
	104
	105	VVVdouble _correlationsSubSets; // to be filled by produceSortedVectorsOfCorrelationsBinedByRate
	106	VVdouble _correlationSubSetsNminLimitValues; // to be filled by produceSortedVectorsOfCorrelationsBinedByRate
	107
	108	Vint _selectedSites;
	109
	110	int _numOfSamplesInLowRateFirstBin; // thus, the lowest p-value for correlations with low rate (below simulations) is limited
	111	};
	112
	113	#endif
	114
	115
	116
	117

+928

-0

programs/gainLoss/computeCountsGL.cpp less more

	0	/*
	1	Copyright (C) 2011 Tal Pupko TalP@tauex.tau.ac.il.
	2
	3	This program is free software: you can redistribute it and/or modify
	4	it under the terms of the GNU General Public License as published by
	5	the Free Software Foundation, either version 3 of the License, or
	6	(at your option) any later version.
	7
	8	This program is distributed in the hope that it will be useful,
	9	but WITHOUT ANY WARRANTY; without even the implied warranty of
	10	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	11	GNU General Public License for more details.
	12
	13	You should have received a copy of the GNU General Public License
	14	along with this program. If not, see <http://www.gnu.org/licenses/>.
	15	*/
	16	#include "computeCountsGL.h"
	17	#include "gainLossUtils.h"
	18	#include "gainLossAlphabet.h"
	19	#include "computePosteriorExpectationOfChange.h"
	20	#include "computeJumps.h"
	21
	22
	23
	24	/********************************************************************************************
	25	computeCountsGL
	26	*********************************************************************************************/
	27	computeCountsGL::computeCountsGL(sequenceContainer& sc, tree& tr, stochasticProcess* sp, string& outDir, VVdouble& logLpostPerCatPerPos, MDOUBLE distanceFromNearestOTUForRecent, bool isSilent):
	28	_tr(tr),_sp(sp),_sc(sc),_outDir(outDir),_postProbPerCatPerPos(logLpostPerCatPerPos),_distanceFromNearestOTUForRecent(distanceFromNearestOTUForRecent), _isSilent(isSilent)
	29	{
	30	_alphabetSize = _sp->alphabetSize();
	31	}
	32	computeCountsGL::computeCountsGL(sequenceContainer& sc, tree& tr, vector<vector<stochasticProcess> >& spVVec, distribution gainDist, distribution* lossDist, string& outDir, VVVdouble& logLpostPerSpPerCatPerPos, MDOUBLE distanceFromNearestOTUForRecent, bool isSilent):
	33	_tr(tr),_spVVec(spVVec), _gainDist(gainDist), _lossDist(lossDist),_sc(sc),_outDir(outDir),_postProbPerSpPerCatPerPos(logLpostPerSpPerCatPerPos),_distanceFromNearestOTUForRecent(distanceFromNearestOTUForRecent), _isSilent(isSilent)
	34	{
	35	_alphabetSize = _spVVec[0][0]->alphabetSize();
	36	}
	37
	38	computeCountsGL::~computeCountsGL(){
	39	//clearVVVV(_jointProb_PosNodeXY);
	40	}
	41
	42	computeCountsGL& computeCountsGL::operator=(const computeCountsGL &other){
	43	if (this != &other) { // Check for self-assignment
	44	}
	45	return *this;
	46	}
	47
	48
	49	/********************************************************************************************
	50	*********************************************************************************************/
	51	void computeCountsGL::run()
	52	{
	53	LOGnOUT(4, <<endl<<"Computation stochastic mapping"<<endl);
	54	time_t t1,t2;
	55	time(&t1);
	56
	57	_expV01.resize(_sc.seqLen());
	58	_expV10.resize(_sc.seqLen());
	59	_probV01.resize(_sc.seqLen());
	60	_probV10.resize(_sc.seqLen());
	61	resizeVVV(_sc.seqLen(),_alphabetSize,_alphabetSize,_expV);
	62	resizeVVV(_sc.seqLen(),_alphabetSize,_alphabetSize,_probV);
	63	resizeVVVV(_sc.seqLen(),_tr.getNodesNum(),_alphabetSize,_alphabetSize,_jointProb_PosNodeXY);
	64	resizeVVVV(_sc.seqLen(),_tr.getNodesNum(),_alphabetSize,_alphabetSize,_probChanges_PosNodeXY);
	65	resizeVVVV(_sc.seqLen(),_tr.getNodesNum(),_alphabetSize,_alphabetSize,_expChanges_PosNodeXY);
	66
	67	if(!gainLossOptions::_gainLossDist){
	68	computePosteriorOfChangeGivenTerminalsPerCat();
	69	}
	70	else
	71	computePosteriorOfChangeGivenTerminalsPerSpPerCat(); // GLM - multiple SPs
	72
	73	time(&t2);
	74	LOGnOUT(4,<<"TIME = "<<(t2-t1)/60.0<<" minutes"<<endl<<endl);
	75	}
	76	/********************************************************************************************
	77	*********************************************************************************************/
	78	void computeCountsGL::computePosteriorOfChangeGivenTerminalsPerCat()
	79	{
	80	// Per RateCategory -- All the computations is done while looping over rate categories
	81	for (int rateIndex=0 ; rateIndex< _sp->categories(); ++rateIndex)
	82	{
	83	tree copy_et = _tr;
	84	MDOUBLE rateVal = _sp->rates(rateIndex);
	85	MDOUBLE minimumRate = 0.000000001; //0.0000001
	86	MDOUBLE rate2multiply = max(rateVal,minimumRate);
	87	if(rateVal<minimumRate){
	88	LOGnOUT(4, <<" >>> NOTE: the rate category "<<rateVal<<" is too low for computePosteriorExpectationOfChangePerSite"<<endl); }
	89	copy_et.multipleAllBranchesByFactor(rate2multiply);
	90	if(!_isSilent)
	91	LOGnOUT(4, <<"Computation performed analytically for rate "<<rate2multiply<<endl);
	92	//gainLossAlphabet alph; // needed for Alphabet size
	93	//int alphSize = ;
	94	simulateJumps simPerRateCategory(copy_et,*_sp,_alphabetSize);
	95	// Per POS
	96	for (int pos = 0; pos <_sc.seqLen(); ++pos)
	97	{
	98	LOG(9,<<"pos "<<pos+1<<endl);
	99	// I) computeJoint "computePosteriorOfChangeGivenTerminals" (posteriorPerNodePer2States[mynode->id()][fatherState][sonState])
	100	VVVdouble posteriorsGivenTerminalsPerRateCategoryPerPos;
	101	computePosteriorExpectationOfChange cpecPerRateCategoryPerPos(copy_et,_sc,_sp); // Per POS,CAT
	102	cpecPerRateCategoryPerPos.computePosteriorOfChangeGivenTerminals(posteriorsGivenTerminalsPerRateCategoryPerPos,pos);
	103
	104	// Exp vars - allocate
	105	VVVdouble expChangesForBranchPerRateCategoryPerPos; // Sim+Exp
	106	resizeVVV(_tr.getNodesNum(),_sp->alphabetSize(),_sp->alphabetSize(),expChangesForBranchPerRateCategoryPerPos);
	107	VVdouble expVV; // Per POS
	108
	109	// Prob vars - allocate
	110	VVVdouble probChangesForBranchPerRateCategoryPerPos; // Sim+Prob
	111	resizeVVV(_tr.getNodesNum(),_sp->alphabetSize(),_sp->alphabetSize(),probChangesForBranchPerRateCategoryPerPos);
	112	VVdouble probVV;
	113
	114	////////////////////////////////////////////////////////////////////////// Analytical
	115	if(gainLossOptions::_isAnaliticComputeJumps){
	116	MDOUBLE Lambda1 = static_cast<gainLossModel*>(_sp->getPijAccelerator()->getReplacementModel())->getMu1();
	117	MDOUBLE Lambda2 = static_cast<gainLossModel*>(_sp->getPijAccelerator()->getReplacementModel())->getMu2();
	118	if(Lambda1 == Lambda2)
	119	Lambda2 += 0.000000000000001; //NOTE: this is required for analyticComputeSimulateion, to avoid Lambda1=Lambda2
	120	computeJumps computeJumpsObj(Lambda1,Lambda2);
	121
	122	// II) PostExp: take in account both: 1) Analytical equations 2) posteriorsGivenTerminal
	123	VVVdouble expChangesForBranchPerRateCategoryPerPosAnal;
	124	resizeVVV(_tr.getNodesNum(),_sp->alphabetSize(),_sp->alphabetSize(),expChangesForBranchPerRateCategoryPerPosAnal);
	125	VVdouble expVVAnal = cpecPerRateCategoryPerPos.computeExpectationAcrossTree(computeJumpsObj,posteriorsGivenTerminalsPerRateCategoryPerPos,expChangesForBranchPerRateCategoryPerPosAnal);
	126	expVV = expVVAnal;
	127	expChangesForBranchPerRateCategoryPerPos = expChangesForBranchPerRateCategoryPerPosAnal;
	128
	129	// III) PostProbChange: take in account both: 1) Analytical equations 2) posteriorsGivenTerminal
	130	VVVdouble probChangesForBranchPerRateCategoryPerPosAnal;
	131	resizeVVV(_tr.getNodesNum(),_sp->alphabetSize(),_sp->alphabetSize(),probChangesForBranchPerRateCategoryPerPosAnal);
	132	VVdouble probVVAnal = cpecPerRateCategoryPerPos.computePosteriorAcrossTree(computeJumpsObj,posteriorsGivenTerminalsPerRateCategoryPerPos,probChangesForBranchPerRateCategoryPerPosAnal);
	133	probVV = probVVAnal;
	134	probChangesForBranchPerRateCategoryPerPos = probChangesForBranchPerRateCategoryPerPosAnal;
	135	}
	136	else{
	137	if(!_isSilent)
	138	LOGnOUT(4, <<"running "<<gainLossOptions::_numOfSimulationsForPotExp<<" simulations for rate "<<rate2multiply<<endl);
	139	simPerRateCategory.runSimulation(gainLossOptions::_numOfSimulationsForPotExp);
	140	if(!_isSilent )
	141	LOGnOUT(4,<<"finished simulations"<<endl);
	142
	143	// II) PostExp: take in account both: 1) simulations 2) posteriorsGivenTerminal
	144	expVV = cpecPerRateCategoryPerPos.computeExpectationAcrossTree(simPerRateCategory,posteriorsGivenTerminalsPerRateCategoryPerPos,
	145	expChangesForBranchPerRateCategoryPerPos);
	146	// III) PostProbChange: take in account both: 1) simulations 2) posteriorsGivenTerminal
	147	probVV = cpecPerRateCategoryPerPos.computePosteriorAcrossTree(simPerRateCategory,posteriorsGivenTerminalsPerRateCategoryPerPos,
	148	probChangesForBranchPerRateCategoryPerPos);
	149
	150	}
	151	//////////////////////////////////////////////////////////////////////////
	152
	153	MDOUBLE exp01 = expVV[0][1];
	154	MDOUBLE exp10 = expVV[1][0];
	155	_expV01[pos]+=exp01*_postProbPerCatPerPos[rateIndex][pos];
	156	_expV10[pos]+=exp10*_postProbPerCatPerPos[rateIndex][pos];
	157	_expV[pos][0][1]+=exp01*_postProbPerCatPerPos[rateIndex][pos];
	158	_expV[pos][1][0]+=exp10*_postProbPerCatPerPos[rateIndex][pos];
	159
	160	MDOUBLE prob01 = probVV[0][1];
	161	MDOUBLE prob10 = probVV[1][0];
	162	_probV01[pos]+=prob01*_postProbPerCatPerPos[rateIndex][pos];
	163	_probV10[pos]+=prob10*_postProbPerCatPerPos[rateIndex][pos];
	164	_probV[pos][0][1]+=prob01*_postProbPerCatPerPos[rateIndex][pos];
	165	_probV[pos][1][0]+=prob10*_postProbPerCatPerPos[rateIndex][pos];
	166
	167
	168	// Store all information PerCat,PerPOS
	169	for(int i=0;i<_probChanges_PosNodeXY[pos].size();++i){ // nodeId
	170	for(int j=0;j<_probChanges_PosNodeXY[pos][i].size();++j){ // fatherState
	171	for(int k=0;k<_probChanges_PosNodeXY[pos][i][j].size();++k){ // sonState
	172	_probChanges_PosNodeXY[pos][i][j][k] += probChangesForBranchPerRateCategoryPerPos[i][j][k]*_postProbPerCatPerPos[rateIndex][pos];
	173	_expChanges_PosNodeXY[pos][i][j][k] += expChangesForBranchPerRateCategoryPerPos[i][j][k]*_postProbPerCatPerPos[rateIndex][pos];
	174	_jointProb_PosNodeXY[pos][i][j][k] += posteriorsGivenTerminalsPerRateCategoryPerPos[i][j][k]*_postProbPerCatPerPos[rateIndex][pos];
	175	}
	176	}
	177	}
	178	}
	179	}
	180	}
	181
	182	/********************************************************************************************
	183	spVV
	184	*********************************************************************************************/
	185	void computeCountsGL::computePosteriorOfChangeGivenTerminalsPerSpPerCat()
	186	{
	187	int numOfSPs = _gainDist->categories()*_lossDist->categories();
	188
	189	// per Sp
	190	for (int spIndex=0; spIndex < numOfSPs; ++spIndex) {
	191	int gainIndex =fromIndex2gainIndex(spIndex,_gainDist->categories(),_lossDist->categories());
	192	int lossIndex =fromIndex2lossIndex(spIndex,_gainDist->categories(),_lossDist->categories());
	193	_sp = _spVVec[gainIndex][lossIndex];
	194	if(!_isSilent){
	195	LOGnOUT(4,<<"computePosteriorOfChangeGivenTerminalsPerSpPerCat with sp:\n Gain= "<<static_cast<gainLossModel>((_sp).getPijAccelerator()->getReplacementModel())->getMu1() <<endl);
	196	if(!gainLossOptions::_isReversible)LOGnOUT(4,<<" Loss= "<<static_cast<gainLossModelNonReversible>((_sp).getPijAccelerator()->getReplacementModel())->getMu2() <<endl);
	197	}
	198	// Per RateCategory -- All the computations is done while looping over rate categories
	199	int numOfRateCategories = _spVVec[gainIndex][lossIndex]->categories(); // same for all SPs
	200	for (int rateIndex=0 ; rateIndex< numOfRateCategories; ++rateIndex)
	201	{
	202	tree copy_et = _tr;
	203	MDOUBLE rateVal = _sp->rates(rateIndex);
	204	MDOUBLE minimumRate = 0.000000001; //0.0000001
	205	MDOUBLE rate2multiply = max(rateVal,minimumRate);
	206	if(rateVal<minimumRate){
	207	LOGnOUT(4, <<" >>> NOTE: the rate category "<<rateVal<<" is too low for computePosteriorExpectationOfChangePerSite"<<endl); }
	208	copy_et.multipleAllBranchesByFactor(rate2multiply);
	209
	210
	211	//if(!_isSilent)
	212	// LOGnOUT(4, <<"running "<<gainLossOptions::_numOfSimulationsForPotExp<<" simulations for rate "<<rate2multiply<<endl);
	213	////gainLossAlphabet alph; // needed for Alphabet size
	214	//simulateJumps simPerRateCategory(copy_et,*_sp,_alphabetSize);
	215	//simPerRateCategory.runSimulation(gainLossOptions::_numOfSimulationsForPotExp);
	216	//if(!_isSilent)
	217	// LOGnOUT(4,<<"finished simulations"<<endl);
	218
	219
	220	simulateJumps simPerRateCategory(copy_et,*_sp,_alphabetSize);
	221	// Per POS
	222	for (int pos = 0; pos <_sc.seqLen(); ++pos)
	223	{
	224	LOG(7,<<"pos "<<pos+1<<endl);
	225	// I) computeJoint "computePosteriorOfChangeGivenTerminals" (posteriorPerNodePer2States[mynode->id()][fatherState][sonState])
	226	VVVdouble posteriorsGivenTerminalsPerRateCategoryPerPos;
	227	computePosteriorExpectationOfChange cpecPerRateCategoryPerPos(copy_et,_sc,_sp); // Per POS,CAT
	228	cpecPerRateCategoryPerPos.computePosteriorOfChangeGivenTerminals(posteriorsGivenTerminalsPerRateCategoryPerPos,pos);
	229
	230	// Exp vars - allocate
	231	VVVdouble expChangesForBranchPerRateCategoryPerPos; // Sim+Exp
	232	resizeVVV(_tr.getNodesNum(),_sp->alphabetSize(),_sp->alphabetSize(),expChangesForBranchPerRateCategoryPerPos);
	233	VVdouble expVV; // Per POS
	234
	235	// Prob vars - allocate
	236	VVVdouble probChangesForBranchPerRateCategoryPerPos; // Sim+Prob
	237	resizeVVV(_tr.getNodesNum(),_sp->alphabetSize(),_sp->alphabetSize(),probChangesForBranchPerRateCategoryPerPos);
	238	VVdouble probVV;
	239
	240	////////////////////////////////////////////////////////////////////////// Analytical
	241	if(gainLossOptions::_isAnaliticComputeJumps){
	242	MDOUBLE Lambda1 = static_cast<gainLossModel*>(_sp->getPijAccelerator()->getReplacementModel())->getMu1();
	243	MDOUBLE Lambda2 = static_cast<gainLossModel*>(_sp->getPijAccelerator()->getReplacementModel())->getMu2();
	244	computeJumps computeJumpsObj(Lambda1,Lambda2);
	245
	246	// II) PostExp: take in account both: 1) Analytical equations 2) posteriorsGivenTerminal
	247	VVVdouble expChangesForBranchPerRateCategoryPerPosAnal;
	248	resizeVVV(_tr.getNodesNum(),_sp->alphabetSize(),_sp->alphabetSize(),expChangesForBranchPerRateCategoryPerPosAnal);
	249	VVdouble expVVAnal = cpecPerRateCategoryPerPos.computeExpectationAcrossTree(computeJumpsObj,posteriorsGivenTerminalsPerRateCategoryPerPos,expChangesForBranchPerRateCategoryPerPosAnal);
	250	expVV = expVVAnal;
	251	expChangesForBranchPerRateCategoryPerPos = expChangesForBranchPerRateCategoryPerPosAnal;
	252
	253	// III) PostProbChange: take in account both: 1) Analytical equations 2) posteriorsGivenTerminal
	254	VVVdouble probChangesForBranchPerRateCategoryPerPosAnal;
	255	resizeVVV(_tr.getNodesNum(),_sp->alphabetSize(),_sp->alphabetSize(),probChangesForBranchPerRateCategoryPerPosAnal);
	256	VVdouble probVVAnal = cpecPerRateCategoryPerPos.computePosteriorAcrossTree(computeJumpsObj,posteriorsGivenTerminalsPerRateCategoryPerPos,probChangesForBranchPerRateCategoryPerPosAnal);
	257	probVV = probVVAnal;
	258	probChangesForBranchPerRateCategoryPerPos = probChangesForBranchPerRateCategoryPerPosAnal;
	259	}
	260	else{
	261	if(!_isSilent)
	262	LOGnOUT(4, <<"running "<<gainLossOptions::_numOfSimulationsForPotExp<<" simulations for rate "<<rate2multiply<<endl);
	263	simPerRateCategory.runSimulation(gainLossOptions::_numOfSimulationsForPotExp);
	264	if(!_isSilent )
	265	LOGnOUT(4,<<"finished simulations"<<endl);
	266
	267	// II) PostExp: take in account both: 1) simulations 2) posteriorsGivenTerminal
	268	expVV = cpecPerRateCategoryPerPos.computeExpectationAcrossTree(simPerRateCategory,posteriorsGivenTerminalsPerRateCategoryPerPos,
	269	expChangesForBranchPerRateCategoryPerPos);
	270	// III) PostProbChange: take in account both: 1) simulations 2) posteriorsGivenTerminal
	271	probVV = cpecPerRateCategoryPerPos.computePosteriorAcrossTree(simPerRateCategory,posteriorsGivenTerminalsPerRateCategoryPerPos,
	272	probChangesForBranchPerRateCategoryPerPos);
	273
	274	}
	275	//////////////////////////////////////////////////////////////////////////
	276
	277
	278
	279	//// II) Exp - take in account both: 1) simulations 2) posteriorsGivenTerminal
	280	//VVVdouble expChangesForBranchPerRateCategoryPerPos; // Sim+Exp
	281	//resizeVVV(_tr.getNodesNum(),_sp->alphabetSize(),_sp->alphabetSize(),expChangesForBranchPerRateCategoryPerPos);
	282
	283	//VVdouble expVV = cpecPerRateCategoryPerPos.computeExpectationAcrossTree(simPerRateCategory,posteriorsGivenTerminalsPerRateCategoryPerPos,
	284	// expChangesForBranchPerRateCategoryPerPos); // Per POS
	285	MDOUBLE exp01 = expVV[0][1];
	286	MDOUBLE exp10 = expVV[1][0];
	287	_expV01[pos]+=exp01*_postProbPerSpPerCatPerPos[spIndex][rateIndex][pos];
	288	_expV10[pos]+=exp10*_postProbPerSpPerCatPerPos[spIndex][rateIndex][pos];
	289	_expV[pos][0][1]+=exp01*_postProbPerSpPerCatPerPos[spIndex][rateIndex][pos];
	290	_expV[pos][1][0]+=exp10*_postProbPerSpPerCatPerPos[spIndex][rateIndex][pos];
	291
	292	//// III) Sim - take in account both: 1) simulations 2) posteriorsGivenTerminal
	293	//VVVdouble probChangesForBranchPerRateCategoryPerPos; // Sim+Prob
	294	//resizeVVV(_tr.getNodesNum(),_sp->alphabetSize(),_sp->alphabetSize(),probChangesForBranchPerRateCategoryPerPos);
	295	//VVdouble probVV = cpecPerRateCategoryPerPos.computePosteriorAcrossTree(simPerRateCategory,posteriorsGivenTerminalsPerRateCategoryPerPos,probChangesForBranchPerRateCategoryPerPos);
	296	MDOUBLE prob01 = probVV[0][1];
	297	MDOUBLE prob10 = probVV[1][0];
	298	_probV01[pos]+=prob01*_postProbPerSpPerCatPerPos[spIndex][rateIndex][pos];
	299	_probV10[pos]+=prob10*_postProbPerSpPerCatPerPos[spIndex][rateIndex][pos];
	300	_probV[pos][0][1]+=prob01*_postProbPerSpPerCatPerPos[spIndex][rateIndex][pos];
	301	_probV[pos][1][0]+=prob10*_postProbPerSpPerCatPerPos[spIndex][rateIndex][pos];
	302
	303	// Store all information PerCat,PerPOS
	304	for(int i=0;i<_probChanges_PosNodeXY[pos].size();++i){ // nodeId
	305	for(int j=0;j<_probChanges_PosNodeXY[pos][i].size();++j){ // fatherState
	306	for(int k=0;k<_probChanges_PosNodeXY[pos][i][j].size();++k){ // sonState
	307	_jointProb_PosNodeXY[pos][i][j][k] += posteriorsGivenTerminalsPerRateCategoryPerPos[i][j][k]*_postProbPerSpPerCatPerPos[spIndex][rateIndex][pos];
	308	_probChanges_PosNodeXY[pos][i][j][k] += probChangesForBranchPerRateCategoryPerPos[i][j][k]*_postProbPerSpPerCatPerPos[spIndex][rateIndex][pos];
	309	_expChanges_PosNodeXY[pos][i][j][k] += expChangesForBranchPerRateCategoryPerPos[i][j][k]*_postProbPerSpPerCatPerPos[spIndex][rateIndex][pos];
	310	}
	311	}
	312	}
	313	}
	314	// Per POS
	315	}
	316	// per rateCat
	317	}
	318	// Per Sp
	319	}
	320
	321
	322
	323	/********************************************************************************************
	324	printProbExp()
	325	print perPos (over all branches)
	326	use the members _expV01, _expV10 for basic
	327	*********************************************************************************************/
	328	void computeCountsGL::printProbExp()
	329	{
	330
	331	string posteriorExpectationOfChangeString = _outDir + "//" + "PosteriorExpectationOfChange.txt";
	332	ofstream posteriorExpectationStream(posteriorExpectationOfChangeString.c_str());
	333	posteriorExpectationStream.precision(PRECISION);
	334	string posteriorProbabilityOfChangeString = _outDir + "//" + "PosteriorProbabilityOfChange.txt";
	335	ofstream posteriorProbabilityStream(posteriorProbabilityOfChangeString.c_str());
	336	posteriorProbabilityStream.precision(PRECISION);
	337
	338	posteriorExpectationStream<<"POS"<<"\t"<<"exp01"<<"\t"<<"exp10"<<endl;
	339	posteriorProbabilityStream<<"POS"<<"\t"<<"prob01"<<"\t"<<"prob10"<<endl;
	340	for (int pos = 0; pos <_sc.seqLen(); ++pos){
	341	posteriorExpectationStream<<pos+1<<"\t"<<_expV01[pos]<<"\t"<<_expV10[pos]<<endl;
	342	posteriorProbabilityStream<<pos+1<<"\t"<<_probV01[pos]<<"\t"<<_probV10[pos]<<endl;
	343
	344	}
	345	}
	346
	347
	348	/********************************************************************************************
	349	printProbabilityPerPosPerBranch 1
	350	produce 2 print files:
	351	1. print detailed file (out)
	352	2. print summary over all branches (outSum)
	353	*********************************************************************************************/
	354	void computeCountsGL::printProbabilityPerPosPerBranch()
	355	{
	356	string gainLossProbabilityPerPosPerBranch = gainLossOptions::_outDir + "//" + "ProbabilityPerPosPerBranch.txt";
	357	ofstream gainLossProbabilityPerPosPerBranchStream(gainLossProbabilityPerPosPerBranch.c_str());
	358	gainLossProbabilityPerPosPerBranchStream.precision(PRECISION);
	359
	360	gainLossProbabilityPerPosPerBranchStream<<"# print values over probCutOff "<<gainLossOptions::_probCutOffPrintEvent<<endl;
	361	gainLossProbabilityPerPosPerBranchStream<<"G/L"<<"\t"<<"POS"<<"\t"<<"branch"<<"\t"<<"branchLength"<<"\t"<<"distance2root"<<"\t"<<"distance2NearestOTU"<<"\t"<<"numOfNodes2NearestOTU"<<"\t"<<"probability"<<endl;
	362	string gainLossCountProbPerPos = _outDir + "//" + "ProbabilityPerPos.txt";
	363	ofstream gainLossCountProbPerPosStream(gainLossCountProbPerPos.c_str());
	364	gainLossCountProbPerPosStream.precision(PRECISION);
	365
	366	//gainLossCountProbPerPosStream<<"# print values over probCutOff "<<gainLossOptions::_probCutOffSum<<endl;
	367	gainLossCountProbPerPosStream<<"POS"<<"\t"<<"prob01"<<"\t"<<"prob10"<<endl;
	368
	369	for (int pos = 0; pos <_sc.seqLen(); ++pos){
	370	printGainLossProbabilityPerPosPerBranch(pos, gainLossOptions::_probCutOffPrintEvent, _probChanges_PosNodeXY[pos],gainLossProbabilityPerPosPerBranchStream,gainLossCountProbPerPosStream);
	371	}
	372	}
	373	/********************************************************************************************
	374	printGainLossProbabilityPerPosPerBranch 1.1
	375	*********************************************************************************************/
	376	void computeCountsGL::printGainLossProbabilityPerPosPerBranch(int pos, MDOUBLE probCutOff, VVVdouble& probChanges, ostream& out, ostream& outCount)
	377	{
	378	MDOUBLE count01 =0;
	379	MDOUBLE count10 =0;
	380	treeIterTopDownConst tIt(_tr);
	381	for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
	382	if (probChanges[mynode->id()][0][1] >= probCutOff){
	383	out<<"gain"<<"\t"<<pos+1<<"\t"<<mynode->name()<<"\t"<<mynode->dis2father()<<"\t"<<mynode->getDistance2ROOT()<<"\t"<<mynode->getMinimalDistance2OTU()<<"\t"<<mynode->getMinimalNumOfNodes2OTU()<<"\t"<<probChanges[mynode->id()][0][1]<<endl;
	384	}
	385	count01+= probChanges[mynode->id()][0][1];
	386	if (probChanges[mynode->id()][1][0] >= probCutOff){
	387	out<<"loss"<<"\t"<<pos+1<<"\t"<<mynode->name()<<"\t"<<mynode->dis2father()<<"\t"<<mynode->getDistance2ROOT()<<"\t"<<mynode->getMinimalDistance2OTU()<<"\t"<<mynode->getMinimalNumOfNodes2OTU()<<"\t"<<probChanges[mynode->id()][1][0]<<endl;
	388	}
	389	count10+= probChanges[mynode->id()][1][0];
	390	}
	391	outCount<<pos+1<<"\t"<<count01<<"\t"<<count10<<endl;
	392	}
	393
	394
	395	/********************************************************************************************
	396	*********************************************************************************************/
	397	void computeCountsGL::produceExpectationPerBranch(){
	398	resizeVVV(_tr.getNodesNum(),_sp->alphabetSize(),_sp->alphabetSize(),_expChanges_NodeXY);
	399	for (int pos = 0; pos <_sc.seqLen(); ++pos){
	400	for(int i=0;i<_expChanges_PosNodeXY[pos].size();++i){
	401	for(int j=0;j<_expChanges_PosNodeXY[pos][i].size();++j){
	402	for(int k=0;k<_expChanges_PosNodeXY[pos][i][j].size();++k){
	403	_expChanges_NodeXY[i][j][k] += _expChanges_PosNodeXY[pos][i][j][k];
	404	}
	405	}
	406	}
	407	}
	408	}
	409
	410	/********************************************************************************************
	411	*********************************************************************************************/
	412	void computeCountsGL::printExpectationPerBranch()
	413	{
	414	string gainLossExpectationPerBranch = _outDir + "//" + "ExpectationPerBranch.txt";
	415	ofstream gainLossExpectationPerBranchStream(gainLossExpectationPerBranch.c_str());
	416	gainLossExpectationPerBranchStream.precision(PRECISION);
	417	printGainLossExpectationPerBranch(_expChanges_NodeXY,gainLossExpectationPerBranchStream);
	418	}
	419	/********************************************************************************************
	420	*********************************************************************************************/
	421	void computeCountsGL::printGainLossExpectationPerBranch(VVVdouble& expectChanges, ostream& out)
	422	{
	423	treeIterTopDownConst tIt(_tr);
	424	out<<"# Gain and Loss"<<"\n";
	425	out<<"branch"<<"\t"<<"branchLength"<<"\t"<<"distance2root"<<"\t"<<"distance2NearestOTU"<<"\t"<<"numOfNodes2NearestOTU"<<"\t"<<"exp01"<<"\t"<<"exp10"<<endl;
	426	for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
	427	if(mynode->isRoot())
	428	continue;
	429	out<<mynode->name()<<"\t"<<mynode->dis2father()<<"\t"<<mynode->getDistance2ROOT()<<"\t"<<mynode->getMinimalDistance2OTU()<<"\t"<<mynode->getMinimalNumOfNodes2OTU()<<"\t"<<expectChanges[mynode->id()][0][1]<<"\t"<<expectChanges[mynode->id()][1][0]<<endl;
	430	}
	431	}
	432
	433	/********************************************************************************************
	434	*********************************************************************************************/
	435	void computeCountsGL::updateTreeByGainLossExpectationPerBranch(tree& tr, int from, int to)
	436	{
	437	tr = _tr;
	438	treeIterTopDownConst tIt(tr);
	439	for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
	440	if(mynode->isRoot())
	441	continue;
	442	mynode->setDisToFather(_expChanges_NodeXY[mynode->id()][from][to]);
	443	}
	444	}
	445
	446
	447	/********************************************************************************************
	448	*********************************************************************************************/
	449	void computeCountsGL::printTreesWithExpectationValuesAsBP()
	450	{
	451	// ExpectationPerPosPerBranch - Print Trees
	452	Vstring Vnames;
	453	fillVnames(Vnames,_tr);
	454	createDir(gainLossOptions::_outDir, "TreesWithExpectationValuesAsBP");
	455	for (int pos = 0; pos <_sc.seqLen(); ++pos){
	456	string strTreeNum = _outDir + "//" + "TreesWithExpectationValuesAsBP" + "//" + "expTree" + int2string(pos+1) + ".ph";
	457	ofstream tree_out(strTreeNum.c_str());
	458	tree_out.precision(PRECISION);
	459	printTreeWithValuesAsBP(tree_out,_tr,Vnames,&_expChanges_PosNodeXY[pos]);
	460	}
	461	}
	462
	463	/********************************************************************************************
	464	*********************************************************************************************/
	465	void computeCountsGL::printTreesWithProbabilityValuesAsBP()
	466	{
	467	// ProbabilityPerPosPerBranch - Print Trees
	468	Vstring Vnames;
	469	fillVnames(Vnames,_tr);
	470	createDir(_outDir, "TreesWithProbabilityValuesAsBP");
	471	for (int pos = 0; pos <_sc.seqLen(); ++pos){
	472	string strTreeNum = _outDir + "//" + "TreesWithProbabilityValuesAsBP"+ "//" + "probTree" + int2string(pos+1) + ".ph";
	473	ofstream tree_out(strTreeNum.c_str());
	474	printTreeWithValuesAsBP(tree_out,_tr,Vnames,&_probChanges_PosNodeXY[pos]);
	475	}
	476	}
	477
	478	/********************************************************************************************
	479	printProbExpPerPosPerBranch 1
	480	produce 2 print files:
	481	1. print detailed file (out)
	482	2. print summary over all branches (outSum)
	483	*********************************************************************************************/
	484	void computeCountsGL::printProbExpPerPosPerBranch(MDOUBLE probCutOff, MDOUBLE countsCutOff)
	485	{
	486	string gainLossProbExpPerPosPerBranch = _outDir + "//" + "gainLossProbExpPerPosPerBranch.txt";
	487	ofstream gainLossProbExpPerPosPerBranchStream(gainLossProbExpPerPosPerBranch.c_str());
	488	gainLossProbExpPerPosPerBranchStream.precision(PRECISION);
	489	gainLossProbExpPerPosPerBranchStream<<"# print values over probCutOff "<<probCutOff<<endl;
	490	gainLossProbExpPerPosPerBranchStream<<"G/L"<<"\t"<<"POS"<<"\t"<<"branch"<<"\t"<<"branchLength"<<"\t"<<"distance2root"<<"\t"<<"distance2NearestOTU"<<"\t"<<"numOfNodes2NearestOTU"<<"\t"<<"probability"<<"\t"<<"expectation"<<endl;
	491	string gainLossProbExpPerPos = _outDir + "//" + "gainLossProbExpCountPerPos.txt";
	492	ofstream gainLossCountProbPerPosStream(gainLossProbExpPerPos.c_str());
	493	gainLossCountProbPerPosStream.precision(PRECISION);
	494	gainLossCountProbPerPosStream<<"# print count over countsCutOff "<<countsCutOff<<endl;
	495	gainLossCountProbPerPosStream<<"POS"<<"\t"<<"prob01"<<"\t"<<"prob10"<<"\t"<<"exp01"<<"\t"<<"exp10"<<"\t"<<"count01"<<"\t"<<"count10"<<endl;
	496
	497	for (int pos = 0; pos <_sc.seqLen(); ++pos){
	498	printGainLossProbExpPerPosPerBranch(pos, probCutOff,countsCutOff, _probChanges_PosNodeXY[pos],_expChanges_PosNodeXY[pos],gainLossProbExpPerPosPerBranchStream,gainLossCountProbPerPosStream);
	499	}
	500	}
	501	/********************************************************************************************
	502	PrintExpPerPosPerBranchMatrix (CoMap input)
	503	NOTE!!! this version only consist of gain or loss values
	504	Alternatively, (1) abs(gain+loss) (2) gain-loss (3) separate gain and loss matrices
	505	*********************************************************************************************/
	506	void computeCountsGL::printExpPerPosPerBranchMatrix(const int from, const int to)
	507	{
	508	int numOfpositions = _sc.seqLen();
	509	int numOfbranches = _tr.getNodesNum()-1; // minus the root node
	510
	511	string expPerPosPerBranchMatrix = _outDir + "//" + "expPerPosPerBranchMatrix."+ int2string(from)+int2string(to)+".txt";
	512	ofstream expPerPosPerBranchMatrixStream(expPerPosPerBranchMatrix.c_str());
	513	expPerPosPerBranchMatrixStream.precision(6);
	514	expPerPosPerBranchMatrixStream<<"Name\tLength\tBranches\tMean";
	515	for (int pos = 0; pos <numOfpositions; ++pos){
	516	expPerPosPerBranchMatrixStream<<"\tSite"<<pos+1;
	517	}
	518	expPerPosPerBranchMatrixStream<<"\n";
	519	treeIterTopDownConst tIt(_tr);
	520	int branchNum = 0;
	521	for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
	522	if(mynode->isRoot())
	523	continue;
	524	expPerPosPerBranchMatrixStream<<mynode->name()<<"\t"<<mynode->dis2father()<<"\t"<<branchNum<<"\t"<<_expChanges_NodeXY[mynode->id()][from][to]/numOfbranches;
	525	for (int pos = 0; pos <numOfpositions; ++pos){
	526	expPerPosPerBranchMatrixStream<<"\t"<<_expChanges_PosNodeXY[pos][mynode->id()][from][to];
	527	}
	528	expPerPosPerBranchMatrixStream<<"\n";
	529	++branchNum;
	530	}
	531	expPerPosPerBranchMatrixStream.close();
	532	}
	533
	534
	535	///********************************************************************************************
	536	//*********************************************************************************************/
	537	//void computeCountsGL::fillCorrPerSelectedSites(Vdouble& correlationPerPos,VVdouble& expEventsPerPosPerBranch,VVdouble& expEventsPerPosPerBranch_B,const int selectedSite, const bool isPearson){
	538	// int numOfpositions = expEventsPerPosPerBranch_B.size();
	539	// //correlationPerPos.resize(numOfpositions);
	540	// for (int pos = 0; pos <numOfpositions; ++pos){
	541	// MDOUBLE correlation = 0;
	542	// if(isMinEQMaxInVector(expEventsPerPosPerBranch[selectedSite]) \|\| isMinEQMaxInVector(expEventsPerPosPerBranch_B[pos]))
	543	// correlationPerPos[pos]=-99; // can't compute correlation
	544	// else{
	545	// if(isPearson)
	546	// correlation = calcPearsonCorrelation(expEventsPerPosPerBranch[selectedSite], expEventsPerPosPerBranch_B[pos]);
	547	// else
	548	// correlation = calcRankCorrelation(expEventsPerPosPerBranch[selectedSite], expEventsPerPosPerBranch_B[pos]);
	549	// correlationPerPos[pos]=correlation;
	550	// }
	551	// }
	552	//}
	553	/********************************************************************************************
	554	Compute the Pearson / Spearman correlation among sites.
	555	*********************************************************************************************/
	556	//void computeCountsGL::computedCorrelations(const Vint& selectedPositions, const bool isNormalizeForBranch)
	557	//{
	558	// int numOfpositions = _sc.seqLen();
	559	// int numOfbranches = _tr.getNodesNum()-1; // was -1, minus the root node
	560	//
	561	// //// Mapping vectors
	562	// LOGnOUT(6, <<"Copy events vectors"<<endl);
	563	// // Expectation
	564	// fillMapValPerPosPerBranch(_expPerPosPerBranch01,0,1,_expChanges_PosNodeXY,isNormalizeForBranch);
	565	// fillMapValPerPosPerBranch(_expPerPosPerBranch10,1,0,_expChanges_PosNodeXY,isNormalizeForBranch);
	566	// _expPerPosPerBranch = _expPerPosPerBranch01; // gain and loss appended (double size vector)
	567	// appendVectors(_expPerPosPerBranch, _expPerPosPerBranch10);
	568	//
	569	// //// correlation vectors, filled below
	570	// LOGnOUT(6, <<"Resize correlation vectors vectors"<<endl);
	571	// resizeMatrix(_correlationPerSitePerPosGainGainSpearman, selectedPositions.size(), numOfpositions);
	572	// resizeMatrix(_correlationPerSitePerPosLossLossSpearman, selectedPositions.size(), numOfpositions);
	573	// resizeMatrix(_correlationPerSitePerPosBothSpearman, selectedPositions.size(), numOfpositions);
	574	//
	575	// resizeMatrix(_correlationPerSitePerPosGainGainPearson, selectedPositions.size(), numOfpositions);
	576	// resizeMatrix(_correlationPerSitePerPosLossLossPearson, selectedPositions.size(), numOfpositions);
	577	// resizeMatrix(_correlationPerSitePerPosBothPearson, selectedPositions.size(), numOfpositions);
	578	//
	579	// for (int selectedSiteIndex = 0; selectedSiteIndex <selectedPositions.size(); ++selectedSiteIndex){
	580	// int selectedSite = selectedPositions[selectedSiteIndex];
	581	// LOGnOUT(6, <<"Compute pearson for G-G, L-L, both site"<<selectedSiteIndex<<endl);
	582	// fillCorrPerSelectedSites(_correlationPerSitePerPosGainGainPearson[selectedSiteIndex],_expPerPosPerBranch01,selectedSite,true);
	583	// fillCorrPerSelectedSites(_correlationPerSitePerPosLossLossPearson[selectedSiteIndex],_expPerPosPerBranch10,selectedSite,true);
	584	// fillCorrPerSelectedSites(_correlationPerSitePerPosBothPearson[selectedSiteIndex],_expPerPosPerBranch,selectedSite,true);
	585	//
	586	// LOGnOUT(6, <<"Compute spearman for G-G, L-L site"<<selectedSiteIndex<<endl);
	587	// fillCorrPerSelectedSites(_correlationPerSitePerPosGainGainSpearman[selectedSiteIndex],_expPerPosPerBranch01,selectedSite,false);
	588	// fillCorrPerSelectedSites(_correlationPerSitePerPosLossLossSpearman[selectedSiteIndex],_expPerPosPerBranch10,selectedSite,false);
	589	// fillCorrPerSelectedSites(_correlationPerSitePerPosBothSpearman[selectedSiteIndex],_expPerPosPerBranch,selectedSite,false);
	590	// }
	591	//}
	592
	593
	594	/********************************************************************************************
	595	printComputedCorrelations (currently disabled - the whole function is commented out)
	596	Prints the correlations filled by computedCorrelations: either one file per selected site,
	597	or, when _printComputedCorrelationsAllSites is set, a single all-against-all file
	598	*********************************************************************************************/
	599	//void computeCountsGL::printComputedCorrelations(const Vint& selectedPositions, const bool isNormalizeForBranch, const bool correlationForZscore)
	600	//{
	601	// bool isTransform = false;
	602	// bool isMinForPrint = true;
	603	// bool isPearson = false;
	604	// int precisionCorr = 8;
	605	// MDOUBLE minForPrint = 0.1; // max =1
	606	//
	607	// int numOfpositions = _sc.seqLen();
	608	// int numOfbranches = _tr.getNodesNum()-1; // was -1, minus the root node
	609	//
	610	// //// Mapping vectors
	611	// LOGnOUT(6, <<"Copy events vectors"<<endl);
	612	//
	613	// //////////////////////////////////////////////////////////////////////////
	614	// if(!gainLossOptions::_printComputedCorrelationsAllSites){
	615	// for (int selectedSiteIndex = 0; selectedSiteIndex <selectedPositions.size(); ++selectedSiteIndex){
	616	// int selectedSite = selectedPositions[selectedSiteIndex];
	617	//
	618	// MDOUBLE meanCorrBoth = computeAverage(_correlationPerSitePerPosBothPearson[selectedSiteIndex]);
	619	// MDOUBLE stdCorrBoth = computeStd(_correlationPerSitePerPosBothPearson[selectedSiteIndex]);
	620	// MDOUBLE meanCorrGainGain = computeAverage(_correlationPerSitePerPosGainGainPearson[selectedSiteIndex]);
	621	// MDOUBLE stdCorrGainGain = computeStd(_correlationPerSitePerPosGainGainPearson[selectedSiteIndex]);
	622	// MDOUBLE meanCorrLossLoss = computeAverage(_correlationPerSitePerPosLossLossPearson[selectedSiteIndex]);
	623	// MDOUBLE stdCorrLossLoss = computeStd(_correlationPerSitePerPosLossLossPearson[selectedSiteIndex]);
	624	//
	625	//
	626	// // for each selectedSite a new file is created
	627	// LOGnOUT(4, <<"Correlations with site="<<selectedSite<<" With NormalizeForBranch "<<isNormalizeForBranch<<" With correlationForZscore "<<correlationForZscore<<endl);
	628	// string corrPerSite = _outDir + "//" + "selectedCorr.Site"+ int2string(selectedSite+1)+ ".isNormForBr."+int2string(isNormalizeForBranch)/+ ".isCorrForZ."+int2string(correlationForZscore)/+ ".txt";
	629	// //string corrPerSite = _outDir + "//" + "selectedCorr.Site"+ int2string(selectedSite+1)+".txt";
	630	//
	631	// ofstream corrPerSiteStream(corrPerSite.c_str());
	632	// corrPerSiteStream.precision(precisionCorr);
	633	// corrPerSiteStream<<"# "<<selectedSite+1<<"\n";
	634	// corrPerSiteStream<<"# Both(gain N loss concat) correlation(Pearson): Ave= "<<meanCorrBoth<<" Std= "<<stdCorrBoth<<"\n";
	635	// corrPerSiteStream<<"# Gain correlation(Pearson): Ave= "<<meanCorrGainGain<<" Std= "<<stdCorrGainGain<<"\n";
	636	// corrPerSiteStream<<"# Loss correlation: Ave= "<<meanCorrLossLoss<<" Std= "<<stdCorrLossLoss<<"\n";
	637	// corrPerSiteStream<<"pos"<<"\t"<<"bothPearson"<<"\t"<<"bothSpearman"<<"\t"<<"ExpGainGainPearson"<<"\t"<<"ExpLossLossPearson"<<"\t"<<"ExpGainGainSpearman"<<"\t"<<"ExpLossLossSpearman"<<"\n";
	638	//
	639	// for (int pos = 0; pos<numOfpositions; ++pos){
	640	// if(selectedSite == pos) // since selectedSite starts from 1
	641	// continue;
	642	// bool isPosOneOfSelectedSites = false;
	643	// if(gainLossOptions::_isIgnoreCorrelationAmongSelectedSites){
	644	// for (int selectedSiteI = 0; selectedSiteI <selectedPositions.size(); ++selectedSiteI){
	645	// int selectedS = selectedPositions[selectedSiteI];
	646	// if(selectedS == pos){
	647	// isPosOneOfSelectedSites = true;
	648	// continue;
	649	// }
	650	// }
	651	// if(isPosOneOfSelectedSites)
	652	// continue;
	653	// }
	654	// corrPerSiteStream<<pos+1
	655	// <<"\t"<<_correlationPerSitePerPosBothPearson[selectedSiteIndex][pos]<<"\t"<<_correlationPerSitePerPosBothSpearman[selectedSiteIndex][pos]
	656	// <<"\t"<<_correlationPerSitePerPosGainGainPearson[selectedSiteIndex][pos]<<"\t"<<_correlationPerSitePerPosLossLossPearson[selectedSiteIndex][pos]
	657	// <<"\t"<<_correlationPerSitePerPosGainGainSpearman[selectedSiteIndex][pos]<<"\t"<<_correlationPerSitePerPosLossLossSpearman[selectedSiteIndex][pos]<<"\n";
	658	// }
	659	// }
	660	// }
	661	// ////////////////////////////////////////////////////////////////////////// All-against-all different format
	662	// else{
	663	// string corrAllSites = _outDir + "//" + "allCorrelations.isNormForBr."+int2string(isNormalizeForBranch)+ ".isCorrForZ."+int2string(correlationForZscore)+ ".txt";
	664	// ofstream* corrAllStream_p;
	665	// corrAllStream_p = new ofstream(corrAllSites.c_str());
	666	// corrAllStream_p->precision(precisionCorr);
	667	// *corrAllStream_p<<"#COGA"<<"\t"<<"COGB"<<"\t"<<"posGainGain"<<"\t"<<"posLossLoss"<<"\t"<<"negGainGain"<<"\t"<<"negLossLoss"<<"\n";
	668	// for (int selectedSiteIndex = 0; selectedSiteIndex <selectedPositions.size(); ++selectedSiteIndex){
	669	// int selectedSite = selectedPositions[selectedSiteIndex];
	670	//
	671	// MDOUBLE meanCorrGainGain = computeAverage(_correlationPerSitePerPosGainGainPearson[selectedSiteIndex]);
	672	// MDOUBLE stdCorrGainGain = computeStd(_correlationPerSitePerPosGainGainPearson[selectedSiteIndex]);
	673	// MDOUBLE meanCorrLossLoss = computeAverage(_correlationPerSitePerPosLossLossPearson[selectedSiteIndex]);
	674	// MDOUBLE stdCorrLossLoss = computeStd(_correlationPerSitePerPosLossLossPearson[selectedSiteIndex]);
	675	//
	676	// for (int pos = 0; pos<numOfpositions; ++pos){
	677	// if(selectedSite == pos)
	678	// continue;
	679	// MDOUBLE correlationGainGain = _correlationPerSitePerPosGainGainPearson[selectedSiteIndex][pos];
	680	// MDOUBLE correlationLossLoss = _correlationPerSitePerPosLossLossPearson[selectedSiteIndex][pos];
	681	//
	682	// if(correlationForZscore){
	683	// correlationGainGain = (correlationGainGain - meanCorrGainGain)/stdCorrGainGain;
	684	// correlationLossLoss = (correlationLossLoss - meanCorrLossLoss)/stdCorrLossLoss;
	685	// }
	686	// if(isMinForPrint && max(abs(correlationGainGain),abs(correlationLossLoss))<minForPrint)
	687	// continue;
	688	// MDOUBLE posCorrelationGainGain = (correlationGainGain >=0) ? correlationGainGain*1000-1 : 0;
	689	// MDOUBLE negCorrelationGainGain = (correlationGainGain < 0) ? correlationGainGain*1000-1 : 0;
	690	// MDOUBLE posCorrelationLossLoss = (correlationLossLoss >=0) ? correlationLossLoss*1000-1 : 0;
	691	// MDOUBLE negCorrelationLossLoss = (correlationLossLoss < 0) ? correlationLossLoss*1000-1 : 0;
	692	// if(isTransform){
	693	// posCorrelationGainGain = pow(posCorrelationGainGain/10,2)/10;
	694	// negCorrelationGainGain = pow(negCorrelationGainGain/10,2)/10;
	695	// posCorrelationLossLoss = pow(posCorrelationLossLoss/10,2)/10;
	696	// negCorrelationLossLoss = pow(negCorrelationLossLoss/10,2)/10;
	697	// }
	698	// *corrAllStream_p<<selectedSiteIndex+1<<"\t"<<pos+1<<"\t"<<(int)posCorrelationGainGain<<"\t"<<(int)posCorrelationLossLoss<<"\t"<<(int)negCorrelationGainGain<<"\t"<<(int)negCorrelationLossLoss<<"\n";
	699	// }
	700	// }
	701	// }
	702	//}
	703	//
	704	///********************************************************************************************
	705	//*********************************************************************************************/
	706	//void computeCountsGL::fillMapValPerPosPerBranch(VVdouble& expEventsPerPosPerBranch,const int from, const int to, VVVVdouble& map_PosNodeXY
	707	// ,const bool isNormalizeForBranch, MDOUBLE* cutOff_p){
	708	//
	709	// int numOfpositions = _sc.seqLen();
	710	// int numOfbranches = _tr.getNodesNum()-1; // was -1, minus the root node
	711	//
	712	// expEventsPerPosPerBranch.resize(numOfpositions);
	713	// treeIterTopDownConst tIt(_tr);
	714	// for (int pos = 0; pos <numOfpositions; ++pos){
	715	// for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next())
	716	// {
	717	// if(mynode->isRoot())
	718	// continue;
	719	// MDOUBLE val = 0;
	720	// if(isNormalizeForBranch){
	721	// MDOUBLE normalizationFactor = _expChanges_NodeXY[mynode->id()][from][to]/numOfbranches; // _expChanges_NodeXY[mynode->id()][from][to]/numOfbranches
	722	// val = (map_PosNodeXY[pos][mynode->id()][from][to] ) / normalizationFactor;
	723	// }else{
	724	// val = map_PosNodeXY[pos][mynode->id()][from][to];
	725	// }
	726	//
	727	// if(cutOff_p){
	728	// if(val>= *cutOff_p)
	729	// expEventsPerPosPerBranch[pos].push_back(1);
	730	// else
	731	// expEventsPerPosPerBranch[pos].push_back(0);
	732	// }
	733	// else
	734	// expEventsPerPosPerBranch[pos].push_back(val);
	735	// }
	736	// }
	737	//}
	738
	739
	740
	741
	742	/********************************************************************************************
	743	printGainLossProbExpPerPosPerBranch 1.1
	744	Get pos, and iterate over all branches:
	745	1. print detailed file (out)
	746	2. print summary over all branches (outSum)
	747	*********************************************************************************************/
	748	void computeCountsGL::printGainLossProbExpPerPosPerBranch(int pos, MDOUBLE probCutOff, MDOUBLE countCutOff, VVVdouble& probChanges, VVVdouble& expChanges, ostream& out, ostream& outSum)
	749	{
	750	MDOUBLE prob01 =0;
	751	MDOUBLE prob10 =0;
	752	MDOUBLE exp01 =0;
	753	MDOUBLE exp10 =0;
	754	MDOUBLE count01 =0;
	755	MDOUBLE count10 =0;
	756
	757	countCutOff = floorf(countCutOff * pow(10.0,4) + 0.5) / pow(10.0,4); // if not rounded, perfect correlations may return 1.000002, for example
	758
	759	treeIterTopDownConst tIt(_tr);
	760	for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
	761	if(mynode->isRoot()) continue;
	762	if (probChanges[mynode->id()][0][1] >= probCutOff \|\| probCutOff == 0) // only per branch print must exceed cutoff
	763	out<<"gain"<<"\t"<<pos+1<<"\t"<<mynode->name()<<"\t"<<mynode->dis2father()<<"\t"<<mynode->getDistance2ROOT()<<"\t"<<mynode->getMinimalDistance2OTU()<<"\t"<<mynode->getMinimalNumOfNodes2OTU()<<"\t"<<probChanges[mynode->id()][0][1]<<"\t"<<expChanges[mynode->id()][0][1]<<endl;
	764	if (probChanges[mynode->id()][0][1] > countCutOff)
	765	count01+= 1;
	766	prob01+= probChanges[mynode->id()][0][1];
	767	exp01+= expChanges[mynode->id()][0][1];
	768	if (probChanges[mynode->id()][1][0] >= probCutOff \|\| probCutOff == 0) // only per branch print must exceed cutoff
	769	out<<"loss"<<"\t"<<pos+1<<"\t"<<mynode->name()<<"\t"<<mynode->dis2father()<<"\t"<<mynode->getDistance2ROOT()<<"\t"<<mynode->getMinimalDistance2OTU()<<"\t"<<mynode->getMinimalNumOfNodes2OTU()<<"\t"<<probChanges[mynode->id()][1][0]<<"\t"<<expChanges[mynode->id()][1][0]<<endl;
	770	if (probChanges[mynode->id()][1][0] > countCutOff)
	771	count10+= 1;
	772	prob10+= probChanges[mynode->id()][1][0];
	773	exp10+= expChanges[mynode->id()][1][0];
	774	}
	775	outSum<<pos+1<<"\t"<<prob01<<"\t"<<prob10<<"\t"<<exp01<<"\t"<<exp10<<"\t"<<count01<<"\t"<<count10<<endl;
	776	}
	777
	778
	779
	780	/********************************************************************************************
	781	FewCutOffs
	782	*********************************************************************************************/
	783	void computeCountsGL::printProbExpPerPosPerBranchFewCutOffs(MDOUBLE probCutOff)
	784	{
	785	MDOUBLE countCutOff;
	786	MDOUBLE countCutOffLow = 0.1;
	787	MDOUBLE countCutOffIncrem = 0.05;
	788	MDOUBLE countCutOffHigh = 0.9;
	789	string count01 = "count01_";
	790	string count10 = "count10_";
	791
	792	//Math::Round(3.44, 1);
	793
	794
	795	string gainLossProbExpPerPosPerBranch = _outDir + "//" + "gainLossProbExpPerPosPerBranch.txt";
	796	ofstream gainLossProbExpPerPosPerBranchStream(gainLossProbExpPerPosPerBranch.c_str());
	797	gainLossProbExpPerPosPerBranchStream.precision(PRECISION);
	798
	799	gainLossProbExpPerPosPerBranchStream<<"# print values over probCutOff "<<probCutOff<<endl;
	800	gainLossProbExpPerPosPerBranchStream<<"G/L"<<"\t"<<"POS"<<"\t"<<"branch"<<"\t"<<"branchLength"<<"\t"<<"distance2root"<<"\t"<<"distance2NearestOTU"<<"\t"<<"numOfNodes2NearestOTU"<<"\t"<<"probability"<<"\t"<<"expectation"<<endl;
	801	string gainLossProbExpPerPos = _outDir + "//" + "gainLossProbExpCountPerPos.txt";
	802	ofstream gainLossCountProbPerPosStream(gainLossProbExpPerPos.c_str());
	803	gainLossCountProbPerPosStream.precision(PRECISION);
	804
	805
	806	gainLossCountProbPerPosStream<<"# print count over countCutOffLow="<<countCutOffLow<<" to countCutOffHigh="<<countCutOffHigh<<" with increment="<<countCutOffIncrem<<endl;
	807	gainLossCountProbPerPosStream<<"POS"<<"\t"<<"prob01"<<"\t"<<"prob10"<<"\t"<<"exp01"<<"\t"<<"exp10"<<"\t"<<"prob01_Rec"<<"\t"<<"prob10_Rec"<<"\t"<<"exp01_Rec"<<"\t"<<"exp10_Rec"<<"\t"<<"prob01_Anc"<<"\t"<<"prob10_Anc"<<"\t"<<"exp01_Anc"<<"\t"<<"exp10_Anc"<<"\t";
	808	// print all cut-offs
	809	for(countCutOff=countCutOffLow; countCutOff<=countCutOffHigh ;countCutOff+=countCutOffIncrem){
	810	countCutOff = floorf(countCutOff * pow(10.0,4) + 0.5) / pow(10.0,4); // if not rounded, perfect correlations may return 1.000002, for example
	811	gainLossCountProbPerPosStream<<count01+double2string(countCutOff)<<"\t"<<count10+double2string(countCutOff)<<"\t";
	812	}
	813	gainLossCountProbPerPosStream<<endl;
	814
	815	for (int pos = 0; pos <_sc.seqLen(); ++pos){
	816	printGainLossProbExpPerPosPerBranchFewCutOffs(pos, probCutOff,countCutOffLow,countCutOffIncrem,countCutOffHigh, _probChanges_PosNodeXY[pos],_expChanges_PosNodeXY[pos],gainLossProbExpPerPosPerBranchStream,gainLossCountProbPerPosStream);
	817	}
	818	}
	819	/********************************************************************************************
	820	*********************************************************************************************/
	821	void computeCountsGL::printGainLossProbExpPerPosPerBranchFewCutOffs(int pos, MDOUBLE probCutOff,
	822	MDOUBLE countCutOffLow,MDOUBLE countCutOffIncrem, MDOUBLE countCutOffHigh, VVVdouble& probChanges, VVVdouble& expChanges, ostream& out, ostream& outSum)
	823	{
	824	MDOUBLE prob01 =0;
	825	MDOUBLE prob10 =0;
	826	MDOUBLE exp01 =0;
	827	MDOUBLE exp10 =0;
	828
	829	MDOUBLE prob01_R =0;
	830	MDOUBLE prob10_R =0;
	831	MDOUBLE exp01_R =0;
	832	MDOUBLE exp10_R =0;
	833
	834	MDOUBLE prob01_Anc =0;
	835	MDOUBLE prob10_Anc =0;
	836	MDOUBLE exp01_Anc =0;
	837	MDOUBLE exp10_Anc =0;
	838
	839	int FewCutOffsSize = (int)ceil((countCutOffHigh-countCutOffLow)/countCutOffIncrem)+1;
	840	Vdouble count01(FewCutOffsSize);
	841	Vdouble count10(FewCutOffsSize);
	842
	843	MDOUBLE countCutOff;
	844	int i;
	845
	846	treeIterTopDownConst tIt(_tr);
	847	for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
	848	if (probChanges[mynode->id()][0][1] >= probCutOff) // only per branch print must exceed cutoff
	849	out<<"gain"<<"\t"<<pos+1<<"\t"<<mynode->name()<<"\t"<<mynode->dis2father()<<"\t"<<mynode->getDistance2ROOT()<<"\t"<<mynode->getMinimalDistance2OTU()<<"\t"<<mynode->getMinimalNumOfNodes2OTU()<<"\t"<<probChanges[mynode->id()][0][1]<<"\t"<<expChanges[mynode->id()][0][1]<<endl;
	850	prob01+= probChanges[mynode->id()][0][1];
	851	exp01+= expChanges[mynode->id()][0][1];
	852	// if(mynode->isLeaf() \|\| (mynode->getDistance2ROOT()<_distanceFromRootForRecent) ){
	853	if(mynode->isLeaf() \|\| (mynode->getMinimalDistance2OTU()<_distanceFromNearestOTUForRecent) ){
	854	prob01_R+= probChanges[mynode->id()][0][1];
	855	exp01_R+= expChanges[mynode->id()][0][1];
	856	}
	857	else{
	858	prob01_Anc+= probChanges[mynode->id()][0][1];
	859	exp01_Anc+= expChanges[mynode->id()][0][1];
	860	}
	861	i = 0;
	862	for( countCutOff=countCutOffLow; countCutOff<=countCutOffHigh ; countCutOff+=countCutOffIncrem){
	863	countCutOff = floorf(countCutOff * pow(10.0,4) + 0.5) / pow(10.0,4); // if not rounded, perfect correlations may return 1.000002, for example
	864
	865	if (probChanges[mynode->id()][0][1] > countCutOff)
	866	count01[i]+= 1;
	867	++i;
	868	}
	869	if (probChanges[mynode->id()][1][0] >= probCutOff) // only per branch print must exceed cutoff
	870	out<<"loss"<<"\t"<<pos+1<<"\t"<<mynode->name()<<"\t"<<mynode->dis2father()<<"\t"<<mynode->getDistance2ROOT()<<"\t"<<mynode->getMinimalDistance2OTU()<<"\t"<<mynode->getMinimalNumOfNodes2OTU()<<"\t"<<probChanges[mynode->id()][1][0]<<"\t"<<expChanges[mynode->id()][1][0]<<endl;
	871	prob10+= probChanges[mynode->id()][1][0];
	872	exp10+= expChanges[mynode->id()][1][0];
	873	// if(mynode->isLeaf() \|\| mynode->getDistance2ROOT() < _distanceFromRootForRecent){
	874	if(mynode->isLeaf() \|\| mynode->getMinimalDistance2OTU() < _distanceFromNearestOTUForRecent){
	875	prob10_R+= probChanges[mynode->id()][1][0];
	876	exp10_R+= expChanges[mynode->id()][1][0];
	877	}
	878	else{
	879	prob10_Anc+= probChanges[mynode->id()][1][0];
	880	exp10_Anc+= expChanges[mynode->id()][1][0];
	881	}
	882	i = 0;
	883	for(countCutOff=countCutOffLow; countCutOff<=countCutOffHigh ; countCutOff+=countCutOffIncrem){
	884	countCutOff = floorf(countCutOff * pow(10.0,4) + 0.5) / pow(10.0,4); // if not rounded, perfect correlations may return 1.000002, for example
	885	if (probChanges[mynode->id()][1][0] > countCutOff)
	886	count10[i]+= 1;
	887	++i;
	888	}
	889	}
	890	outSum<<pos+1<<"\t"<<prob01<<"\t"<<prob10<<"\t"<<exp01<<"\t"<<exp10
	891	<<"\t"<<prob01_R<<"\t"<<prob10_R<<"\t"<<exp01_R<<"\t"<<exp10_R
	892	<<"\t"<<prob01_Anc<<"\t"<<prob10_Anc<<"\t"<<exp01_Anc<<"\t"<<exp10_Anc<<"\t";
	893
	894	// print all cut-offs
	895	i = 0;
	896	for(countCutOff=countCutOffLow; countCutOff<=countCutOffHigh ; countCutOff+=countCutOffIncrem){
	897	countCutOff = floorf(countCutOff * pow(10.0,4) + 0.5) / pow(10.0,4); // if not rounded, perfect correlations may return 1.000002, for example
	898	outSum<<count01[i]<<"\t"<<count10[i]<<"\t";
	899	++i;
	900	}
	901	outSum<<endl;
	902	}
	903
	904	/********************************************************************************************
	905	*********************************************************************************************/
	906	//void computeCountsGL::computeMeanAndSdPerBranch(Vdouble& meanEventsPerBranch01, Vdouble& meanEventsPerBranch10, Vdouble& sdEventsPerBranch01,Vdouble& sdEventsPerBranch10){
	907	// int numOfpositions = _sc.seqLen();
	908	// Vdouble eventsAllPos01(numOfpositions);
	909	// Vdouble eventsAllPos10(numOfpositions);
	910	//
	911	// treeIterTopDownConst tIt(_tr);
	912	// for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next())
	913	// {
	914	// if(mynode->isRoot())
	915	// continue;
	916	// for (int pos = 0; pos <numOfpositions; ++pos){
	917	// eventsAllPos01[pos] = _expChanges_PosNodeXY[pos][mynode->id()][0][1];
	918	// eventsAllPos10[pos] = _expChanges_PosNodeXY[pos][mynode->id()][1][0];
	919	// }
	920	// meanEventsPerBranch01[mynode->id()]= computeAverage(eventsAllPos01);
	921	// meanEventsPerBranch10[mynode->id()]= computeAverage(eventsAllPos10);
	922	// sdEventsPerBranch01[mynode->id()] = computeStd(eventsAllPos01);
	923	// sdEventsPerBranch10[mynode->id()] = computeStd(eventsAllPos10);
	924	// }
	925	//}
	926
	927

+169

-0

programs/gainLoss/computeCountsGL.h less more

	0	/*
	1	Copyright (C) 2011 Tal Pupko TalP@tauex.tau.ac.il.
	2
	3	This program is free software: you can redistribute it and/or modify
	4	it under the terms of the GNU General Public License as published by
	5	the Free Software Foundation, either version 3 of the License, or
	6	(at your option) any later version.
	7
	8	This program is distributed in the hope that it will be useful,
	9	but WITHOUT ANY WARRANTY; without even the implied warranty of
	10	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	11	GNU General Public License for more details.
	12
	13	You should have received a copy of the GNU General Public License
	14	along with this program. If not, see <http://www.gnu.org/licenses/>.
	15	*/
	16
	17
	18	#ifndef ___computeCountsGL___GL
	19	#define ___computeCountsGL___GL
	20
	21	#include "definitions.h"
	22	#include "replacementModel.h"
	23	#include "gainLoss.h"
	24
	25	/********************************************************************************************
	26	computeCountsGL
	27	*********************************************************************************************/
	28	class computeCountsGL{
	29	public:
	30	explicit computeCountsGL(sequenceContainer& sc, tree& tr, stochasticProcess* sp, string& outDir, VVdouble& LpostPerCat, MDOUBLE distanceFromNearestOTUForRecent, bool isSilent =false);
	31	explicit computeCountsGL(sequenceContainer& sc, tree& tr, vector<vector<stochasticProcess> >& spVVec, distribution gainDist, distribution* lossDist, string& outDir, VVVdouble& LpostPerSpPerCat, MDOUBLE distanceFromNearestOTUForRecent, bool isSilent=false);
	32	virtual ~computeCountsGL() ;
	33
	34	computeCountsGL(const computeCountsGL& other) {*this = other;}
	35	computeCountsGL& operator=(const computeCountsGL &other);
	36	void run();
	37	void computePosteriorOfChangeGivenTerminalsPerCat();
	38	void computePosteriorOfChangeGivenTerminalsPerSpPerCat();
	39
	40	void printProbExp();
	41	void printProbabilityPerPosPerBranch();
	42	void printProbExpPerPosPerBranch(MDOUBLE probCutOff =0.0,MDOUBLE countsCutOff= 0.2);
	43	void printExpPerPosPerBranchMatrix(const int from, const int to);
	44
	45	void printProbExpPerPosPerBranchFewCutOffs(MDOUBLE probCutOff);
	46
	47	void produceExpectationPerBranch();
	48	void printExpectationPerBranch();
	49	void updateTreeByGainLossExpectationPerBranch(tree& tr, int from, int to);
	50
	51	void printTreesWithExpectationValuesAsBP();
	52	void printTreesWithProbabilityValuesAsBP();
	53
	54	//void computedCorrelations(const Vint& selectedPositions, const bool isNormalizeForBranch = false);
	55	//void printComputedCorrelations(const Vint& selectedPositions, const bool isNormalizeForBranch = false, const bool correlationForZscore = false);
	56	////void computeMeanAndSdPerBranch(Vdouble& meanEventsPerBranch01, Vdouble& meanEventsPerBranch10, Vdouble& sdEventsPerBranch01,Vdouble& sdEventsPerBranch10);
	57	//void fillMapValPerPosPerBranch(VVdouble& expEventsPerPosPerBranch,const int from, const int to, VVVVdouble& map_PosNodeXY
	58	// ,const bool isNormalizeForBranch = true, MDOUBLE* cutOff_p =NULL);
	59	//void fillCorrPerSelectedSites(Vdouble& correlationPerPos,VVdouble& expEventsPerPosPerBranch,const int selectedSite, const bool isPearson=true);
	60
	61
	62	Vdouble get_expV01(){return _expV01;};
	63	Vdouble get_expV10(){return _expV10;};
	64	VVVdouble get_expV(){return _expV;};
	65
	66	Vdouble get_probV01(){return _probV01;};
	67	Vdouble get_probV10(){return _probV10;};
	68	VVVdouble get_probV(){return _probV;};
	69
	70	VVVVdouble getExpChanges(){return _expChanges_PosNodeXY;}; // expChanges_PosNodeXY[pos][nodeID][x][y]
	71	VVVVdouble getProbChanges(){return _probChanges_PosNodeXY;}; // probChangesForBranch[pos][nodeID][x][y]
	72	VVVVdouble getJointProb(){return _jointProb_PosNodeXY;}; // _jointProb_PosNodeXY[pos][nodeID][x][y]
	73
	74
	75	//VVdouble getPerPosPerBranch01(){return _expPerPosPerBranch01;};
	76	//VVdouble getPerPosPerBranch10(){return _expPerPosPerBranch10;};
	77	//VVdouble getPerPosPerBranch(){return _expPerPosPerBranch;}; // vector of both, concatenated
	78
	79	//VVdouble getcorrelationPerSitePerPosGainGainSpearman(){return _correlationPerSitePerPosGainGainSpearman;};
	80	//VVdouble getcorrelationPerSitePerPosLossLossSpearman(){return _correlationPerSitePerPosLossLossSpearman;};
	81	//VVdouble getcorrelationPerSitePerPosBothSpearman(){return _correlationPerSitePerPosBothSpearman;};
	82
	83	//VVdouble getcorrelationPerSitePerPosGainGainPearson(){return _correlationPerSitePerPosGainGainPearson;};
	84	//VVdouble getcorrelationPerSitePerPosLossLossPearson(){return _correlationPerSitePerPosLossLossPearson;};
	85	//VVdouble getcorrelationPerSitePerPosBothPearson(){return _correlationPerSitePerPosBothPearson;};
	86
	87
	88
	89	protected:
	90	//func
	91	void printGainLossProbabilityPerPosPerBranch(int pos, MDOUBLE probCutOff, VVVdouble& probChanges, ostream& out, ostream& outCount);
	92	void printGainLossExpectationPerBranch(VVVdouble& expectChanges, ostream& out);
	93
	94	void printGainLossProbExpPerPosPerBranch(int pos, MDOUBLE probCutOff, MDOUBLE countCutOff, VVVdouble& probChanges, VVVdouble& expChanges, ostream& out, ostream& outCount);
	95	void printGainLossProbExpPerPosPerBranchFewCutOffs(int pos, MDOUBLE probCutOff,
	96	MDOUBLE countCutOffLow,MDOUBLE countCutOffIncrem, MDOUBLE countCutOffHigh, VVVdouble& probChanges, VVVdouble& expChanges, ostream& out, ostream& outSum);
	97
	98
	99	protected:
	100	//members
	101	stochasticProcess *_sp;
	102	int _alphabetSize;
	103	tree _tr;
	104	sequenceContainer _sc;
	105
	106	vector<vector<stochasticProcess*> > _spVVec; //save stochasticProcess for each category
	107	distribution* _gainDist;
	108	distribution* _lossDist;
	109
	110	sequence* _refSeq; // the reference sequence
	111	string _outDir;
	112	bool _isSilent;
	113
	114	VVdouble _postProbPerCatPerPos; // the posterior probability for each position for each rate category
	115	VVVdouble _postProbPerSpPerCatPerPos; // _LpostPerSpPerCat[sp][rateCat][pos]
	116	MDOUBLE _distanceFromNearestOTUForRecent;
	117
	118	Vdouble _expV01;
	119	Vdouble _expV10;
	120	VVVdouble _expV;
	121
	122	Vdouble _probV01;
	123	Vdouble _probV10;
	124	VVVdouble _probV;
	125
	126	//VVVVdouble _posteriorsGivenTerminals; // posteriorsGivenTerminals[pos][nodeID][x][y]
	127	VVVVdouble _probChanges_PosNodeXY; // probChanges_PosNodeXY[pos][nodeID][fatherState][sonState] - after simulations
	128	VVVVdouble _expChanges_PosNodeXY; // expChanges_PosNodeXY[pos][nodeID][fatherState][sonState] - after simulations and postProb
	129	VVVdouble _expChanges_NodeXY; // Summed from _expChanges_PosNodeXY - to expChanges_NodeXY[nodeID][fatherState][sonState]
	130	VVVVdouble _jointProb_PosNodeXY; // probJoint_PosNodeXY[pos][nodeID][fatherState][sonState] - after computePosteriorOfChangeGivenTerminals
	131
	132	//// required for correlation analysis
	133	//VVdouble _expPerPosPerBranch01;
	134	//VVdouble _expPerPosPerBranch10;
	135	//VVdouble _expPerPosPerBranch;
	136	//// correlation vectors
	137	//VVdouble _correlationPerSitePerPosGainGainSpearman;
	138	//VVdouble _correlationPerSitePerPosLossLossSpearman;
	139	//VVdouble _correlationPerSitePerPosBothSpearman;
	140
	141	//VVdouble _correlationPerSitePerPosGainGainPearson;
	142	//VVdouble _correlationPerSitePerPosLossLossPearson;
	143	//VVdouble _correlationPerSitePerPosBothPearson;
	144
	145	};
	146
	147	#endif
	148
	149
	150
	151
	152
	153
	154
	155
	156
	157
	158
	159
	160
	161
	162
	163
	164
	165
	166
	167
	168

+392

-0

programs/gainLoss/computePosteriorExpectationOfChange.cpp less more

	0	/*
	1	Copyright (C) 2011 Tal Pupko TalP@tauex.tau.ac.il.
	2
	3	This program is free software: you can redistribute it and/or modify
	4	it under the terms of the GNU General Public License as published by
	5	the Free Software Foundation, either version 3 of the License, or
	6	(at your option) any later version.
	7
	8	This program is distributed in the hope that it will be useful,
	9	but WITHOUT ANY WARRANTY; without even the implied warranty of
	10	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	11	GNU General Public License for more details.
	12
	13	You should have received a copy of the GNU General Public License
	14	along with this program. If not, see <http://www.gnu.org/licenses/>.
	15	*/
	16	#include "computePosteriorExpectationOfChange.h"
	17	#include "definitions.h"
	18	#include "computeDownAlg.h"
	19	#include "computeUpAlg.h"
	20	#include "matrixUtils.h"
	21	#include "treeIt.h"
	22	#include "likelihoodComputation.h"
	23	#include "gainLossOptions.h"
	24	#include "gainLossModel.h"
	25	#include "definitions.h"
	26
	27	using namespace std;
	28
	29	/********************************************************************************************
	30	computePosteriorExpectationOfChange
	31	*********************************************************************************************/
	32	computePosteriorExpectationOfChange::computePosteriorExpectationOfChange(const tree &tr, const sequenceContainer &sc, stochasticProcess *sp):
	33	_tr(tr), _sc(sc){
	34	if(!sp){
	35	errorMsg::reportError("error in the constructor computePosteriorExpectationOfChange sp argument is NULL");
	36	}
	37	else{
	38	_sp = sp;
	39	}
	40	}
	41	/********************************************************************************************
	42	Expectation of number of changes from character u to v --- =
	43	sum over all changes x,y:
	44	Posterior(Node=x,Father=y\|D)*Exp(changes u to v\|Node=x,Father=y)
	45	The second term is given to the function as input (can be obtained via simulations)
	46	*********************************************************************************************/
	47	VVdouble computePosteriorExpectationOfChange::computeExpectationAcrossTree(
	48	simulateJumps &sim, //input given from simulation studies
	49	const VVVdouble &posteriorProbs,
	50	VVVdouble &expForBranch)
	51	{
	52	int alphabetSize = _sp->alphabetSize();
	53	VVdouble res;
	54	resizeMatrix(res,alphabetSize,alphabetSize);
	55	treeIterTopDownConst tIt(_tr);
	56	for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
	57	for (int fromState=0;fromState<alphabetSize;++fromState)
	58	{
	59	for (int toState=0;toState<alphabetSize;++toState)
	60	{
	61	if (fromState==toState)
	62	continue;
	63	expForBranch[mynode->id()][fromState][toState] = computeExpectationOfChangePerBranch(sim,posteriorProbs,mynode,fromState,toState);
	64	res[fromState][toState] +=expForBranch[mynode->id()][fromState][toState];
	65
	66	}
	67	}
	68	}
	69	return res;
	70	}
	71	/********************************************************************************************
	72	*********************************************************************************************/
	73	MDOUBLE computePosteriorExpectationOfChange::computeExpectationOfChangePerBranch(
	74	simulateJumps &sim, //input given from simulation studies
	75	const VVVdouble &posteriorProbsGivenTerminals,
	76	tree::nodeP node,int fromState, int toState)
	77	{
	78	int alphabetSize = _sp->alphabetSize();
	79
	80	MDOUBLE nodeExpectation = 0;
	81	for (int x = 0; x<alphabetSize; ++x){
	82	for (int y = 0; y<alphabetSize; ++y){
	83	nodeExpectation+=(posteriorProbsGivenTerminals[node->id()][x][y]*
	84	sim.getExpectation(node->name(),x,y,fromState,toState));
	85
	86	if(node->name()=="A" && x==0){ //DEBUG
	87	LOG(9,<<"node "<<node->name()<<" from "<<fromState<<" to "<<toState<<" given "<<x<<" and "<<y
	88	<<" sim= "<< sim.getExpectation(node->name(),x,y,fromState,toState)
	89	<<" sim*'Pij'= "<< sim.getExpectation(node->name(),x,y,fromState,toState)
	90	*sim.getTotalTerminal(node->name(),x,y)/gainLossOptions::_numOfSimulationsForPotExp
	91	//*sim.getTotalTerminal(node->name(),x,y)/(sim.getTotalTerminal(node->name(),x,1)+sim.getTotalTerminal(node->name(),x,0))
	92	//<<" terminals x and y= "<< sim.getTotalTerminal(node->name(),x,y)<<" terminal start x= "<< (sim.getTotalTerminal(node->name(),x,1)+sim.getTotalTerminal(node->name(),x,0))
	93	<<endl);
	94	}
	95	}
	96	}
	97	if(node->name()=="A" ){ // DEBUG
	98	//LOGnOUT(9,<<"Sim node A "<<node->dis2father()<<" from "<<fromState<<" to "<<toState<<" exp="<<expGivenStart0nodeA<<endl); // DEBUG
	99	LOGnOUT(9,<<"nodeExpectation fromState "<<fromState<<" toState "<<toState<<" = "<<nodeExpectation<<endl);
	100	}
	101	return nodeExpectation;
	102	}
	103
	104	/********************************************************************************************
	105	Posterior probabilities computed across entire tree, for all changes from character u to v
	106	*********************************************************************************************/
	107	VVdouble computePosteriorExpectationOfChange::computePosteriorAcrossTree(
	108	simulateJumps &sim, //input given from simulation studies
	109	const VVVdouble &posteriorProbsGivenTerminals,VVVdouble &probsForBranch)
	110	{
	111	int alphabetSize = _sp->alphabetSize();
	112	// N: resized before
	113	//probsForBranch.resize(numNodes);
	114	//for (int n=0;n<numNodes;++n)
	115	// resizeMatrix(probsForBranch[n],alphabetSize,alphabetSize);
	116
	117	VVdouble res;
	118	resizeMatrix(res,alphabetSize,alphabetSize);
	119	treeIterTopDownConst tIt(_tr);
	120	for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
	121	for (int fromState=0;fromState<alphabetSize;++fromState)
	122	{
	123	for (int toState=0;toState<alphabetSize;++toState)
	124	{
	125	if (fromState==toState)
	126	continue;
	127	probsForBranch[mynode->id()][fromState][toState]= computePosteriorOfChangePerBranch(sim,posteriorProbsGivenTerminals,mynode,fromState,toState);
	128	res[fromState][toState] +=probsForBranch[mynode->id()][fromState][toState];
	129	}
	130	}
	131	}
	132	return res;
	133	}
	134
	135
	136	/********************************************************************************************
	137	*********************************************************************************************/
	138	MDOUBLE computePosteriorExpectationOfChange::computePosteriorOfChangePerBranch(simulateJumps &sim, //input given from simulation studies
	139	const VVVdouble &posteriorProbs,
	140	tree::nodeP node,
	141	int fromState, int toState)
	142	{
	143	int alphabetSize = _sp->alphabetSize();
	144	MDOUBLE nodeProbability = 0;
	145
	146	for (int x=0;x<alphabetSize;++x)
	147	{
	148	for (int y=0;y<alphabetSize;++y)
	149	{
	150	nodeProbability+=sim.getProb(node->name(),x,y,fromState,toState)*posteriorProbs[node->id()][x][y];
	151	if(node->name()=="A" ){ //// DEBUG && x==0 && y==0
	152	LOGnOUT(9,<<"Sim nodeProbability, given start "<<x<<" end "<<y<<" fromState "<<fromState<<" toState "<<toState<<" = "
	153	<<sim.getProb(node->name(),x,y,fromState,toState)
	154	<<endl);
	155	}
	156	}
	157	}
	158	if(node->name()=="A"){ //// DEBUG
	159	LOGnOUT(8,<<"Sim nodeProbability fromState "<<fromState<<" toState "<<toState<<" = "<<nodeProbability<<endl);
	160	}
	161	return nodeProbability;
	162	}
	163
	164
	165	/********************************************************************************************
	166	Posterior of observing a certain state change along a branch (joint):
	167	P(Node=x,Father=y\|D) = P(D,Node=x,Father=y)/P(D)
	168	usage: posteriorPerNodePer2States[mynode->id()][fatherState][sonState]
	169	*********************************************************************************************/
	170	void computePosteriorExpectationOfChange::computePosteriorOfChangeGivenTerminals(VVVdouble &posteriorPerNodePer2States, int pos){
	171	int numNodes = _tr.getNodesNum();
	172	int alphabetSize = _sp->alphabetSize();
	173	posteriorPerNodePer2States.resize(numNodes);
	174	for (int n=0;n<posteriorPerNodePer2States.size();++n)
	175	resizeMatrix(posteriorPerNodePer2States[n],alphabetSize,alphabetSize);
	176	suffStatGlobalHomPos sscUp;
	177	suffStatGlobalGamPos sscDownNonRev; // The "Gam" is used for the letter at father - sscGivenRoot
	178	//suffStatGlobalHomPos sscDown;
	179	sscUp.allocatePlace(numNodes,alphabetSize);
	180	computePijHom pi;
	181	pi.fillPij(_tr,*_sp);
	182
	183	computeUpAlg comp_Up;
	184	computeDownAlg comp_Down;
	185	comp_Up.fillComputeUp(_tr,_sc,pos,pi,sscUp);
	186	//if(!_sp->isReversible())
	187	comp_Down.fillComputeDownNonReversible(_tr,_sc,pos,pi,sscDownNonRev,sscUp);
	188	//else
	189	//comp_Down.fillComputeDown(_tr,_sc,pos,pi,sscDown,sscUp);
	190	//errorMsg::reportError("error @computePosteriorExpectationOfChange::computePosteriorOfChangeGivenTerminals - Reversible not implemented\n");
	191
	192	treeIterTopDownConst tIt(_tr);
	193	doubleRep Ldata = likelihoodComputation::getLofPos(pos,_tr,_sc,pi,*_sp);
	194	for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
	195	for (int sonState = 0; sonState<alphabetSize; ++sonState){
	196	for (int fatherState = 0; fatherState<alphabetSize; ++fatherState){
	197	posteriorPerNodePer2States[mynode->id()][fatherState][sonState]= computePosterioGivenTerminalsPerBranch(mynode->id(),sonState,fatherState,sscUp,sscDownNonRev, pi,Ldata,mynode->name());
	198	LOGnOUT(7,<<"mynode"<<"\t"<<"fatherState"<<"\t"<<"sonState"<<"\t"<<"posterior(joint)"<<endl);
	199	LOGnOUT(7,<<mynode->name()<<"\t"<<fatherState<<"\t"<<sonState<<"\t"<<posteriorPerNodePer2States[mynode->id()][fatherState][sonState]<<endl);
	200	}
	201	}
	202	}
	203	}
	204	/********************************************************************************************
	205	Posterior of observing a certain state change along a branch:
	206	P(Node=sonState,Father=fatherState\|D) = P(D,Node=sonState,Father=fatherState)/P(D)
	207	usage: posteriorPerNodePer2States[mynode->id()][fatherState][sonState]
	208	*********************************************************************************************/
	209	MDOUBLE computePosteriorExpectationOfChange::computePosterioGivenTerminalsPerBranch
	210	(int nodeId,int sonState, int fatherState,suffStatGlobalHomPos &sscUp,
	211	suffStatGlobalGamPos &sscDown,computePijHom &pi, doubleRep &Ldata, const string nodeName)
	212	{
	213	doubleRep res=0.0;
	214	doubleRep resDXY, Down, Up;
	215	MDOUBLE pij;
	216	for (int stateAtRoot = 0; stateAtRoot<_sp->alphabetSize(); ++stateAtRoot){
	217	Down = sscDown.get(stateAtRoot,nodeId,fatherState);
	218	Up = sscUp.get(nodeId,sonState);
	219	pij = pi.getPij(nodeId,fatherState,sonState);
	220
	221	res+=(_sp->freq(stateAtRoot)*
	222	Down*
	223	Up*
	224	pij);
	225	}
	226	resDXY = res;
	227	res/=Ldata;
	228	if(gainLossOptions::_printDEBUGinfo)
	229	LOG(3,<<nodeName<<" son "<<sonState<<" Down "<<Down<<" father "<<fatherState<<" Up "<<Up<<" pij "<<pij<<" resDXY "<<resDXY<<" LLData "<<Ldata<<" prob "<<res<<endl);
	230
	231	if (res >1+1e-4){
	232	LOGnOUT(2,<<nodeId<<" son "<<sonState<<" Down "<<Down<<" father "<<fatherState<<" Up "<<Up<<" pij "<<pij<<" resDXY "<<resDXY<<" LLData "<<Ldata<<" prob "<<res<<endl);
	233	res = 1;
	234	}
	235	if (res<-1e-4){
	236	LOGnOUT(2,<<nodeId<<" son "<<sonState<<" Down "<<Down<<" father "<<fatherState<<" Up "<<Up<<" pij "<<pij<<" resDXY "<<resDXY<<" LLData "<<Ldata<<" prob "<<res<<endl);
	237	res = 0;
	238	}
	239	if ((res > 1 +0.01) \|\| (res< -0.01)){
	240	string err = "Error in computePosteriorExpectationOfChange::computePosterioGivenTerminalsPerBranch, non probability value ";
	241	err+=double2string(convert(res));
	242	err+=" at node ";
	243	err+=int2string(nodeId);
	244	err+= " sonState ";
	245	err+= int2string(sonState);
	246	err+= " fatherState ";
	247	err+= int2string(fatherState);
	248	errorMsg::reportError(err);
	249	}
	250	return convert(res);
	251	}
	252
	253
	254
	255
	256
	257
	258	/********************************************************************************************
	259	Suchard - Analytic solution - Expectation
	260	*********************************************************************************************/
	261	/********************************************************************************************
	262	Expectation of number of changes from character u to v --- =
	263	Suchard...
	264	*********************************************************************************************/
	265	VVdouble computePosteriorExpectationOfChange::computeExpectationAcrossTree(
	266	computeJumps &computeJumpsObj, // object for Analytical computation
	267	const VVVdouble &posteriorProbs,
	268	VVVdouble &expForBranch) // 2 be filled
	269	{
	270	int alphabetSize = _sp->alphabetSize();
	271	VVdouble res;
	272	resizeMatrix(res,alphabetSize,alphabetSize);
	273	treeIterTopDownConst tIt(_tr);
	274	for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
	275	for (int fromState=0;fromState<alphabetSize;++fromState)
	276	{
	277	for (int toState=0;toState<alphabetSize;++toState)
	278	{
	279	if (fromState==toState)
	280	continue;
	281	expForBranch[mynode->id()][fromState][toState] = computeExpectationOfChangePerBranch(computeJumpsObj,posteriorProbs,mynode,fromState,toState);
	282	res[fromState][toState] +=expForBranch[mynode->id()][fromState][toState];
	283	}
	284	}
	285	}
	286	return res;
	287	}
	288	/********************************************************************************************
	289	computeExpectationOfChangePerBranch - Analytic...
	290	*********************************************************************************************/
	291	MDOUBLE computePosteriorExpectationOfChange::computeExpectationOfChangePerBranch(
	292	computeJumps &computeJumpsObj, // object for analytical computation
	293	const VVVdouble &posteriorProbsGivenTerminals,
	294	tree::nodeP node,int fromState, int toState)
	295	{
	296	MDOUBLE nodeExpectation = 0;
	297	//MDOUBLE expGivenStart0nodeA = 0; // DEBUG
	298	//LOG(6,<<"\n analytic "<<endl);
	299
	300
	301	if(node->dis2father()<0) // ROOT
	302	return nodeExpectation;
	303	int alphabetSize = _sp->alphabetSize();
	304	for (int x = 0; x<alphabetSize; ++x){
	305	for (int y = 0; y<alphabetSize; ++y){
	306	nodeExpectation+=(posteriorProbsGivenTerminals[node->id()][x][y]*
	307	computeJumpsObj.getExpectation(node->dis2father(),x,y,fromState,toState));
	308	if(node->name()=="A" && x==0){ //// DEBUG
	309	LOG(9,<<"node "<<node->name()<<" All transitions "<<" given "<<x<<" and "<<y
	310	<<" m= "<< computeJumpsObj.getTotalExpectation(node->dis2father(),x,y)
	311	<<" m/pij= "<< computeJumpsObj.getTotalExpectation(node->dis2father(),x,y)/static_cast<gainLossModel*>(_sp->getPijAccelerator()->getReplacementModel())->Pij_t(x,y,node->dis2father())
	312	<<" Pij=" << static_cast<gainLossModel*>(_sp->getPijAccelerator()->getReplacementModel())->Pij_t(x,y,node->dis2father())<<endl);
	313	}
	314	LOG(7,<<"node "<<node->name()<<" given St="<<x<<" and Ed="<<y<<" from "<<fromState<<" to "<<toState
	315	<<" JointPost= "<< posteriorProbsGivenTerminals[node->id()][x][y]
	316	<<" Exp= "<< computeJumpsObj.getExpectation(node->dis2father(),x,y,fromState,toState)
	317	<<endl);
	318	}
	319	}
	320	if(node->name()=="A"){ //// DEBUG
	321	LOGnOUT(9,<<"ComG node A All "<<node->dis2father()<<" exp="<<computeJumpsObj.gFunc_dr(node->dis2father(),0)<<endl);
	322	LOGnOUT(9,<<"nodeExpectation fromState "<<fromState<<" toState "<<toState<<" = "<<nodeExpectation<<endl);
	323	}
	324	return nodeExpectation;
	325	}
	326	/********************************************************************************************
	327	Suchard - Analytic solution - Probability
	328	*********************************************************************************************/
	329	/********************************************************************************************
	330	computePosteriorAcrossTree...
	331	*********************************************************************************************/
	332	VVdouble computePosteriorExpectationOfChange::computePosteriorAcrossTree(
	333	computeJumps &computeJumpsObj, //input given from simulation studies
	334	const VVVdouble &posteriorProbsGivenTerminals,VVVdouble &probsForBranch)
	335	{
	336	int alphabetSize = _sp->alphabetSize();
	337	// N: resized before
	338	//probsForBranch.resize(numNodes);
	339	//for (int n=0;n<numNodes;++n)
	340	// resizeMatrix(probsForBranch[n],alphabetSize,alphabetSize);
	341
	342	VVdouble res;
	343	resizeMatrix(res,alphabetSize,alphabetSize);
	344	treeIterTopDownConst tIt(_tr);
	345	for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
	346	for (int fromState=0;fromState<alphabetSize;++fromState)
	347	{
	348	for (int toState=0;toState<alphabetSize;++toState)
	349	{
	350	if (fromState==toState)
	351	continue;
	352	probsForBranch[mynode->id()][fromState][toState]= computePosteriorOfChangePerBranch(computeJumpsObj,posteriorProbsGivenTerminals,mynode,fromState,toState);
	353	res[fromState][toState] +=probsForBranch[mynode->id()][fromState][toState];
	354	}
	355	}
	356	}
	357	return res;
	358	}
	359	/********************************************************************************************
	360	Suchard
	361	*********************************************************************************************/
	362	MDOUBLE computePosteriorExpectationOfChange::computePosteriorOfChangePerBranch(
	363	computeJumps &computeJumpsObj, //input given from simulation studies
	364	const VVVdouble &posteriorProbs,
	365	tree::nodeP node,int fromState, int toState)
	366	{
	367	int alphabetSize = _sp->alphabetSize();
	368	MDOUBLE nodeProbability = 0;
	369
	370	for (int x=0;x<alphabetSize;++x)
	371	{
	372	for (int y=0;y<alphabetSize;++y)
	373	{
	374	nodeProbability+=computeJumpsObj.getProb(node->dis2father(),x,y,fromState,toState)*posteriorProbs[node->id()][x][y];
	375	if(node->name()=="A" ){ //// DEBUG && x==0 && y==0
	376	LOGnOUT(9,<<"Anal nodeProbability, given start "<<x<<" end "<<y<<" fromState "<<fromState<<" toState "<<toState<<" = "
	377	<<computeJumpsObj.getProb(node->dis2father(),x,y,fromState,toState)
	378	<<endl);
	379	}
	380	}
	381	}
	382	if(node->name()=="A"){ //// DEBUG
	383	LOGnOUT(8,<<"Anal nodeProbability fromState "<<fromState<<" toState "<<toState<<" = "<<nodeProbability<<endl);
	384	}
	385
	386	return nodeProbability;
	387	}
	388
	389
	390
	391

+92

-0

programs/gainLoss/computePosteriorExpectationOfChange.h less more

	0	/*
	1	Copyright (C) 2011 Tal Pupko TalP@tauex.tau.ac.il.
	2
	3	This program is free software: you can redistribute it and/or modify
	4	it under the terms of the GNU General Public License as published by
	5	the Free Software Foundation, either version 3 of the License, or
	6	(at your option) any later version.
	7
	8	This program is distributed in the hope that it will be useful,
	9	but WITHOUT ANY WARRANTY; without even the implied warranty of
	10	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	11	GNU General Public License for more details.
	12
	13	You should have received a copy of the GNU General Public License
	14	along with this program. If not, see <http://www.gnu.org/licenses/>.
	15	*/
	16
	17
	18
	19	#ifndef ___COMPUTE_POSTERIOR_EXPECTATION_OF_CHANGE
	20	#define ___COMPUTE_POSTERIOR_EXPECTATION_OF_CHANGE
	21
	22	#include "definitions.h"
	23	#include "simulateJumps.h"
	24	#include "computeJumps.h"
	25
	26	#include "tree.h"
	27	#include "sequenceContainer.h"
	28	#include "stochasticProcess.h"
	29	#include "suffStatComponent.h"
	30	#include "computePijComponent.h"
	31
	32	class computePosteriorExpectationOfChange {
	33
	34	public:
	35	explicit computePosteriorExpectationOfChange(const tree &tr, const sequenceContainer &sc, stochasticProcess *sp);
	36	virtual ~computePosteriorExpectationOfChange(){};
	37
	38
	39	VVdouble computeExpectationAcrossTree(simulateJumps &sim, //input given from simulation studies
	40	const VVVdouble &posteriorProbs, VVVdouble &expForBranch);
	41
	42	VVdouble computeExpectationAcrossTree(computeJumps &computeJumpsObj, //Suchard
	43	const VVVdouble &posteriorProbs,VVVdouble &expForBranch);
	44
	45	VVdouble computePosteriorAcrossTree(simulateJumps &sim, //input given from simulation studies
	46	const VVVdouble &posteriorProbsGivenTerminals,VVVdouble &probsForBranch);
	47
	48	VVdouble computePosteriorAcrossTree(computeJumps &computeJumpsObj, //Suchard
	49	const VVVdouble &posteriorProbsGivenTerminals,VVVdouble &probsForBranch);
	50
	51
	52	void computePosteriorOfChangeGivenTerminals(VVVdouble &posteriorPerNodePer2States, int pos);
	53
	54	private:
	55	MDOUBLE computePosteriorOfChangePerBranch(
	56	simulateJumps &sim, //input given from simulation studies
	57	const VVVdouble &posteriorProbs,
	58	tree::nodeP node,
	59	int fromState, int toState);
	60
	61	MDOUBLE computePosteriorOfChangePerBranch(
	62	computeJumps &computeJumpsObj, //Suchard
	63	const VVVdouble &posteriorProbs,
	64	tree::nodeP node,
	65	int fromState, int toState);
	66
	67
	68	MDOUBLE computeExpectationOfChangePerBranch(
	69	simulateJumps &sim, //input given from simulation studies
	70	const VVVdouble &posteriorProbsGivenTerminals,
	71	tree::nodeP node,
	72	int fromState, int toState);
	73
	74	MDOUBLE computeExpectationOfChangePerBranch( //Suchard
	75	computeJumps &computeJumpsObj, //Suchard
	76	const VVVdouble &posteriorProbsGivenTerminals,
	77	tree::nodeP node,int fromState, int toState);
	78
	79
	80	MDOUBLE computePosterioGivenTerminalsPerBranch (int nodeId,int sonState, int fatherState,suffStatGlobalHomPos &sscUp,
	81	suffStatGlobalGamPos &sscDown,computePijHom &pi, doubleRep &LData, const string nodeName);
	82
	83
	84	private:
	85	const tree &_tr;
	86	const sequenceContainer &_sc;
	87	stochasticProcess *_sp;
	88	};
	89
	90
	91	#endif

+5986

-0

programs/gainLoss/gainLoss.cpp less more

	0
	1	/*
	2	Copyright (C) 2011 Tal Pupko TalP@tauex.tau.ac.il.
	3
	4	This program is free software: you can redistribute it and/or modify
	5	it under the terms of the GNU General Public License as published by
	6	the Free Software Foundation, either version 3 of the License, or
	7	(at your option) any later version.
	8
	9	This program is distributed in the hope that it will be useful,
	10	but WITHOUT ANY WARRANTY; without even the implied warranty of
	11	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	12	GNU General Public License for more details.
	13
	14	You should have received a copy of the GNU General Public License
	15	along with this program. If not, see <http://www.gnu.org/licenses/>.
	16	*/
	17
	18	#include "computePosteriorExpectationOfChange.h"
	19	#include "gainLoss.h"
	20	#include "gainLossOptimizer.h"
	21	#include "gainLossOptions.h"
	22	#include "gainLossUtils.h"
	23	#include "gammaDistributionFixedCategories.h"
	24	#include "gammaDistributionPlusInvariant.h"
	25	#include "mixtureDistribution.h"
	26	#include "simulateTree.h"
	27	#include "someUtil.h"
	28	#include "phylipFormat.h"
	29	#include "maseFormat.h"
	30	#include "fastaFormat.h"
	31	#include "clustalFormat.h"
	32	#include "rate4siteGL.h"
	33	#include "gainLoss4site.h"
	34	#include "computeCountsGL.h"
	35	#include "computeCorrelations.h"
	36
	37	#include "simulateOnePos.h"
	38	#include "Parameters.h"
	39	#include "sankoffReconstructGL.h"
	40	#include "bblLS.h"
	41	//#include "branchScaleTree.h"
	42	#include "bblEMfixRoot.h"
	43	#include "bblEM.h"
	44
	45	#include <cstring>
	46
	47
	48	using namespace std;
	49	/********************************************************************************************
	50	gainLoss TOC (group of functions by order of appearance):
	51	-constructor+destructor
	52	-initialize
	53	-run
	54	-start(basics): SequenceContainer, StochasticProcess(+Generic,+Vec), EvolTreeTopology,initMissingDataInfo
	55	-optimize: startOptimizations, optimizationsManyStarts(+NoVec, +VV), initParamsAtRandPoints(+SPvv)
	56	-start(computations):
	57	-prints
	58	-Mixture
	59	-simulate
	60	-Old function, now inside other classes
	61	*********************************************************************************************/
	62	gainLoss::gainLoss(): _sp(NULL),_unObservableData_p(NULL),_lossDist(NULL), _gainDist(NULL), _refSeq(NULL), _weightsUniqPatterns(NULL)
	63	{
	64	_weightsUniqPatterns = gainLossOptions::_weights; // since - no weights are used over positions, it is NULL
	65	_logL = 1;
	66	//_maxNumberOfSpeciesForFullOptimization = 200;
	67	//_maxSequenceLengthForFullOptimization = 20000;
	68	//_maxSpeciesNumSequenceLengthMultipForFullOptimization = 500000;
	69	}
	70	/********************************************************************************************/
	71	gainLoss::~gainLoss() {
	72	if(gainLossOptions::_gainLossDist){
	73	for (int gainCategor=0; gainCategor<_gainDist->categories(); gainCategor++){
	74	for (int lossCategor=0; lossCategor<_lossDist->categories(); lossCategor++){
	75	stochasticProcess* sp2delete = _spVVec[gainCategor][lossCategor];
	76	delete sp2delete;
	77	}
	78	}
	79	delete _gainDist;
	80	delete _lossDist;
	81	}
	82	else
	83	if (_sp) delete _sp;
	84
	85	if(_unObservableData_p) delete _unObservableData_p;
	86	if(_weightsUniqPatterns) delete _weightsUniqPatterns;
	87	if(_spSimple) delete _spSimple;
	88	}
	89	/********************************************************************************************
	90	*********************************************************************************************/
	91	void gainLoss::initialize(bool isComputeLikelihood)
	92	{
	93	printProgramInfo();
	94	printOptionParameters();
	95	if(gainLossOptions::_seqFile!=""){
	96	startSequenceContainer();
	97	fillReferenceSequence();
	98	}
	99	countOccurPerPos();
	100
	101	if(gainLossOptions::_isRemovePositionsWithHighPercentOfMissingData)
	102	removePositionsWithHighPercentOfMissingData(0.5);
	103
	104	if(gainLossOptions::_isSequenceUniqPattern)
	105	startSequenceContainerUniqPatterns();
	106
	107	startStochasticProcess(gainLossOptions::_gainLossDist);
	108	MDOUBLE epsilon2add = 0.0;
	109	//if(_gainExp<1e-08)
	110	//epsilon2add = 1e-08;
	111	_spSimple = startStochasticProcessSimpleGamma(_gainExp+epsilon2add,_lossExp,_freq); // simple initialization, based on empiricalCounting of '1' and '0'
	112	MDOUBLE norm_factor = normalizeQ(_spSimple);
	113	LOGnOUT(4,<<"Stochastic process 'simple' normalized with norm_factor="<<norm_factor<<endl);
	114
	115	startEvolTreeTopology();
	116
	117	if(gainLossOptions::_intersectTreeAndSeq){ // input tree and seq (not the same taxa) - intersect, write seq and tree
	118	intersectNamesInTreeAndSequenceContainer(_tr,_sc);
	119	LOGnOUT(4,<<"NumOfSeq= "<<_sc.numberOfSeqs()<<endl);
	120	LOGnOUT(4,<<"NumOfTaxa= "<<_tr.getLeavesNum()<<endl);
	121	bool isRemovePosNotWithinMinMax=true;
	122	int minNumOfOnes = Parameters::getInt("_minNumOfOnes");
	123	int minNumOfZeros = Parameters::getInt("_minNumOfZeros");
	124	//sequenceContainer& sc, int minNumOfOnes, int minNumOfZeros, bool isRemovePosNotWithinMinMax
	125	bool isReportRemovedPos = true;
	126	checkMinNumOfOnesOrZeros(_sc,minNumOfOnes,minNumOfZeros, isRemovePosNotWithinMinMax, isReportRemovedPos);
	127	_scWithFullLength = _sc;
	128	_scUniqPatterns = _sc;
	129	_trOrig = _tr;
	130	string strSeqNum = gainLossOptions::_outDir + "//" + "seq" + ".fa";
	131	ofstream seq_out(strSeqNum.c_str());
	132	fastaFormat:: write(seq_out,_scWithFullLength);
	133	ofstream treeStream(gainLossOptions::_treeOutFile.c_str());
	134	_tr.output(treeStream);
	135	treeStream.close();
	136	return;
	137	}
	138	if(gainLossOptions::_seqFile!="")
	139	checkThatNamesInTreeAreSameAsNamesInSequenceContainer(_tr,_sc);
	140	if(gainLossOptions::_seqFile!="" && Parameters::getInt("_accountForMissingData") && (Parameters::getInt("_minNumOfOnes")>0 \|\| Parameters::getInt("_minNumOfZeros")>0)){
	141	initializeUnObservableData();
	142	}
	143	if(gainLossOptions::_seqFile!="" && isComputeLikelihood){
	144	printTreeLikelihoodAllPosAlphTheSame(); // update of _logL is done as well
	145	}
	146	if(Parameters::getInt("_isNormalizeAtStart")){
	147	bool isNormalizeBothQandTree = false; // Under the assumption that the input tree was normalized, only need to start with Q
	148	normalizeQandTree(isComputeLikelihood, isNormalizeBothQandTree);
	149	}
	150	printTree(_tr);
	151	printModellValuesOfParams();
	152
	153	if(gainLossOptions::_printSeq && _sc.seqLen() != _scWithFullLength.seqLen() ){
	154	string strSeqNum = gainLossOptions::_outDir + "//" + "seq.not.full.length.fa";
	155	ofstream seq_out(strSeqNum.c_str());
	156	fastaFormat:: write(seq_out,_sc); // not full length
	157	}
	158	}
	159
	160
	161	/********************************************************************************************
	162	*********************************************************************************************/
	163	void gainLoss::bBLEMwithSimpleSpBeforeFullOptimization(tree& tr, const sequenceContainer& sc, stochasticProcess* spSimple,
	164	stochasticProcess* sp,
	165	const vector<vector<stochasticProcess> >& spVVec,const distribution gainDist, const distribution * lossDist,
	166	unObservableData *unObservableData_p)
	167	{
	168	LOGnOUT(4,<<" *** Starting bbBLEMwithSimpleSpBeforeFullOptimization"<<endl);
	169	//MDOUBLE oldLnoUnObservableDataCorrection = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(tr,sc,*spSimple,_weightsUniqPatterns,NULL);
	170	//MDOUBLE newLnoUnObservableDataCorrection;
	171	MDOUBLE oldL = _logL;
	172	tree oldTree = tr;
	173	MDOUBLE tollForPairwiseDist=0.01; // the BBL default, epsilon per branch (brent's value)
	174	MDOUBLE epsilonOptimizationBBLIter = max(0.1,abs(_logL)/10000);
	175	int maxNumOfIterationsBBL =50;
	176	//bool isFixedRoot = !gainLossOptions::_isReversible && !gainLossOptions::_isRootFreqEQstationary;
	177	//if(isFixedRoot){
	178	// LOGnOUT(4,<<"*** Fix Root BBL-EM. Likelihood="<<oldL<<endl);
	179	// LOGnOUT(4,<<" BBL-EM: tollForPairwiseDist="<<tollForPairwiseDist<<" and epsilonOptimizationBBLIter="<<epsilonOptimizationBBLIter<<endl);
	180	// bblEMfixRoot bblEM1(_tr, _sc, *_spSimple, NULL, maxNumOfIterationsBBL , epsilonOptimizationBBLIter,tollForPairwiseDist
	181	// ,NULL,&oldLnoUnObservableDataCorrection);
	182	// newLnoUnObservableDataCorrection = bblEM1.getTreeLikelihood();
	183	//}
	184	//else{
	185	LOGnOUT(4,<<"*** BBL-EM. Likelihood="<<oldL<<endl);
	186	LOGnOUT(4,<<" BBL-EM: tollForPairwiseDist="<<tollForPairwiseDist<<" and epsilonOptimizationBBLIter="<<epsilonOptimizationBBLIter<<endl);
	187	bblEM bblEM1(tr, sc, *spSimple, NULL, maxNumOfIterationsBBL , epsilonOptimizationBBLIter,tollForPairwiseDist
	188	,NULL,NULL); // last argument optional: &oldLnoUnObservableDataCorrection
	189
	190	//newLnoUnObservableDataCorrection = bblEM1.getTreeLikelihood();
	191	//}
	192	if(_unObservableData_p){
	193	if(!gainLossOptions::_gainLossDist)
	194	_unObservableData_p->setLforMissingData(tr,sp);
	195	else
	196	_unObservableData_p->setLforMissingData(tr,spVVec,gainDist,lossDist);
	197	}
	198	if(!gainLossOptions::_gainLossDist)
	199	_logL = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(tr,sc,*sp,_weightsUniqPatterns,unObservableData_p);
	200	else
	201	_logL = likelihoodComputationGL::getTreeLikelihoodAllPosAlphTheSame(tr,sc,spVVec,gainDist,lossDist,_weightsUniqPatterns,unObservableData_p);
	202	if(_logL < oldL){
	203	LOGnOUT(4,<<"Likelihood went down with simplified BBL-EM. from "<<oldL<<" to "<<_logL<<" Go back to old tree."<<endl);
	204	tr = oldTree;
	205	_logL = oldL;
	206	}
	207	else{
	208	LOGnOUT(4,<<"Likelihood after BBL-EM="<<_logL<<endl);
	209	}
	210	printTree(_tr);
	211	LOGnOUT(4,<<" Total branch lengths:"<<_tr.getAllBranchesLengthSum() <<endl);
	212	}
	213
	214	/********************************************************************************************
	215	*********************************************************************************************/
	216	void gainLoss::initializeBranchLengthDiff()
	217	{
	218	printProgramInfo();
	219	printOptionParameters();
	220	startSequenceContainer();
	221	fillReferenceSequence();
	222	startStochasticProcess(gainLossOptions::_gainLossDist);
	223	//startEvolTreeTopology();
	224	_tr= tree(gainLossOptions::_treeFile);
	225	checkThatNamesInTreeAreSameAsNamesInSequenceContainer(_tr,_sc);
	226	if(Parameters::getInt("_accountForMissingData")){
	227	_unObservableData_p = new unObservableData(_sc, _sp, gainLossAlphabet() ,Parameters::getInt("_minNumOfOnes"), Parameters::getInt("_minNumOfZeros"));
	228	LOGnOUT(4,<<"unObservableData object initialized with number of unObservable patterns= "<<_unObservableData_p->getNumOfUnObservablePatterns() <<endl);
	229	if(Parameters::getInt("_minNumOfOnes") >= _sc.numberOfSeqs())
	230	errorMsg::reportError("Error: number of seqs smaller than minNumOfOnes\n");
	231	updateSetLofMissingData();
	232	}
	233	printTreeLikelihoodAllPosAlphTheSame();
	234	//if(!gainLossOptions::_gainLossDist)
	235	// _logL = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(_tr,_scUniqPatterns,*_sp,_weightsUniqPatterns,_unObservableData_p);
	236	//else{
	237	// _logL = likelihoodComputationGL::getTreeLikelihoodAllPosAlphTheSame(_tr,_scUniqPatterns,_spVVec,_gainDist,_lossDist,_weightsUniqPatterns,_unObservableData_p);
	238	//}
	239	}
	240
	241	/********************************************************************************************
	242	*********************************************************************************************/
	243	void gainLoss::initializeUnObservableData(){
	244	if(Parameters::getInt("_minNumOfOnes") >= _sc.numberOfSeqs())
	245	errorMsg::reportError("Error: number of seqs smaller than minNumOfOnes\n");
	246	if( (_sc.numberOfSeqs()>250) && (Parameters::getInt("_minNumOfOnes") >1) )
	247	LOGnOUT(4,<< "WARNING: There are more than 250 sequences. Using more than 1 unObseravable pattern will run to slow\n");
	248
	249	_unObservableData_p = new unObservableData(_scWithFullLength, _sp, gainLossAlphabet() ,Parameters::getInt("_minNumOfOnes"), Parameters::getInt("_minNumOfZeros"));
	250
	251	LOGnOUT(4,<<"unObservableData object initialized with number of unObservable patterns= "<<_unObservableData_p->getNumOfUnObservablePatterns() <<endl);
	252	if(!gainLossOptions::_gainLossDist)
	253	_unObservableData_p->setLforMissingData(_tr,_sp);
	254	else
	255	_unObservableData_p->setLforMissingData(_tr,_spVVec,_gainDist,_lossDist);
	256	}
	257
	258
	259	/********************************************************************************************
	260	*********************************************************************************************/
	261	void gainLoss::run(){
	262	// Special options, partial runs
	263	if(gainLossOptions::_calculeBranchLegthDiffFactorFromInputTrees){ // if BBL is used for each branch - compare length before/after
	264	LOGnOUT(4,<<"\n\n RUN type: calculeBranchLegthDiffFactorFromInputTrees and return \n\n"<<endl);
	265	initialize();
	266	//initializeBranchLengthDiff();
	267	_trOrig= tree(gainLossOptions::_treeFileOrig);
	268	string branchLegthDiffFactor = gainLossOptions::_outDir + "//" + "branchLegthDiffFactor.txt";
	269	ofstream branchLegthDiffFactorStream(branchLegthDiffFactor.c_str());
	270	branchLegthDiffFactorStream.precision(PRECISION);
	271	computeBranchLegthDiffFactor(branchLegthDiffFactorStream);
	272	return;
	273	}
	274	if(gainLossOptions::_intersectTreeAndSeq){
	275	LOGnOUT(4,<<"\n\n RUN type: intersect Tree And Seq, write them and return \n\n"<<endl);
	276	initialize();
	277	return;
	278	}
	279	if(gainLossOptions::_simulatePosteriorExpectationOfChange){ // Test the simulation based counting
	280	LOGnOUT(4,<<"\n\n RUN type: simulatePosteriorExpectationOfChange \n\n"<<endl);
	281	initialize();
	282	if(Parameters::getInt("_performOptimizations") ){ //&& ! Parameters::getInt("_keepUserGainLossRatio")
	283	startOptimizations();
	284	}
	285	if (gainLossOptions::_treeFile=="")
	286	errorMsg::reportError("SimulatePosteriorExpectationOfChange require input tree ");
	287	startSimultePosteriorExpectationOfChange(gainLossOptions::_numberOfPositions2simulate,gainLossOptions::_numberOfSequences2simulate);
	288	return;
	289	}
	290	if(gainLossOptions::_printLikelihoodLandscape){
	291	LOGnOUT(4,<<"\n\n RUN type: printLikelihoodLandscape \n\n"<<endl);
	292	//printLikelihoodLandscape(_sp); //UtilFunction (Ad-hoc)
	293	initialize();
	294	printLikelihoodLandscapeStatFreqRatioAndRootFreqRatio(); //UtilFunction (Ad-hoc)
	295	return;
	296	}
	297	if(gainLossOptions::_printP11forgain){
	298	LOGnOUT(4,<<"\n\n RUN type: _printP11forgain \n\n"<<endl);
	299	initialize();
	300	P11forgain(); //UtilFunction (Ad-hoc)
	301	return;
	302	}
	303	if(gainLossOptions::_isOnlyParsimony){
	304	initialize(false);
	305	startMaxParsimonyChange();
	306	//printTree(_tr); // already done in initialize
	307	return;
	308	}
	309	bool Normalize =false;
	310	if(Normalize){
	311	bool isComputeL = false;
	312	initialize(isComputeL);
	313	bool isNormalizeBothQandTree = false; // Under the assumption that the input tree was normalized, only need to start with Q
	314	normalizeQandTree(isComputeL, isNormalizeBothQandTree);
	315	//printTree(_tr); // already done in initialize
	316	return;
	317	}
	318
	319	//////////////////////////////////////////////////////////////////////////
	320	////////////////// Start normal run
	321	initialize(gainLossOptions::_isComputeLikelihoodDuringInit);
	322	printTree(_tr);
	323	//printQ();
	324	if(gainLossOptions::_calculeMaxParsimonyChange \|\| gainLossOptions::_isRemoveSimulatedPositionsBasedOnMP \|\| gainLossOptions::_isCorrelationsBasedOnMaxParsimonyMapping){ // simulation based counting
	325	startMaxParsimonyChange();
	326	}
	327
	328	startOptimizations();
	329	if(Parameters::getInt("_isNormalizeQandTreeafterOpt")){
	330	normalizeQandTree();
	331	}
	332	printModellValuesOfParams();
	333
	334	// intersect according to missing data
	335	if(gainLossOptions::_isRemoveSeqWithUnknownForLastSelectedSiteForCorrelation){
	336	LOGnOUT(4,<<"\n Remove Seq With Unknown For Last Position. \n Compute correlation for last position"<<endl);
	337	RemoveSeqWithUnknownForSelectedSiteForCorrelation(_sc,_tr);
	338	}
	339
	340	if(gainLossOptions::_printTree){
	341	printTree(_tr); // Two trees are printed. ("TheTree.INodes.ph" with internal nodes)
	342	}
	343	if(gainLossOptions::_printSeq){
	344	string strSeqNum = gainLossOptions::_outDir + "//" + "seq.fa";
	345	ofstream seq_out(strSeqNum.c_str());
	346	fastaFormat:: write(seq_out,_scWithFullLength);
	347	}
	348	if (gainLossOptions::_isComputeDistanceFromRootForRecent){
	349	LOGnOUT(4,<<"\n Estimate: distanceFromRootForRecent, distanceFromNearestOTUForRecent"<<endl);
	350	_distanceFromRootForRecent = computeDistanceFromRootForRecent(_tr);
	351	_distanceFromNearestOTUForRecent = computeDistanceNearestOTUforRecent(_tr);
	352	}else{
	353	LOGnOUT(4,<<"\n WARN: distanceFromRootForRecent=1, distanceFromNearestOTUForRecent=0.000001 are not estimated"<<endl);
	354	_distanceFromRootForRecent = 1;
	355	_distanceFromNearestOTUForRecent = 0.000001;
	356	}
	357	if(gainLossOptions::_printPij_t){
	358	printPij_t(0.1);
	359	printQ();
	360	}
	361	if(gainLossOptions::_printLofPos){
	362	printLofPos();
	363	}
	364	if(gainLossOptions::_printLofPosBothModels){
	365	//LOGnOUT(4,<<"_printLofPosBothModels not implemented in this version"<<endl);
	366	printLofPosBothModels(); // Print Likelihood of each position for m0 and m1
	367	}
	368	if(gainLossOptions::_calculateRate4site && !gainLossOptions::_gainLossDist && gainLossOptions::_rateDistributionType!=gainLossOptions::UNIFORM){
	369	startRate4Site(_scWithFullLength,_tr,_sp,gainLossOptions::_outDir,_unObservableData_p);
	370	}
	371	if(gainLossOptions::_calculeGainLoss4site && gainLossOptions::_gainLossDist){
	372	startGainLoss4Site(_scWithFullLength,_tr,_spVVec,_gainDist,_lossDist,gainLossOptions::_outDir,_unObservableData_p);
	373	}
	374
	375
	376	// Note: fill VVVVdouble _probChanges_PosNodeXY - Delete it after AncestralReconstruct?
	377	if(gainLossOptions::_calculePosteriorExpectationOfChange && !gainLossOptions::_isCorrelationsBasedOnMaxParsimonyMapping){
	378	startComputePosteriorExpectationOfChange();
	379	}
	380	if(gainLossOptions::_printComputedCorrelations){
	381	startComputeAmongSitesCorrelations();
	382	}
	383	if(gainLossOptions::_performParametricBootstapCorrelation){
	384	startParametricBootstapCorrelation();
	385	}
	386	if(gainLossOptions::_printTree && gainLossOptions::_calculePosteriorExpectationOfChange && _SMPerPos.size()>0){ // to check if it was done
	387	string treeGain = gainLossOptions::_outDir + "//" + "TheTree.Gain.ph";
	388	printTree(_trGain, treeGain);
	389	string treeLoss = gainLossOptions::_outDir + "//" + "TheTree.Loss.ph";
	390	printTree(_trLoss, treeLoss);
	391	}
	392	if(gainLossOptions::_calculateAncestralReconstruct){
	393	//LOGnOUT(4,<<"_calculateAncestralReconstruct not implemented in this version"<<endl);
	394	ancestralReconstructorBasedOnJoint();
	395	//ancestralReconstructor();
	396	}
	397	if(gainLossOptions::_calculeBranchLegthDiffFactor){ // if BBL is used for each branch - compare length before/after
	398	//LOGnOUT(4,<<"_calculeBranchLegthDiffFactor not implemented in this version"<<endl);
	399	string branchLegthDiffFactor = gainLossOptions::_outDir + "//" + "branchLegthDiffFactor.txt";
	400	ofstream branchLegthDiffFactorStream(branchLegthDiffFactor.c_str());
	401	branchLegthDiffFactorStream.precision(PRECISION);
	402	computeBranchLegthDiffFactor(branchLegthDiffFactorStream);
	403	}
	404	if(gainLossOptions::_simulateSequences){ // Test the rate4site computation
	405	//LOGnOUT(4,<<"_simulateSequences not implemented in this version"<<endl);
	406	startSimulateSequences(gainLossOptions::_numberOfSequences2simulate, _sc.seqLen());
	407	}
	408	if(gainLossOptions::_findCoEvolvingSitesOldNotWorking){
	409	//LOGnOUT(4,<<"_findCoEvolvingSitesOldNotWorking not implemented in this version"<<endl);
	410	findCoEvolvingSites(gainLossOptions::_numberOfSequences2simulateForCoEvol);
	411	}
	412	}
	413
	414
	415
	416	// start(basics)
	417	/********************************************************************************************
	418	startSequenceContainer
	419	*********************************************************************************************/
	420	void gainLoss::startSequenceContainer(){
	421	LOGnOUT(4,<<"\n startSequenceContainer"<<endl);
	422	bool isCountUnknownChars = true; // move to options
	423
	424	gainLossAlphabet alph;
	425	ifstream in(gainLossOptions::_seqFile.c_str());
	426	sequenceContainer original = recognizeFormat::read(in,&alph);
	427	original.changeGaps2MissingData();
	428	_sc = original;
	429	_scWithFullLength = original;
	430	_scUniqPatterns = original;
	431	if(Parameters::getInt("_accountForMissingData")){
	432	int minNumOfOnes = Parameters::getInt("_minNumOfOnes");
	433	int minNumOfZeros = Parameters::getInt("_minNumOfZeros");
	434
	435	bool isRemovePosNotWithinMinMax=false;
	436	bool isReportRemovedPos=false;
	437	checkMinNumOfOnesOrZeros(_sc,minNumOfOnes,minNumOfZeros, isRemovePosNotWithinMinMax, isReportRemovedPos);
	438	}
	439	if(gainLossOptions::_checkCoEvolWithUnionPAP_against_pos){
	440	produceUnionPAP_against_pos(_sc, gainLossOptions::_checkCoEvolWithUnionPAP_against_pos);
	441	}
	442
	443
	444
	445	int alphSize = alph.size();
	446	if(isCountUnknownChars)
	447	alphSize++;
	448	_alphVecDist.resize(alphSize);
	449	_alphVecDist = _sc.getAlphabetDistribution(isCountUnknownChars);
	450	LOGnOUT(4,<<"numberOfSeqs="<<_sc.numberOfSeqs()<<endl);
	451	LOGnOUT(4,<<"seqLen="<<_sc.seqLen()<<endl);
	452	LOGnOUT(4,<<"Num of zeros="<<_alphVecDist[0]<<"\nNum of ones="<<_alphVecDist[1]<<endl);
	453	if(isCountUnknownChars)
	454	LOGnOUT(4,<<"Num of unKnowns="<<_alphVecDist[2]<<endl);
	455
	456	//bool isOverRideDataSizeForOptimization = false;
	457	//if(!isOverRideDataSizeForOptimization
	458	// && (_sc.numberOfSeqs()>maxNumberOfSpeciesForFullOptimization
	459	// \|\| _sc.seqLen()>maxSequenceLengthForFullOptimization
	460	// \|\| _sc.numberOfSeqs()* sc.seqLen()>maxSpeciesNumSequenceLengthMultipForFullOptimization) ){
	461	// LOGnOUT(2,<<"WARN: optimization level is reduced with too large dataset.\n To overRide re-run with _isOverRideDataSizeForOptimization 0"<<_sc.numberOfSeqs()<<endl);
	462	//}
	463
	464	//Parameters::updateParameter("_calculeBranchLegthDiffFactor","0"); // why is it here???
	465
	466	}
	467
	468	/********************************************************************************************
	469	produceUnionPAP_against_pos: set every position to the union (logical OR) of itself and the selected position
	470	*********************************************************************************************/
	471	void gainLoss::produceUnionPAP_against_pos(sequenceContainer& sc, int pos_for_union, bool is_ignore_last_pos){
	472	LOGnOUT(4,<<"produceUnionPAP_against_pos with "<<pos_for_union<<endl);
	473	int seq_length_to_union = sc.seqLen();
	474	if(is_ignore_last_pos){
	475	seq_length_to_union--;
	476	LOGnOUT(4,<<"Ignore last pos for union. Modify positions "<<seq_length_to_union<<endl);
	477	seq_length_to_union--;
	478	}
	479	int pos_for_union_start_count_from_0 = pos_for_union-1;
	480	for (int pos = 0; pos < seq_length_to_union; ++pos){
	481	for (int seqID = 0; seqID < sc.numberOfSeqs(); ++seqID){
	482	if(sc[seqID][pos] == 1 \|\| sc[seqID][pos_for_union_start_count_from_0] == 1 ){
	483	sc[seqID][pos] = 1;
	484	}
	485	}
	486	}
	487	}
	488
	489
	490
	491
	492
	493	/********************************************************************************************
	494	The likelihood correction, requires that unObservable patterns do not exist
	495	*********************************************************************************************/
	496	void gainLoss::checkMinNumOfOnesOrZeros(sequenceContainer& sc, int minNumOfOnes, int minNumOfZeros, bool isRemovePosNotWithinMinMax, bool isReportRemovedPos){
	497	vector<int> posToRemove(sc.seqLen(),false);
	498	vector<int> _alphVecDist = sc.getAlphabetDistribution();
	499	int numOfPosBelowMinNumOfOnes = 0;
	500	int numOfPosBelowMinNumOfZeros = 0;
	501	for (int pos = 0; pos < sc.seqLen(); ++pos){
	502	Vint alphVecPerPos = sc.getAlphabetDistribution(pos);
	503	if(alphVecPerPos[1]< minNumOfOnes){
	504	if(isRemovePosNotWithinMinMax \|\| gainLossOptions::_intersectTreeAndSeq){
	505	posToRemove[pos] = true;
	506	numOfPosBelowMinNumOfOnes++;
	507	if(isReportRemovedPos \|\| gainLossOptions::_intersectTreeAndSeq)
	508	LOGnOUT(4,<<"Belove minOnes, Remove pos="<<pos+1<<endl);
	509	}
	510	else{
	511	LOGnOUT(4,<<"! WARN: Illegal minNumOfOnes found in pos="<<pos+1<<" Reset to minNumOfOnes="<<alphVecPerPos[1]<<endl);
	512	Parameters::updateParameter("_minNumOfOnes",int2string(alphVecPerPos[1]).c_str());
	513	minNumOfOnes = alphVecPerPos[1];
	514	}
	515	}
	516	if(alphVecPerPos[0]< minNumOfZeros){
	517	if(isRemovePosNotWithinMinMax \|\| gainLossOptions::_intersectTreeAndSeq){
	518	posToRemove[pos] = true;
	519	numOfPosBelowMinNumOfZeros++;
	520	if(isReportRemovedPos \|\| gainLossOptions::_intersectTreeAndSeq)
	521	LOGnOUT(4,<<"Belove minZeros, Remove pos="<<pos+1<<endl);
	522	}
	523	else{
	524	LOGnOUT(4,<<"! WARN: Illegal minNumOfZeros found in pos="<<pos+1<<" Reset to minNumOfZeros="<<alphVecPerPos[0]<<endl);
	525	Parameters::updateParameter("_minNumOfZeros",int2string(alphVecPerPos[0]).c_str());
	526	minNumOfZeros = alphVecPerPos[0];
	527	}
	528	}
	529	}
	530	if(minNumOfOnes==0 && minNumOfZeros==0){
	531	LOGnOUT(4,<<"!!! WARN: both minNumOfOnes and minNumOfZeros=0. Thus, no need perform likelihood correction (accountForMissingData)"<<endl);
	532	Parameters::updateParameter("_accountForMissingData","0");
	533	}
	534	if(numOfPosBelowMinNumOfOnes>0)
	535	LOGnOUT(4,<<"WARN: removed "<<numOfPosBelowMinNumOfOnes<<" positions below minNumOfOnes="<<minNumOfOnes<<endl);
	536	if(numOfPosBelowMinNumOfZeros>0)
	537	LOGnOUT(4,<<"WARN: removed "<<numOfPosBelowMinNumOfZeros<<" positions below minNumOfZeros="<<minNumOfZeros<<endl);
	538	sc.removePositions(posToRemove);
	539	}
	540
	541
	542	/********************************************************************************************
	543	The likelihood correction, requires that unObservable patterns do not exist
	544	*********************************************************************************************/
	545	//void gainLoss::removePositionsBelowNmin(sequenceContainer& sc, int minNumOfOnes, int MinVal, bool isRemovePosNotWithinMinMax, bool isReportRemovedPos){
	546	// vector<int> posToRemove(sc.seqLen(),false);
	547	// vector<int> _alphVecDist = sc.getAlphabetDistribution();
	548	// for (int pos = 0; pos < sc.seqLen(); ++pos){
	549	// Vint alphVecPerPos = sc.getAlphabetDistribution(pos);
	550	// if(alphVecPerPos[1]< minNumOfOnes){
	551	// if(isRemovePosNotWithinMinMax \|\| gainLossOptions::_intersectTreeAndSeq){
	552	// posToRemove[pos] = true;
	553	// if(isReportRemovedPos)
	554	// LOGnOUT(4,<<"Belove minOnes, Remove pos="<<pos+1<<endl);
	555	// }
	556	// else{
	557	// LOGnOUT(4,<<"! WARN: Illegal minNumOfOnes found in pos="<<pos+1<<" Reset to minNumOfOnes="<<alphVecPerPos[1]<<endl);
	558	// Parameters::updateParameter("_minNumOfOnes",int2string(alphVecPerPos[1]).c_str());
	559	// }
	560	// }
	561	// if(alphVecPerPos[0]< minNumOfZeros){
	562	// if(isRemovePosNotWithinMinMax \|\| gainLossOptions::_intersectTreeAndSeq){
	563	// if(isReportRemovedPos)
	564	// posToRemove[pos] = true;
	565	// LOGnOUT(4,<<"Belove minZeros, Remove pos="<<pos+1<<endl);
	566	// }
	567	// else{
	568	// LOGnOUT(4,<<"! WARN: Illegal minNumOfZeros found in pos="<<pos+1<<" Reset to minNumOfZeros="<<alphVecPerPos[0]<<endl);
	569	// Parameters::updateParameter("_minNumOfZeros",int2string(alphVecPerPos[0]).c_str());
	570	// }
	571	// }
	572	// }
	573	// if(minNumOfOnes==0 && minNumOfZeros==0){
	574	// LOGnOUT(4,<<"!!! WARN: both minNumOfOnes and minNumOfZeros=0. Thus, no need perform likelihood correction (accountForMissingData)"<<endl);
	575	// Parameters::updateParameter("_accountForMissingData","0");
	576	// }
	577	// sc.removePositions(posToRemove);
	578	//}
	579
	580	/********************************************************************************************
	581	*********************************************************************************************/
	582	void gainLoss::countOccurPerPos(){
	583	string occur = gainLossOptions::_outDir + "//" + "occurPerPos.txt";
	584	ofstream occurStream(occur.c_str());
	585	occurStream.precision(PRECISION);
	586	occurStream<<"POS"<<"\t"<<"occur"<<"\t"<<"unknown"<<endl;
	587
	588	char missigDataChar = -2;
	589	char occurChar = 1;
	590	for(int pos=0; pos<_sc.seqLen(); pos++){
	591	int NumOfOccurancesPerPos = _sc.getNumOfOccurancesPerPos(pos, occurChar);
	592	_occurPerPos.push_back(NumOfOccurancesPerPos);
	593	int NumOfUnknownPerPos = _sc.getNumOfOccurancesPerPos(pos, missigDataChar);
	594	_unknownPerPos.push_back(NumOfUnknownPerPos);
	595	occurStream<<pos+1<<"\t"<<NumOfOccurancesPerPos<<"\t"<<NumOfUnknownPerPos<<endl;
	596	}
	597	}
	598
	599
	600	/********************************************************************************************
	601	*********************************************************************************************/
	602	void gainLoss::removePositionsWithHighPercentOfMissingData(MDOUBLE fractionOfMissingDataToRemove){
	603	char missigDataChar = -2;
	604	MDOUBLE numberOfSeq = _sc.numberOfSeqs();
	605	//MDOUBLE numberOfMissinPerPos;
	606
	607	vector<int> posToRemove(_sc.seqLen(),false);
	608	for(int pos=0; pos<_sc.seqLen(); ++pos){
	609	int NumOfOccurancesPerPos =_unknownPerPos[pos-1]; // pre-computed
	610	if( (float)NumOfOccurancesPerPos/numberOfSeq >= fractionOfMissingDataToRemove ){
	611	posToRemove[pos] = true;
	612	}
	613	}
	614	_scFilterMissingData = _sc;
	615	_scFilterMissingData.removePositions(posToRemove);
	616	LOGnOUT(4,<<"The number of positions with missing less than "<<fractionOfMissingDataToRemove<<" is "<<_scFilterMissingData.seqLen()<<endl);
	617	string strSeq = gainLossOptions::_outDir + "//" + "seqFilterMissingData."+ double2string(fractionOfMissingDataToRemove) + ".fa";
	618	ofstream seq_out(strSeq.c_str());
	619	fastaFormat::write(seq_out,_scFilterMissingData);
	620
	621	//int startPosInLoop = 1;
	622	// start the first position
	623	//for (int firstPos = 0; firstPos<_sc.seqLen(); ++firstPos){
	624	// if((float)_sc.getNumOfOccurancesPerPos(firstPos,missigDataChar)/numberOfSeq < fractionOfMissingDataToRemove ){
	625	// _scFilterMissingData = _sc.getSubSeq(firstPos,firstPos); //start with first position
	626	// LOGnOUT(5,<<"The first position with more missing data less than "<<fractionOfMissingDataToRemove<<" is "<<firstPos<<endl);
	627	// startPosInLoop = firstPos+1;
	628	// break;
	629	// }
	630	//}
	631
	632	//if(_scFilterMissingData.seqLen()<1){
	633	// LOGnOUT(4,<<"WARN: there is no position with more than "<<fractionOfMissingDataToRemove<<endl);
	634	// return;
	635	//}
	636	//
	637	//for(int pos=startPosInLoop; pos<_sc.seqLen(); ++pos){
	638	// if(pos%1000==0)
	639	// cout<<pos<<endl; // DEB
	640	// if(! ((float)_sc.getNumOfOccurancesPerPos(pos,missigDataChar)/numberOfSeq >= fractionOfMissingDataToRemove) ){
	641	// _scFilterMissingData.concatenate(_sc.getSubSeq(pos,pos));
	642	// }
	643
	644	//}
	645	//cout<<_scFilterMissingData.seqLen()<<" "<<_sc.seqLen()<<endl; //DEB
	646	}
	647
	648
	649	/********************************************************************************************
	650	startSequenceContainerUniqPatterns
	651	*********************************************************************************************/
	652	void gainLoss::startSequenceContainerUniqPatterns(){
	653	LOGnOUT(4,<<" *** Starting compute Unique patterns"<<endl);
	654
	655	time_t t1;
	656	time(&t1);
	657	time_t t2;
	658
	659	gainLossAlphabet alph;
	660	Vint scUniqPatternsNumberOfOnesPerPos;
	661	vector<sequenceContainer> sequenceContainerVector;
	662	_scUniqPatterns = _sc.getSubSeq(0,0); //start with first position
	663	sequenceContainerVector.push_back(_scUniqPatterns.getSubSeq(0,0));
	664	scUniqPatternsNumberOfOnesPerPos.push_back(_scUniqPatterns.getNumOfOccurancesPerPos(0,1));
	665
	666	Vint posWeights;
	667	posWeights.push_back(1);
	668
	669	for(int pos=1; pos<_sc.seqLen(); ++pos){
	670	if(pos%1000==0)
	671	cout<<pos<<endl; // DEB
	672	bool isPosUniq = true;
	673	sequenceContainer seqPos(_sc.getSubSeq(pos,pos));
	674	int numberOfOnesPerPos = seqPos.getNumOfOccurancesPerPos(0,1);
	675
	676	for(int i=0; i<_scUniqPatterns.seqLen(); ++i){
	677	if(scUniqPatternsNumberOfOnesPerPos.size()<i){
	678	scUniqPatternsNumberOfOnesPerPos.push_back(_scUniqPatterns.getNumOfOccurancesPerPos(i,1));
	679	sequenceContainerVector.push_back(_scUniqPatterns.getSubSeq(i,i));
	680	}
	681
	682	if(numberOfOnesPerPos == scUniqPatternsNumberOfOnesPerPos[i] && sequenceContainerVector[i] == seqPos){
	683	isPosUniq = false;
	684	posWeights[i] = posWeights[i]+1;
	685	break;
	686	}
	687	}
	688	if(isPosUniq){
	689	_scUniqPatterns.concatenate(seqPos);
	690	posWeights.push_back(1);
	691	}
	692	}
	693	_weightsUniqPatterns = new Vdouble;
	694	_weightsUniqPatterns->resize(posWeights.size());
	695
	696	string posWeightsSt = gainLossOptions::_outDir + "//" + "posWeights" + ".txt";
	697	ofstream posWeights_out(posWeightsSt.c_str());
	698
	699	if(posWeights.size() == _scUniqPatterns.seqLen()){
	700	for(int i=0; i<posWeights.size(); ++i){
	701	posWeights_out<<posWeights[i]<<endl;
	702	(*_weightsUniqPatterns)[i] = posWeights[i];
	703	}
	704	}
	705	else
	706	errorMsg::reportError("posWeights and _scUniqPatterns - Not the same length");
	707
	708	string strSeq = gainLossOptions::_outDir + "//" + "seqUniq" + ".fa";
	709	ofstream seq_out(strSeq.c_str());
	710	fastaFormat::write(seq_out,_scUniqPatterns);
	711
	712	//_sc = sequenceContainer(_scUniqPatterns,&alph);
	713	time(&t2);
	714	LOGnOUT(4,<<"Computed Unique pattern RUNNING TIME = "<<(t2-t1)/60.0<<" minutes"<<endl);
	715	LOGnOUT(4,<<"seqLenUnique pattern="<<_scUniqPatterns.seqLen()<<endl);
	716	}
	717
	718
	719
	720
	721	/********************************************************************************************
	722	fillReferenceSequence
	723	*********************************************************************************************/
	724	void gainLoss::fillReferenceSequence(){
	725	if (strcmp(gainLossOptions::_referenceSeq.c_str(),"non")==0) {
	726	_refSeq = &(_sc[0]);
	727	}
	728	else {
	729	int id1 = _sc.getId(gainLossOptions::_referenceSeq,true);
	730	_refSeq = (&_sc[id1]);
	731	}
	732	}
	733	/********************************************************************************************
	734	startStochasticProcess
	735	*********************************************************************************************/
	736	void gainLoss::startStochasticProcess(bool gainLossDist){
	737	if(!gainLossDist)
	738	startStochasticProcess();
	739	else
	740	startStochasticProcessVec(); //gain, loss ~ gamma
	741	}
	742
	743
	744	/********************************************************************************************
	745	*********************************************************************************************/
	746	void gainLoss::setRootFreq(){
	747	_freq.resize(gainLossOptions::_alphabet_size);
	748	if(gainLossOptions::_isRootFreqEQstationary){
	749	_freq[1]=gainLossOptions::_userGain/(gainLossOptions::_userGain+gainLossOptions::_userLoss);
	750	_freq[0]=1-_freq[1];
	751	}else{
	752	if(gainLossOptions::_userTheta != 0.5){
	753	_freq[1]=gainLossOptions::_userTheta;
	754	_freq[0]=1-_freq[1];
	755	}
	756	else if (gainLossOptions::_userGainLossRatio <VERYBIG ){ // then it was given specifically
	757	_freq[1]= gainLossOptions::_userGainLossRatio/(1+gainLossOptions::_userGainLossRatio);
	758	_freq[0]=1-_freq[1];
	759	}
	760	else{
	761	_freq = computeFreq(); // if user didnt provide the data computeFreq.
	762	}
	763	}
	764	}
	765
	766
	767	/********************************************************************************************
	768	*********************************************************************************************/
	769	void gainLoss::startStochasticProcess()
	770	{
	771	LOGnOUT(4,<<"\n startStochasticProcess..."<<endl);
	772	MDOUBLE init_gain = gainLossOptions::_userGain;
	773	MDOUBLE init_loss = gainLossOptions::_userLoss;
	774	if(Parameters::getInt("_isInitGainLossByEmpiricalFreq")){
	775	_freq = evaluateCharacterFreq(_scWithFullLength);
	776	init_gain = _freq[1];
	777	init_loss = _freq[0];
	778	LOGnOUT(4,<<endl<<"InitGainLossByEmpiricalFreq: freq 1=init_gain= "<<_freq[1]<<endl);
	779	}
	780	setRootFreq();
	781
	782	_gainExp = init_gain;
	783	_lossExp = init_loss;
	784	replacementModel* glm;
	785	if(!gainLossOptions::_isReversible){
	786	glm = new gainLossModelNonReversible(init_gain,init_loss,_freq,gainLossOptions::_isRootFreqEQstationary,gainLossOptions::_isHGT_normal_Pij,gainLossOptions::_isHGT_with_Q);
	787	}
	788	else{
	789	glm = new gainLossModel(init_gain, _freq,gainLossOptions::_isRootFreqEQstationary, true,gainLossOptions::_isHGT_normal_Pij,gainLossOptions::_isHGT_with_Q);
	790	}
	791	trivialAccelerator* pijAcc = new trivialAccelerator(glm);
	792	MDOUBLE initAlphaRate = gainLossOptions::_userAlphaRate;
	793	MDOUBLE initBetaRate = gainLossOptions::_userBetaRate;
	794	MDOUBLE initProbInvariant = gainLossOptions::_userProbInvariantRate;
	795	MDOUBLE initGlobalRate =1;
	796	MDOUBLE initRateInvariantVal = gainLossOptions::_userRateInvariantVal;
	797
	798	//Mixture
	799	int numOfGammaComp = gainLossOptions::_numberOfRateComponents;
	800	int numOfRateCategories = gainLossOptions::_numberOfRateCategories;
	801	Vdouble initAlphaRates;
	802	Vdouble initBetaRates;
	803	Vdouble initCompProbRates;
	804
	805	distribution* rateDist;
	806	distribution* baseDistr;
	807	switch (gainLossOptions::_rateDistributionType){
	808	case (gainLossOptions::UNIFORM):
	809	rateDist = new uniDistribution();
	810	LOGnOUT(4,<<"rateDist UNIFORM" <<endl);
	811	break;
	812	case (gainLossOptions::GAMMA):
	813	rateDist = new gammaDistribution(initAlphaRate,gainLossOptions::_numberOfRateCategories); //
	814	LOGnOUT(4,<<"rateDist GAMMA with: initAlphaRate="<<initAlphaRate<<" and _numberOfRateCategories= "<<gainLossOptions::_numberOfRateCategories<<endl);
	815	break;
	816	case (gainLossOptions::GENERAL_GAMMA):
	817	rateDist = new generalGammaDistribution(initAlphaRate,initBetaRate,gainLossOptions::_numberOfRateCategories); //
	818	LOGnOUT(4,<<"rateDist GENERAL_GAMMA with: initAlphaRate="<<initAlphaRate<<" initBetaRate="<<initBetaRate<<" and _numberOfRateCategories= "<<gainLossOptions::_numberOfRateCategories<<endl);
	819	break;
	820	case (gainLossOptions::GAMMA_FIXED_CATEGORIES):
	821	if(!( (numOfRateCategories==1)\|\|(numOfRateCategories==2)\|\|(numOfRateCategories==4)\|\|(numOfRateCategories==5)\|\|(numOfRateCategories==16) )){
	822	string err = "in rate distr. GAMMA_FIXED_CATEGORIES only #cat={1,2,4,5,16} is supported. Not #cat=";
	823	err+=int2string(numOfRateCategories);
	824	errorMsg::reportError(err);
	825	}
	826	rateDist = new gammaDistributionFixedCategories(initAlphaRate, gainLossOptions::_numberOfRateCategories);
	827	LOGnOUT(4,<<"The rateDist was initialized as GAMMA_FIXED_CATEGORIES with: num Of Categories "<<gainLossOptions::_numberOfRateCategories<<" initAlphaRate= "<<initAlphaRate<<endl);
	828	break;
	829	case (gainLossOptions::GENERAL_GAMMA_FIXED_CATEGORIES):
	830	if(!( (numOfRateCategories==1)\|\|(numOfRateCategories==2)\|\|(numOfRateCategories==4)\|\|(numOfRateCategories==5)\|\|(numOfRateCategories==16) )){
	831	string err = "in rate distr. GENERAL_GAMMA_FIXED_CATEGORIES only #cat={1,2,4,5,16} is supported. Not #cat=";
	832	err+=int2string(numOfRateCategories);
	833	errorMsg::reportError(err);
	834	}
	835	rateDist = new generalGammaDistributionFixedCategories(initAlphaRate, initBetaRate,gainLossOptions::_numberOfRateCategories);
	836	LOGnOUT(4,<<"The rateDist was initialized as GENERAL_GAMMA_FIXED_CATEGORIES with: num Of Categories "<<gainLossOptions::_numberOfRateCategories<<" initAlphaRate= "<<initAlphaRate<<" initBetaRate="<<initBetaRate<<endl);
	837	break;
	838	case (gainLossOptions::GAMMA_MIXTURE):
	839	if(gainLossOptions::_initRandomGammaMixuteParam){
	840	if(gainLossOptions::_rateDiscretizationType == gainLossOptions::QUANTILE)
	841	rateDist = new mixtureDistribution(gainLossOptions::_numberOfRateComponents, gainLossOptions::_numberOfRateCategories,QUANTILE);
	842	if(gainLossOptions::_rateDiscretizationType == gainLossOptions::LAGUERRE)
	843	rateDist = new mixtureDistribution(gainLossOptions::_numberOfRateComponents, gainLossOptions::_numberOfRateCategories,LAGUERRE);
	844	}
	845	else{
	846	initMixtureParams(initAlphaRates,initBetaRates,initCompProbRates,numOfGammaComp,initAlphaRate, initBetaRate);
	847	rateDist = new mixtureDistribution(gainLossOptions::_numberOfRateComponents, gainLossOptions::_numberOfRateCategories,initAlphaRates,initBetaRates,initCompProbRates);
	848	}
	849	LOGnOUT(4,<<"The rateDist was initialized as GAMMA_MIXTURE"<<endl);
	850	break;
	851	case (gainLossOptions::GAMMA_PLUS_INV):
	852	baseDistr = new gammaDistribution(initAlphaRate,gainLossOptions::_numberOfRateCategories);
	853	rateDist = new gammaDistributionPlusInvariant(baseDistr,initProbInvariant,initGlobalRate,initRateInvariantVal);
	854	LOGnOUT(4,<<"The rateDist was initialized as GAMMA_PLUS_INV with: initAlphaRate="<<initAlphaRate<<" and initProbInvariant="<<initProbInvariant<<endl);
	855	if(baseDistr) delete baseDistr;
	856	break;
	857	case (gainLossOptions::GENERAL_GAMMA_PLUS_INV):{
	858	baseDistr = new generalGammaDistribution(initAlphaRate,initBetaRate,gainLossOptions::_numberOfRateCategories);
	859	rateDist = new generalGammaDistributionPlusInvariant(baseDistr,initProbInvariant,initGlobalRate,initRateInvariantVal);
	860	LOGnOUT(4,<<"The rateDist was initialized as GENERAL_GAMMA_PLUS_INV with: initBetaRate="<<initBetaRate<<" and initProbInvariant="<<initProbInvariant<<endl);
	861	if(baseDistr) delete baseDistr;
	862	}
	863	}
	864	_sp = new stochasticProcess(rateDist,pijAcc,gainLossOptions::_isReversible);
	865	MDOUBLE norm_factor = normalizeQ(_sp);
	866	LOGnOUT(4,<<"Stochastic process normalized with norm_factor="<<norm_factor<<endl);
	867
	868	if (rateDist) delete rateDist; //at r4s after the sp object is created all other objects dynamically constructed are deleted
	869	if (pijAcc) delete pijAcc;
	870	if (glm) delete glm;
	871	}
	872
	873
	874
	875	/********************************************************************************************
	876	startStochasticProcessGeneric
	877	*********************************************************************************************/
	878	stochasticProcess* gainLoss::startStochasticProcessGeneric(gainLossOptions::distributionType rateDistributionType, const bool isReversible)
	879	{
	880	LOGnOUT(4,<<"\n startStochasticProcessGeneric..."<<endl);
	881
	882	MDOUBLE init_gain = gainLossOptions::_userGain;
	883	MDOUBLE init_loss = gainLossOptions::_userLoss;
	884	if(Parameters::getInt("_isInitGainLossByEmpiricalFreq")){
	885	_freq = evaluateCharacterFreq(_scWithFullLength);
	886	init_gain = _freq[1];
	887	init_loss = _freq[0];
	888	LOGnOUT(4,<<endl<<"InitGainLossByEmpiricalFreq: freq 1=init_gain= "<<_freq[1]<<endl);
	889	}
	890	setRootFreq();
	891
	892	replacementModel* glm;
	893	if(!isReversible){
	894	glm = new gainLossModelNonReversible(init_gain,init_loss,_freq,gainLossOptions::_isRootFreqEQstationary,gainLossOptions::_isHGT_normal_Pij,gainLossOptions::_isHGT_with_Q);
	895	}
	896	else{
	897	glm = new gainLossModel(init_gain, _freq,gainLossOptions::_isRootFreqEQstationary, true,gainLossOptions::_isHGT_normal_Pij,gainLossOptions::_isHGT_with_Q);
	898	}
	899	trivialAccelerator* pijAcc = new trivialAccelerator(glm);
	900
	901	MDOUBLE initAlphaRate = gainLossOptions::_userAlphaRate;
	902	MDOUBLE initBetaRate = gainLossOptions::_userBetaRate;
	903	MDOUBLE initProbInvariant = gainLossOptions::_userProbInvariantRate;
	904	MDOUBLE initGlobalRate =1;
	905	MDOUBLE initRateInvariantVal = gainLossOptions::_userRateInvariantVal;
	906	//Mixture
	907	int numOfGammaComp = gainLossOptions::_numberOfRateComponents;
	908	int numOfRateCategories = gainLossOptions::_numberOfRateCategories;
	909	Vdouble initAlphaRates;
	910	Vdouble initBetaRates;
	911	Vdouble initCompProbRates;
	912
	913	distribution* rateDist =NULL;
	914	distribution* baseDistr=NULL;
	915	switch (rateDistributionType){
	916	case (gainLossOptions::UNIFORM):
	917	rateDist = new uniDistribution();
	918	LOGnOUT(4,<<"rateDist UNIFORM" <<endl);
	919	break;
	920	case (gainLossOptions::GAMMA):
	921	rateDist = new gammaDistribution(initAlphaRate,gainLossOptions::_numberOfRateCategories); //
	922	LOGnOUT(4,<<"rateDist GAMMA with: initAlphaRate="<<initAlphaRate<<" and _numberOfRateCategories= "<<gainLossOptions::_numberOfRateCategories<<endl);
	923	break;
	924	case (gainLossOptions::GENERAL_GAMMA):
	925	rateDist = new generalGammaDistribution(initAlphaRate,initBetaRate,gainLossOptions::_numberOfRateCategories); //
	926	LOGnOUT(4,<<"rateDist GENERAL_GAMMA with: initAlphaRate="<<initAlphaRate<<" initBetaRate="<<initBetaRate<<" and _numberOfRateCategories= "<<gainLossOptions::_numberOfRateCategories<<endl);
	927	break;
	928	case (gainLossOptions::GAMMA_FIXED_CATEGORIES):
	929	if(!( (numOfRateCategories==1)\|\|(numOfRateCategories==2)\|\|(numOfRateCategories==3)\|\|(numOfRateCategories==5)\|\|(numOfRateCategories==8)\|\|(numOfRateCategories==12\|\|(numOfRateCategories==16)\|\|(numOfRateCategories==24)\|\|(numOfRateCategories==32)\|\|(numOfRateCategories==36)) )){
	930	string err = "in rate distr. GAMMA_FIXED_CATEGORIES only #cat={1,2,3,5,8,12,16,24,32,36} is supported. Not #cat=";
	931	err+=int2string(numOfRateCategories);
	932	errorMsg::reportError(err);
	933	}
	934	rateDist = new gammaDistributionFixedCategories(initAlphaRate, gainLossOptions::_numberOfRateCategories);
	935	LOGnOUT(4,<<"The rateDist was initialized as GAMMA_FIXED_CATEGORIES with: num Of Categories "<<gainLossOptions::_numberOfRateCategories<<" initAlphaRate= "<<initAlphaRate<<endl);
	936	break;
	937	case (gainLossOptions::GENERAL_GAMMA_FIXED_CATEGORIES):
	938	if(!( (numOfRateCategories==1)\|\|(numOfRateCategories==2)\|\|(numOfRateCategories==3)\|\|(numOfRateCategories==5)\|\|(numOfRateCategories==8)\|\|(numOfRateCategories==12)\|\|(numOfRateCategories==16)\|\|(numOfRateCategories==24)\|\|(numOfRateCategories==32)\|\|(numOfRateCategories==36) )){
	939	string err = "in rate distr. GENERAL_GAMMA_FIXED_CATEGORIES only #cat={1,2,3,5,8,12,16,24,32,36} is supported. Not #cat=";
	940	err+=int2string(numOfRateCategories);
	941	errorMsg::reportError(err);
	942	}
	943	rateDist = new generalGammaDistributionFixedCategories(initAlphaRate, initBetaRate,gainLossOptions::_numberOfRateCategories);
	944	LOGnOUT(4,<<"The rateDist was initialized as GENERAL_GAMMA_FIXED_CATEGORIES with: num Of Categories "<<gainLossOptions::_numberOfRateCategories<<" initAlphaRate= "<<initAlphaRate<<" initBetaRate="<<initBetaRate<<endl);
	945	break;
	946	case (gainLossOptions::GAMMA_MIXTURE):
	947	if(gainLossOptions::_initRandomGammaMixuteParam){
	948	if(gainLossOptions::_rateDiscretizationType == gainLossOptions::QUANTILE)
	949	rateDist = new mixtureDistribution(gainLossOptions::_numberOfRateComponents, gainLossOptions::_numberOfRateCategories,QUANTILE);
	950	if(gainLossOptions::_rateDiscretizationType == gainLossOptions::LAGUERRE)
	951	rateDist = new mixtureDistribution(gainLossOptions::_numberOfRateComponents, gainLossOptions::_numberOfRateCategories,LAGUERRE);
	952	}
	953	else{
	954	initMixtureParams(initAlphaRates,initBetaRates,initCompProbRates,numOfGammaComp,initAlphaRate, initBetaRate); // standard points
	955	rateDist = new mixtureDistribution(gainLossOptions::_numberOfRateComponents, gainLossOptions::_numberOfRateCategories,initAlphaRates,initBetaRates,initCompProbRates);
	956	}
	957	LOGnOUT(4,<<"The rateDist was initialized as GAMMA_MIXTURE"<<endl);
	958	break;
	959	case (gainLossOptions::GAMMA_PLUS_INV):
	960	baseDistr = new gammaDistribution(initAlphaRate,gainLossOptions::_numberOfRateCategories);
	961	rateDist = new gammaDistributionPlusInvariant(baseDistr,initProbInvariant,initGlobalRate,initRateInvariantVal);
	962	LOGnOUT(4,<<"The rateDist was initialized as GAMMA_PLUS_INV with: initAlphaRate="<<initAlphaRate<<" and initProbInvariant="<<initProbInvariant<<endl);
	963	if(baseDistr) delete baseDistr;
	964	break;
	965	case (gainLossOptions::GENERAL_GAMMA_PLUS_INV):{
	966	baseDistr = new generalGammaDistribution(initAlphaRate,initBetaRate,gainLossOptions::_numberOfRateCategories);
	967	rateDist = new generalGammaDistributionPlusInvariant(baseDistr,initProbInvariant,initGlobalRate,initRateInvariantVal);
	968	LOGnOUT(4,<<"The rateDist was initialized as GENERAL_GAMMA_PLUS_INV with: initBetaRate="<<initBetaRate<<" and initProbInvariant="<<initProbInvariant<<endl);
	969	if(baseDistr) delete baseDistr;
	970	}
	971	}
	972	stochasticProcess* sp = new stochasticProcess(rateDist,pijAcc,gainLossOptions::_isReversible);
	973
	974	MDOUBLE norm_factor = normalizeQ(sp);
	975	LOGnOUT(4,<<"Stochastic process normalized with norm_factor="<<norm_factor<<endl);
	976	if (rateDist) delete rateDist; //at r4s after the sp object is created all other objects dynamically constructed are deleted
	977	if (pijAcc) delete pijAcc;
	978	if (glm) delete glm;
	979	return sp;
	980	}
	981	/********************************************************************************************
	982	startStochasticProcessVec
	983	*********************************************************************************************/
	984	void gainLoss::startStochasticProcessVec(){
	985	LOGnOUT(4,<<"\n startStochasticProcessVec with: GainCategories="<<gainLossOptions::_numberOfGainCategories<<" and GainCategories="<<gainLossOptions::_numberOfLossCategories<<endl);
	986	bool isReversible =gainLossOptions::_isReversible;
	987	//Vdouble freq;
	988	//Vdouble freqEmpirical;
	989	MDOUBLE init_gain;
	990	MDOUBLE init_loss;
	991	MDOUBLE initAlphaRate = gainLossOptions::_userAlphaRate;
	992	MDOUBLE initAlphaGain = gainLossOptions::_userAlphaGain;
	993	MDOUBLE initBetaGain = gainLossOptions::_userBetaGain;
	994	MDOUBLE initAlphaLoss = gainLossOptions::_userAlphaLoss;
	995	MDOUBLE initBetaLoss = gainLossOptions::_userBetaLoss;
	996	MDOUBLE initProbInvariantGain = gainLossOptions::_userProbInvariantGain;
	997	MDOUBLE initProbInvariantLoss = gainLossOptions::_userProbInvariantLoss;
	998	MDOUBLE initGlobalRate =1;
	999	MDOUBLE initRateInvariantVal = gainLossOptions::_userRateInvariantVal;
	1000
	1001	setRootFreq();
	1002
	1003	if(Parameters::getInt("_isInitGainLossByEmpiricalFreq")){
	1004	_freq = evaluateCharacterFreq(_scWithFullLength);
	1005	init_gain = _freq[1];
	1006	init_loss = _freq[0];
	1007	MDOUBLE gainLossRatioToCompleteByBeta = (init_gain/init_loss)*(initAlphaLoss/initAlphaGain);
	1008	LOGnOUT(4,<<endl<<"InitGainLossByEmpiricalFreq: freq 1= "<<_freq[1]<<" Thus, gainLossRatioToCompleteByBeta= "<<gainLossRatioToCompleteByBeta<<endl);
	1009	if(gainLossOptions::_isUpdateOnlyGainBetaForRatio)
	1010	initBetaGain =(initBetaLoss/gainLossRatioToCompleteByBeta); // AlphaGain = 0.35
	1011	else{
	1012	initBetaGain =sqrt(1/gainLossRatioToCompleteByBeta); // AlphaGain = 0.35
	1013	initBetaLoss =sqrt(gainLossRatioToCompleteByBeta); // AlphaLoss = 0.9
	1014	}
	1015	}
	1016
	1017	// gain
	1018	switch (gainLossOptions::_gainDistributionType){
	1019	case (gainLossOptions::UNIFORM):
	1020	_gainDist = new uniDistribution();
	1021	LOGnOUT(4,<<"rateDist UNIFORM" <<endl);
	1022	break;
	1023	case (gainLossOptions::GAMMA):
	1024	_gainDist = new gammaDistribution(initAlphaGain,gainLossOptions::_numberOfGainCategories); //
	1025	LOGnOUT(4,<<"gainDist GAMMA with: initAlpha="<<initAlphaGain<<" and _numberOfRateCategories= "<<gainLossOptions::_numberOfGainCategories<<endl);
	1026	break;
	1027	case (gainLossOptions::GENERAL_GAMMA):
	1028	_gainDist = new generalGammaDistribution(initAlphaGain,initBetaGain,gainLossOptions::_numberOfGainCategories); //
	1029	LOGnOUT(4,<<"gainDist GENERAL_GAMMA with: initAlphaGain="<<initAlphaGain<<" initBetaGain="<<initBetaGain<<" and _numberOfGainCategories= "<<gainLossOptions::_numberOfGainCategories<<endl);
	1030	break;
	1031	case (gainLossOptions::GAMMA_FIXED_CATEGORIES):
	1032	//if(!( (numOfRateCategories==1)\|\|(numOfRateCategories==2)\|\|(numOfRateCategories==3)\|\|(numOfRateCategories==5)\|\|(numOfRateCategories==8)\|\|(numOfRateCategories==12\|\|(numOfRateCategories==16)\|\|(numOfRateCategories==24)\|\|(numOfRateCategories==32)\|\|(numOfRateCategories==36)) )){
	1033	// string err = "in gain distr. GAMMA_FIXED_CATEGORIES only #cat={1,2,3,5,8,12,16,24,32,36} is supported. Not #cat=";
	1034	// err+=int2string(numOfRateCategories);
	1035	// errorMsg::reportError(err);
	1036	//}
	1037	_gainDist = new gammaDistributionFixedCategories(initAlphaGain, gainLossOptions::_numberOfGainCategories);
	1038	LOGnOUT(4,<<"The _gainDist was initialized as GAMMA_FIXED_CATEGORIES with: num Of Categories "<<gainLossOptions::_numberOfGainCategories<<" initAlphaGain= "<<initAlphaGain<<endl);
	1039	break;
	1040	case (gainLossOptions::GENERAL_GAMMA_FIXED_CATEGORIES):
	1041	errorMsg::reportError("in gainDist. GENERAL_GAMMA_FIXED_CATEGORIES is not realized");
	1042	break;
	1043	case (gainLossOptions::GAMMA_MIXTURE):
	1044	errorMsg::reportError("in gainDist. GAMMA_MIXTURE is not realized");
	1045	break;
	1046	case (gainLossOptions::GENERAL_GAMMA_PLUS_INV):{
	1047	distribution* baseDistr = new generalGammaDistribution(initAlphaGain,initBetaGain,gainLossOptions::_numberOfGainCategories);
	1048	_gainDist = new generalGammaDistributionPlusInvariant(baseDistr,initProbInvariantGain,initGlobalRate,initRateInvariantVal);
	1049	LOGnOUT(4,<<"The gainDist was initialized as GENERAL_GAMMA_PLUS_INV with: initAlphaGain="<<initAlphaGain<<" initBetaGain="<<initBetaGain<<" initProbInvariantGain="<<initProbInvariantGain<<" without optimization"<<endl);
	1050	delete baseDistr;
	1051	break;}
	1052	default:
	1053	errorMsg::reportError("error in startStochasticProcessVec the distribution chosen is not implemented");
	1054	}
	1055	// loss
	1056	switch (gainLossOptions::_lossDistributionType){
	1057	case (gainLossOptions::UNIFORM):
	1058	_lossDist = new uniDistribution();
	1059	LOGnOUT(4,<<"rateDist UNIFORM" <<endl);
	1060	break;
	1061	case (gainLossOptions::GAMMA):
	1062	_lossDist = new gammaDistribution(initAlphaLoss,gainLossOptions::_numberOfLossCategories); //
	1063	LOGnOUT(4,<<"lossDist GAMMA with: initAlpha="<<initAlphaLoss<<" and _numberOfRateCategories= "<<gainLossOptions::_numberOfLossCategories<<endl);
	1064	break;
	1065	case (gainLossOptions::GENERAL_GAMMA):
	1066	_lossDist = new generalGammaDistribution(initAlphaLoss,initBetaLoss,gainLossOptions::_numberOfLossCategories); //
	1067	LOGnOUT(4,<<"lossDist GENERAL_GAMMA with: initAlphaLoss="<<initAlphaLoss<<" initBetaLoss="<<initBetaLoss<<" and _numberOfLossCategories= "<<gainLossOptions::_numberOfLossCategories<<endl);
	1068	break;
	1069	case (gainLossOptions::GAMMA_FIXED_CATEGORIES):
	1070	_lossDist = new gammaDistributionFixedCategories(initAlphaLoss, gainLossOptions::_numberOfLossCategories);
	1071	LOGnOUT(4,<<"The _gainDist was initialized as GAMMA_FIXED_CATEGORIES with: num Of Categories "<<gainLossOptions::_numberOfLossCategories<<" initAlphaLoss= "<<initAlphaLoss<<endl);
	1072	errorMsg::reportError("in lossDist. GAMMA_FIXED_CATEGORIES is not realized");
	1073	break;
	1074	case (gainLossOptions::GENERAL_GAMMA_FIXED_CATEGORIES):
	1075	errorMsg::reportError("in lossDist. GENERAL_GAMMA_FIXED_CATEGORIES is not realized");
	1076	break;
	1077	case (gainLossOptions::GAMMA_MIXTURE):
	1078	errorMsg::reportError("in lossDist. GAMMA_MIXTURE is not realized");
	1079	break;
	1080	case (gainLossOptions::GENERAL_GAMMA_PLUS_INV):{
	1081	distribution* baseDistr = new generalGammaDistribution(initAlphaLoss,initBetaLoss,gainLossOptions::_numberOfLossCategories);
	1082	_lossDist = new generalGammaDistributionPlusInvariant(baseDistr,initProbInvariantLoss,initGlobalRate,initRateInvariantVal);
	1083	LOGnOUT(4,<<"The lossDist was initialized as GENERAL_GAMMA_PLUS_INV with: initAlphaLoss="<<initAlphaLoss<<" initBetaLoss="<<initBetaLoss<<" initProbInvariantLoss="<<initProbInvariantLoss<<" without optimization"<<endl);
	1084	delete baseDistr;
	1085	break;}
	1086	default:errorMsg::reportError("error in startStochasticProcessVec the distribution chosen is not implemented");
	1087	}
	1088
	1089	// Loop over gain and loss distributions
	1090	distribution* baseDistr;
	1091	_spVVec.resize(_gainDist->categories());
	1092	for (int gainCategor=0; gainCategor<_gainDist->categories(); gainCategor++){
	1093	_spVVec[gainCategor].resize(_lossDist->categories());
	1094	for (int lossCategor=0; lossCategor<_lossDist->categories(); lossCategor++){
	1095	replacementModel* glm;
	1096	if(!isReversible){
	1097	glm = new gainLossModelNonReversible(_gainDist->rates(gainCategor),_lossDist->rates(lossCategor),_freq,gainLossOptions::_isRootFreqEQstationary,gainLossOptions::_isHGT_normal_Pij,gainLossOptions::_isHGT_with_Q);
	1098	}
	1099	else{
	1100	glm = new gainLossModel(_gainDist->rates(gainCategor),_freq,gainLossOptions::_isRootFreqEQstationary, true,gainLossOptions::_isHGT_normal_Pij,gainLossOptions::_isHGT_with_Q);
	1101	}
	1102	pijAccelerator* pijAcc = new trivialAccelerator(glm);
	1103
	1104	distribution* rateDist;
	1105	switch (gainLossOptions::_rateDistributionType){
	1106	case (gainLossOptions::UNIFORM):
	1107	rateDist = new uniDistribution();
	1108	break;
	1109	case (gainLossOptions::GAMMA_FIXED_CATEGORIES):
	1110	rateDist = new gammaDistributionFixedCategories(initAlphaRate, gainLossOptions::_numberOfRateCategories);
	1111	break;
	1112	case (gainLossOptions::GAMMA):
	1113	rateDist = new gammaDistribution(initAlphaRate,gainLossOptions::_numberOfRateCategories); //
	1114	break;
	1115	case (gainLossOptions::GAMMA_PLUS_INV):
	1116	baseDistr = new gammaDistribution(initAlphaRate,gainLossOptions::_numberOfRateCategories);
	1117	rateDist = new gammaDistributionPlusInvariant(baseDistr,gainLossOptions::_userProbInvariantRate,initGlobalRate,initRateInvariantVal);
	1118	if(baseDistr) delete baseDistr;
	1119	break;
	1120	default:
	1121	errorMsg::reportError("unknown type in distributionType");
	1122	}
	1123	stochasticProcess* sp = new stochasticProcess(rateDist,pijAcc,gainLossOptions::_isReversible);
	1124	_spVVec[gainCategor][lossCategor] = sp->clone();
	1125
	1126	if (rateDist) delete rateDist; //at r4s after the sp object is created all other objects dynamically constructed are deleted
	1127	if (pijAcc) delete pijAcc;
	1128	if (glm) delete glm;
	1129	if (sp) delete sp;
	1130	}
	1131	}
	1132	_gainExp = rateExpectation(_gainDist);
	1133	_lossExp = rateExpectation(_lossDist);
	1134
	1135	MDOUBLE norm_factor = normalizeQ(_spVVec, _gainDist, _lossDist);
	1136	LOGnOUT(4,<<"Stochastic process vector normalized with norm_factor="<<norm_factor<<endl);
	1137	_sp = _spVVec[0][0]; // initialize the "normal _sp" data member
	1138	}
	1139	/********************************************************************************************
	1140	computeFreq
	1141	*********************************************************************************************/
	1142	Vdouble gainLoss::computeFreq(){
	1143	Vdouble freq;
	1144	switch (gainLossOptions::_characterFreqEval){
	1145	case (gainLossOptions::FiftyFifty):
	1146	freq.push_back(0.5); // initializing the frequency vector to 0.5
	1147	freq.push_back(0.5);
	1148	LOGnOUT(4,<<"frequencies were set to FiftyFifty "<<freq[0]<<" "<<freq[1]<<endl);
	1149	break;
	1150	case (gainLossOptions::LeavesAve):
	1151	freq = evaluateCharacterFreq(_scWithFullLength); //
	1152	LOGnOUT(4,<<"frequencies are based on LeavesAve "<<freq[0]<<" "<<freq[1]<<endl);
	1153	break;
	1154	case (gainLossOptions::optimizeOverTree):
	1155	freq = evaluateCharacterFreq(_scWithFullLength); // the rest will be be preformed during the optimization stage
	1156	LOGnOUT(4,<<"frequencies are "<<freq[0]<<" "<<freq[1]<<endl);
	1157	break;
	1158	}
	1159	return freq;
	1160	}
	1161	/********************************************************************************************
	1162	startingEvolTreeTopology
	1163	*********************************************************************************************/
	1164	void gainLoss::startEvolTreeTopology(ostream& out){
	1165	//time_t ltime1;
	1166	//time( &ltime1 );
	1167	LOGnOUT(4,<<"\n startingEvolTreeTopology..."<<endl);
	1168	VVdouble disTab;
	1169	vector<string> vNames;
	1170	if (gainLossOptions::_treeFile=="") {
	1171	LOGnOUT(4,<<"No treeFile was given. The tree will be estimated from distance matrix"<<endl);
	1172	distanceMethod* pDm;
	1173	switch (gainLossOptions::_treeSearchAlg){
	1174	case (gainLossOptions::njJC):
	1175	pDm = new jcDistance();
	1176	giveDistanceTable(pDm, _sc, disTab, vNames);
	1177	break;
	1178	case (gainLossOptions::njJCOLD):
	1179	pDm = new jcDistanceOLD(gainLossOptions::_alphabet_size);
	1180	giveDistanceTable(pDm, _sc,disTab, vNames);
	1181	break;
	1182	case (gainLossOptions::njML): {
	1183	uniDistribution lUni;
	1184	const pijAccelerator* lpijAcc = _spSimple->getPijAccelerator();// note this is just a copy of the pointer.
	1185	stochasticProcess lsp(&lUni,lpijAcc);
	1186	pDm = new likeDist(lsp,0.01);
	1187	//pDm = new likeDist(*_spSimple); // in this sp the gain and loss are taken from empirical freq and gamma dist is used
	1188	giveDistanceTable(pDm,_sc,disTab,vNames);
	1189	}
	1190	break;
	1191	default:
	1192	errorMsg::reportError("this tree search mode is not yet available");
	1193	}
	1194	delete pDm;
	1195
	1196	//calc distance table statistics
	1197	MDOUBLE low_bound = VERYBIG;
	1198	MDOUBLE upper_bound = VERYSMALL;
	1199	MDOUBLE sum = 0.0;
	1200	int count = 0;
	1201	for (int i = 0; i < disTab.size(); ++i){
	1202	for (int j = i+1; j < disTab[i].size(); ++j){
	1203	sum += disTab[i][j];
	1204	++count;
	1205	if (disTab[i][j] < low_bound)
	1206	low_bound = disTab[i][j];
	1207	if (disTab[i][j] > upper_bound)
	1208	upper_bound = disTab[i][j];
	1209	}
	1210	}
	1211	MDOUBLE avg = sum / static_cast<MDOUBLE>(count);
	1212	LOG(5,<<"#MSA diversity matrix"<<endl);
	1213	LOG(5,<<"#Average pairwise distance= "<<avg<<endl);
	1214	LOG(5,<<"#lower bound = "<<low_bound<<endl);
	1215	LOG(5,<<"#upper bound = "<<upper_bound<<endl);
	1216	LOG(5,<<"#end of MSA diversity matrix"<<endl);
	1217	getStartingTreeNJ_fromDistances(disTab, vNames);
	1218	}
	1219
	1220	else
	1221	getStartingTreeFromTreeFile();
	1222
	1223	if (!(gainLossOptions::_rootAt =="")){
	1224	tree::nodeP myroot = _tr.findNodeByName(gainLossOptions::_rootAt); //returns NULL if not found
	1225	if (myroot){
	1226	_tr.rootAt(myroot);
	1227	LOGnOUT(4,<<"tree rooted at "<<myroot->name()<<"\n sons of root are:"<<endl);
	1228	for(int i = 0; i<_tr.getRoot()->getNumberOfSons(); ++i ){
	1229	LOGnOUT(4,<<_tr.getRoot()->getSon(i)->name()<<" ");
	1230	}
	1231	LOGnOUT(4,<<"\n");
	1232	return;
	1233	}
	1234	}
	1235	LOGnOUT(4,<<"default rooting used, root name is "<<_tr.getRoot()->name()<<endl);
	1236	LOGnOUT(4,<<"sons of root are:"<<endl);
	1237	for(int i = 0; i<_tr.getRoot()->getNumberOfSons(); ++i ){
	1238	LOGnOUT(4,<<_tr.getRoot()->getSon(i)->name()<<" ");
	1239	}
	1240	LOGnOUT(4,<<"\n");
	1241
	1242	//return;
	1243
	1244	if(gainLossOptions::_seqFile!="" && !_tr.getLeavesNum()==_sc.numberOfSeqs()){
	1245	errorMsg::reportError("The number of sequence is not equal to the number of taxas in the tree");
	1246	}
	1247	_tr.makeSureAllBranchesAreLargerThanEpsilon(gainLossOptions::_minBranchLength);
	1248	_trOrig = _tr;
	1249
	1250
	1251	//time_t ltime2;
	1252	//time( &ltime2 );
	1253	//int t = static_cast<long>(ltime2 - ltime1);
	1254	//timingsF<<"time for tree topology = "<<t<<endl;
	1255	}
	1256	/********************************************************************************************
	1257	getStartingTreeFromTreeFile
	1258	*********************************************************************************************/
	1259	void gainLoss::getStartingTreeFromTreeFile(){
	1260	_tr= tree(gainLossOptions::_treeFile);
	1261	//if (!_tr.withBranchLength()) _tr.createFlatLengthMatrix(0.1); // not required, checked before
	1262	}
	1263
	1264	/********************************************************************************************
	1265	getStartingTreeNJ_fromDistances
	1266	*********************************************************************************************/
	1267	void gainLoss::getStartingTreeNJ_fromDistances(const VVdouble& disTab,const vector<string>& vNames) {
	1268	NJalg nj1;
	1269	_tr= nj1.computeTree(disTab,vNames);
	1270	ofstream f;
	1271	string fileName1=gainLossOptions::_treeOutFile;
	1272	f.open(fileName1.c_str());
	1273	_tr.output(f);
	1274	f.close();
	1275	}
	1276	/********************************************************************************************
	1277	*********************************************************************************************/
	1278	//void gainLoss::initMissingDataInfo(){
	1279	// //if(gainLossOptions::_accountForMissingData){
	1280	// // //gainLossAlphabet alph;
	1281	// // //_scZero.startZeroSequenceContainerGL(_sc,gainLossAlphabet());
	1282	// // //_LforMissingDataPerCat.resize(_sp->categories());
	1283	// // //_pLforMissingDataPerCat = &_LforMissingDataPerCat;
	1284	//
	1285	//
	1286	// // //_plogLforMissingData = &_logLforMissingData;
	1287	// // //_plogLforMissingData = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(_tr,_scZero,_sp);
	1288	//
	1289	// // computePijGam pi;
	1290	// // pi.fillPij(_tr,*_sp);
	1291	// // _pLforMissingDataPerCat = likelihoodComputationGL::getLofPosPerCat(0,_tr,_scZero,pi,_sp);
	1292	// // //_plogLforMissingData = log(likelihoodComputationGL::getLofPos(0,_tr,_scZero,pi,_sp,*_pLforMissingDataPerCat)); // cause error in tree destructor
	1293	// //}
	1294	// //else{
	1295	// // //_plogLforMissingData = NULL;
	1296	// // _pLforMissingDataPerCat = NULL;
	1297	// //}
	1298	//}
	1299
	1300
	1301
	1302	// Optimizations
	1303	/********************************************************************************************
	1304	*********************************************************************************************/
	1305	void gainLoss::startOptimizations(){
	1306	LOGnOUT(4,<<"\n\n *** Start Optimizations"<<endl);
	1307	time_t t1,t2;
	1308	time(&t1);
	1309
	1310	//bool isScaleTree = false;
	1311	bool isBBL = true; // in optimizer also check gainLossOptions::_isBBL. This if to differ from manyStarts
	1312	MDOUBLE epsilonOptimizationCorrected = gainLossOptions::_epsilonOptimizationIterationCycle;
	1313	MDOUBLE epsilonOptimizationModelCorrected = gainLossOptions::_epsilonOptimizationModel;
	1314	MDOUBLE epsilonOptimizationBBLCorrected = gainLossOptions::_epsilonOptimizationBBL;;
	1315	if(gainLossOptions::_correctOptimizationEpsilon && abs(_logL)>100 ){ // if Likelihood is computed with very bad seed - misleading
	1316	LOGnOUT(4,<<" Modify epsilonOptimizations according to logL: logL ("<<abs(_logL)<<") * originalEpsilon * percent improve ("<<gainLossOptions::_percentOfImprov<<") "<<endl);
	1317	epsilonOptimizationCorrected = min(abs(_logL) * gainLossOptions::_epsilonOptimizationIterationCycle * gainLossOptions::_percentOfImprov, gainLossOptions::_epsilonOptimizationIterationCycle*10);
	1318	epsilonOptimizationModelCorrected = min(abs(_logL) * gainLossOptions::_epsilonOptimizationModel * gainLossOptions::_percentOfImprov, gainLossOptions::_epsilonOptimizationModel*10);
	1319	epsilonOptimizationBBLCorrected = min(abs(_logL) * gainLossOptions::_epsilonOptimizationBBL * gainLossOptions::_percentOfImprov, gainLossOptions::_epsilonOptimizationBBL*10);
	1320	LOGnOUT(4,<<"eOptCorrected(cycle)=\t"<<epsilonOptimizationCorrected<<"\neOptModelCorrected=\t"<<epsilonOptimizationModelCorrected<<"\neOptBBLCorrected=\t"<<epsilonOptimizationBBLCorrected<<endl);
	1321	}
	1322	if(_sc.seqLen()<50){
	1323	LOGnOUT(4,<<"\n WARN: no branch length estimation is performed with too few positions ="<<_sc.seqLen()<<endl);
	1324	Parameters::updateParameter("_isBBLEMwithSimpleSpBeforeFullOptimization","0");
	1325	Parameters::updateParameter("_performOptimizationsBBL","0");
	1326	isBBL =false;
	1327	}
	1328
	1329	// NOTE:! This is within block of _performOptimizations to allow, if explicit param request - i.e., only Tree optimization
	1330	if(Parameters::getInt("_isMultipleAllBranchesByFactorAtStart") ){ // else unstable
	1331	if(_sc.seqLen()<50){
	1332	LOGnOUT(4,<<"\n WARN: Skip MultipleAllBranchesByFactorAtStart with too few number of positions "<<_sc.seqLen()<<endl);
	1333	multipleAllBranchesByFactorAtStartByMaxParsimonyCost(_CostOfTreeMP);
	1334
	1335	}else{
	1336	multipleAllBranchesByFactorAtStart(epsilonOptimizationBBLCorrected);
	1337	}
	1338	}
	1339
	1340	if(Parameters::getInt("_isBBLEMwithSimpleSpBeforeFullOptimization") ){
	1341	bBLEMwithSimpleSpBeforeFullOptimization(_tr,_sc,_spSimple,_sp,_spVVec,_gainDist,_lossDist,_unObservableData_p);
	1342	}
	1343
	1344	if(Parameters::getInt("_performOptimizations") ){
	1345	// correctOptimizationEpsilon
	1346
	1347
	1348	// optimize one Stochastic process
	1349	if(!gainLossOptions::_gainLossDist){
	1350	if(gainLossOptions::_initParamsAtRandPoints) initParamsAtRandPoints(gainLossOptions::_numberOfRandStartPoints,_sp,_unObservableData_p);
	1351	if(gainLossOptions::_performOptimizationsManyStarts)
	1352	optimizationsManyStarts(gainLossOptions::_epsilonOptimizationIterationCycleManyStarts,gainLossOptions::_maxNumOfIterationsManyStarts);
	1353	gainLossOptimizer glOpt(_tr,_sp,_scUniqPatterns,
	1354	epsilonOptimizationCorrected,gainLossOptions::_maxNumOfIterations,
	1355	epsilonOptimizationModelCorrected,gainLossOptions::_maxNumOfIterationsModel,
	1356	epsilonOptimizationBBLCorrected,gainLossOptions::_maxNumOfIterationsBBL,_weightsUniqPatterns,_unObservableData_p,
	1357	isBBL, gainLossOptions::_isbblLSWhenbblEMdontImprove);
	1358	_tr = glOpt.getOptTree();
	1359	_logL = glOpt.getBestL();
	1360	LOGnOUT(4,<<"# Best likelihood after optimization="<<_logL<<endl);
	1361	}
	1362
	1363	// optimize Mixture of Stochastic processes
	1364	else{
	1365	if(gainLossOptions::_initParamsAtRandPoints) initParamsAtRandPointsSPvv(gainLossOptions::_numberOfRandStartPoints,_spVVec,_gainDist,_lossDist,_unObservableData_p);
	1366	if(gainLossOptions::_performOptimizationsManyStarts)
	1367	optimizationsVVManyStarts(gainLossOptions::_epsilonOptimizationIterationCycleManyStarts,gainLossOptions::_maxNumOfIterationsManyStarts);
	1368	gainLossOptimizer glOpt(_tr,_spVVec,_gainDist,_lossDist,_scUniqPatterns,
	1369	epsilonOptimizationCorrected,gainLossOptions::_maxNumOfIterations,
	1370	epsilonOptimizationModelCorrected,gainLossOptions::_maxNumOfIterationsModel,
	1371	epsilonOptimizationBBLCorrected,gainLossOptions::_maxNumOfIterationsBBL,_weightsUniqPatterns,_unObservableData_p,
	1372	isBBL, gainLossOptions::_isbblLSWhenbblEMdontImprove); // only one set: epsilon,Iter for Model,BBL...
	1373	_tr = glOpt.getOptTree();
	1374	_logL = glOpt.getBestL();
	1375	LOGnOUT(4,<<"# Best likelihood after optimization="<<_logL<<endl);
	1376	}
	1377	}
	1378	// No Optimizations
	1379	else{
	1380	LOGnOUT(4,<<"NOTE: No optimization performed. Proceed with initialized parameter."<<endl);
	1381	}
	1382	// common lines:
	1383	bool _isTransferGainLossRateToFreq = false; // there is no point, if normalizing Q later...
	1384	if(_isTransferGainLossRateToFreq){
	1385	convertGainLossRatesToFreq();
	1386	}
	1387	if(gainLossOptions::_isAlphaEqBetaManipulation && _lossDist && _gainDist && isBetaOptimization(_lossDist) && isBetaOptimization(_gainDist) && !gainLossOptions::_isOptimizeGainLossRatioInsteadOfGainAndLossSeperately /&& gainLossOptions::_performOptimizationsBBL/){
	1388	AlphaEqBetaManipulation();
	1389	}
	1390	time(&t2);
	1391	LOGnOUT(4,<<"TIME = "<<(t2-t1)/60.0<<" minutes"<<endl<<endl);
	1392	}
	1393
	1394	/********************************************************************************************
	1395	*********************************************************************************************/
	1396	void gainLoss::printModellValuesOfParams()
	1397	{
	1398
	1399	string modelParams = gainLossOptions::_outDir + "//" + "EstimatedParameters.txt";
	1400	ofstream modelParamsStream(modelParams.c_str());
	1401	modelParamsStream<<"# Log-likelihood= "<<_logL<<endl;
	1402
	1403	if(gainLossOptions::_gainLossDist){
	1404	MDOUBLE bestGainAlpha=1;
	1405	MDOUBLE bestGainBeta=1;
	1406	if(isAlphaOptimization(_gainDist)){
	1407	bestGainAlpha=getRateAlpha(_gainDist);
	1408	LOGnOUT(4,<<"AlphaGain "<<bestGainAlpha <<endl);
	1409	modelParamsStream<<"_userAlphaGain "<<bestGainAlpha<<endl;
	1410	}
	1411	//if(isBetaOptimization(_gainDist))LOGnOUT(4,<<" BetaGain "<<getRateBeta(_gainDist) <<endl);
	1412	if(isBetaOptimization(_gainDist)){
	1413	bestGainBeta=getRateBeta(_gainDist);
	1414	LOGnOUT(4,<<"BetaGain "<<bestGainBeta<<endl);
	1415	LOGnOUT(4,<<" Gain Expectation = "<< rateExpectation(_gainDist)<<endl);
	1416	LOGnOUT(4,<<" Gain Expectancy = "<< bestGainAlpha/bestGainBeta<<endl);
	1417	LOGnOUT(4,<<" Gain Standard Deviation = "<< sqrt(bestGainAlpha/(bestGainBeta*bestGainBeta))<<endl);
	1418	modelParamsStream<<"_userBetaGain "<<bestGainBeta<<endl;
	1419	modelParamsStream<<"# Gain Expectation = "<<rateExpectation(_gainDist)<<endl;
	1420	modelParamsStream<<"# Gain Expectancy = "<<bestGainAlpha/bestGainBeta<<endl;
	1421	modelParamsStream<<"# Gain Standard Deviation = "<<sqrt(bestGainAlpha/(bestGainBeta*bestGainBeta))<<endl;
	1422	}
	1423	if(isInvariantOptimization(_gainDist, true)){
	1424	MDOUBLE probInvariantGain = static_cast<generalGammaDistributionPlusInvariant*>(_gainDist)->getInvProb();
	1425	LOGnOUT(4,<<" ProbInvariantGain "<<probInvariantGain <<endl);
	1426	modelParamsStream<<"_userProbInvariantGain "<<probInvariantGain<<endl;
	1427	}
	1428	MDOUBLE bestLossAlpha=1;
	1429	MDOUBLE bestLossBeta=1;
	1430	if(isAlphaOptimization(_lossDist)){
	1431	bestLossAlpha = getRateAlpha(_lossDist);
	1432	LOGnOUT(4,<<"AlphaLoss "<<bestLossAlpha <<endl);
	1433	modelParamsStream<<"_userAlphaLoss "<<bestLossAlpha<<endl;
	1434	}
	1435	if(isBetaOptimization(_lossDist)){
	1436	bestLossBeta=getRateBeta(_lossDist);
	1437	LOGnOUT(4,<<"BetaLoss "<<bestLossBeta<<endl);
	1438	LOGnOUT(4,<<" Loss Expectation = "<< rateExpectation(_lossDist)<<endl);
	1439	LOGnOUT(4,<<" Loss Expectancy = "<< bestLossAlpha/bestLossBeta<<endl);
	1440	LOGnOUT(4,<<" Loss Standard Deviation = "<< sqrt(bestLossAlpha/(bestLossBeta*bestLossBeta))<<endl);
	1441	modelParamsStream<<"_userBetaLoss "<<bestLossBeta<<endl;
	1442	modelParamsStream<<"# Loss Expectation = "<<rateExpectation(_lossDist)<<endl;
	1443	modelParamsStream<<"# Loss Expectancy = "<<bestLossAlpha/bestLossBeta<<endl;
	1444	modelParamsStream<<"# Loss Standard Deviation = "<<sqrt(bestLossAlpha/(bestLossBeta*bestLossBeta))<<endl;
	1445	}
	1446	if(isInvariantOptimization(_lossDist, true)){
	1447	MDOUBLE probInvariantLoss = static_cast<generalGammaDistributionPlusInvariant*>(_lossDist)->getInvProb();
	1448	LOGnOUT(4,<<" ProbInvariantLoss "<<probInvariantLoss <<endl);
	1449	modelParamsStream<<"_userProbInvariantLoss "<<probInvariantLoss<<endl;
	1450	}
	1451	LOGnOUT(4,<<" Expectancy(Gain)/Expectancy(Loss) ratio by Gamma Params= "<< (bestGainAlpha/bestGainBeta)/(bestLossAlpha/bestLossBeta)<<endl);
	1452	//LOGnOUT(4,<<" Expectancy(Gain/Loss) ratio by computation = "<< computeExpectationOfGainLossRatio(_gainDist, _lossDist)<<endl);
	1453	LOGnOUT(4,<<" Expectancy(Gain)/Expectancy(Loss) by computation = "<< computeExpOfGainByExpOfLossRatio(_gainDist, _lossDist)<<endl);
	1454	modelParamsStream<<"# GainLossRatio Expectation "<<computeExpOfGainByExpOfLossRatio(_gainDist, _lossDist)<<endl;
	1455	modelParamsStream<<"_userGainLossRatio "<<(bestGainAlpha/bestGainBeta)/(bestLossAlpha/bestLossBeta)<<endl;
	1456
	1457	if (gainLossOptions::_isRootFreqEQstationary){
	1458	MDOUBLE estimatedStationaryFreq = computeExpectationOfStationaryFrequency(_gainDist, _lossDist);
	1459	LOGnOUT(4,<<" Stationary '1' Freq at the root - for each stochastic process g/(g+l), with expectation of "<<estimatedStationaryFreq<<endl);
	1460	modelParamsStream<<"# Stationary '1' Freq at the root - for each stochastic process g/(g+l), with expectation of "<<estimatedStationaryFreq<<endl;
	1461	}
	1462	else{
	1463	MDOUBLE thetaVal = static_cast<gainLossModel>((_spVVec[0][0]).getPijAccelerator()->getReplacementModel())->getTheta();
	1464	switch (gainLossOptions::_characterFreqEval){
	1465	case (gainLossOptions::FiftyFifty):
	1466	LOGnOUT(4,<<"frequencies were set to FiftyFifty "<<thetaVal<<endl);
	1467	modelParamsStream<<"# frequencies were set to FiftyFifty "<<thetaVal<<endl;
	1468	break;
	1469	case (gainLossOptions::LeavesAve):
	1470	LOGnOUT(4,<<"frequencies are based on LeavesAve (-F option) "<<thetaVal<<endl);
	1471	modelParamsStream<<"# frequencies are based on LeavesAve (-F option) "<<thetaVal<<endl;
	1472	break;
	1473	case (gainLossOptions::optimizeOverTree):
	1474	LOGnOUT(4,<<"Theta "<<thetaVal <<endl);
	1475	modelParamsStream<<"_userTheta "<<thetaVal<<endl;
	1476	break;
	1477	}
	1478	}
	1479	}
	1480
	1481	else{
	1482	if(isAlphaOptimization(_sp->distr())){
	1483	LOGnOUT(4,<<" AlphaRate "<<getRateAlpha(_sp->distr()) <<endl);
	1484	modelParamsStream<<"_userAlphaRate "<<getRateAlpha(_sp->distr())<<endl;
	1485	}
	1486	if(isBetaOptimization(_sp->distr())){
	1487	LOGnOUT(4,<<" BetaRate "<<getRateBeta(_sp->distr()) <<endl);
	1488	modelParamsStream<<"_userBetaRate "<<getRateBeta(_sp->distr())<<endl;
	1489	}
	1490	if(isInvariantOptimization(_sp->distr(), true)){
	1491	MDOUBLE probInvariantRate = static_cast<generalGammaDistributionPlusInvariant*>(_sp->distr())->getInvProb();
	1492	LOGnOUT(4,<<" ProbInvariantRate "<<probInvariantRate <<endl);
	1493	modelParamsStream<<"_userProbInvariantRate "<<probInvariantRate<<endl;
	1494	}
	1495
	1496	MDOUBLE gain = static_cast<gainLossModel>((_sp).getPijAccelerator()->getReplacementModel())->getMu1();
	1497	LOGnOUT(4,<<" Gain "<<gain <<endl);
	1498	modelParamsStream<<"_userGain "<<gain<<endl;
	1499	MDOUBLE loss;
	1500	if(!gainLossOptions::_isReversible){
	1501	loss = static_cast<gainLossModelNonReversible>((_sp).getPijAccelerator()->getReplacementModel())->getMu2();
	1502	LOGnOUT(4,<<" Loss "<<loss<<endl);
	1503	modelParamsStream<<"_userLoss "<<loss<<endl;
	1504	}
	1505	else{
	1506	loss = static_cast<gainLossModel>((_sp).getPijAccelerator()->getReplacementModel())->getMu2();
	1507	}
	1508	LOGnOUT(4,<<" Gain/Loss ratio= "<< gain/loss<<endl);
	1509	modelParamsStream<<"_userGainLossRatio "<<gain/loss<<endl;
	1510
	1511	if (gainLossOptions::_isRootFreqEQstationary){
	1512	LOGnOUT(4,<<" Stationary '1' Freq at the root (g/(g+l) = "<<gain/(gain+loss) <<endl);
	1513	modelParamsStream<<"# Stationary '1' Freq at the root (g/(g+l) = "<< gain/(gain+loss)<<endl;
	1514	}
	1515	else{
	1516	MDOUBLE thetaVal = static_cast<gainLossModel>((_sp).getPijAccelerator()->getReplacementModel())->getTheta();
	1517	switch (gainLossOptions::_characterFreqEval){
	1518	case (gainLossOptions::FiftyFifty):
	1519	LOGnOUT(4,<<"frequencies were set to FiftyFifty "<<thetaVal<<endl);
	1520	modelParamsStream<<"# frequencies were set to FiftyFifty "<<thetaVal<<endl;
	1521	break;
	1522	case (gainLossOptions::LeavesAve):
	1523	LOGnOUT(4,<<"frequencies are based on LeavesAve (-F option) "<<thetaVal<<endl);
	1524	modelParamsStream<<"# frequencies are based on LeavesAve (-F option) "<<thetaVal<<endl;
	1525	break;
	1526	case (gainLossOptions::optimizeOverTree):
	1527	LOGnOUT(4,<<" Theta ('1' at root):"<<thetaVal <<endl);
	1528	modelParamsStream<<"_userTheta "<<thetaVal<<endl;
	1529	break;
	1530	}
	1531	}
	1532	}
	1533	LOGnOUT(4,<<" Total branch lengths:"<<_tr.getAllBranchesLengthSum() <<endl);
	1534	modelParamsStream<<"# Total branch lengths:"<<_tr.getAllBranchesLengthSum()<<endl;
	1535	}
	1536
	1537	/********************************************************************************************
	1538	*********************************************************************************************/
	1539	void gainLoss::printModellValuesOfParams(tree& tr, vector<vector<stochasticProcess> >& spVVec, distribution gainDist, distribution * lossDist)
	1540	{
	1541	MDOUBLE bestGainAlpha=1;
	1542	MDOUBLE bestGainBeta=1;
	1543	if(isAlphaOptimization(gainDist)){
	1544	bestGainAlpha=getRateAlpha(gainDist);
	1545	LOGnOUT(4,<<"AlphaGain "<<bestGainAlpha <<endl);
	1546	}
	1547	if(isBetaOptimization(gainDist)){
	1548	bestGainBeta=getRateBeta(gainDist);
	1549	LOGnOUT(4,<<"BetaGain "<<bestGainBeta<<endl);
	1550	LOGnOUT(4,<<" Gain Expectation = "<< rateExpectation(_gainDist)<<endl);
	1551	LOGnOUT(4,<<" Gain Expectancy = "<< bestGainAlpha/bestGainBeta<<endl);
	1552	LOGnOUT(4,<<" Gain Standard Deviation = "<< sqrt(bestGainAlpha/(bestGainBeta*bestGainBeta))<<endl);
	1553	}
	1554	if(isInvariantOptimization(gainDist, true)) LOGnOUT(4,<<" ProbInvariantGain "<<static_cast<generalGammaDistributionPlusInvariant*>(gainDist)->getInvProb() <<endl);
	1555	MDOUBLE bestLossAlpha=1;
	1556	MDOUBLE bestLossBeta=1;
	1557	if(isAlphaOptimization(lossDist)){
	1558	bestLossAlpha = getRateAlpha(lossDist);
	1559	LOGnOUT(4,<<"AlphaLoss "<<bestLossAlpha <<endl);
	1560	}
	1561	if(isBetaOptimization(lossDist)){
	1562	bestLossBeta=getRateBeta(lossDist);
	1563	LOGnOUT(4,<<"BetaLoss "<<bestLossBeta<<endl);
	1564	LOGnOUT(4,<<" Loss Expectation = "<< rateExpectation(_lossDist)<<endl);
	1565	LOGnOUT(4,<<" Loss Expectancy = "<< bestLossAlpha/bestLossBeta<<endl);
	1566	LOGnOUT(4,<<" Loss Standard Deviation = "<< sqrt(bestLossAlpha/(bestLossBeta*bestLossBeta))<<endl);
	1567	}
	1568	if(isInvariantOptimization(lossDist, true))LOGnOUT(4,<<" ProbInvariantLoss "<<static_cast<generalGammaDistributionPlusInvariant*>(lossDist)->getInvProb() <<endl);
	1569
	1570	LOGnOUT(4,<<" Expectancy(Gain)/Expectancy(Loss) ratio by Gamma Params= "<< (bestGainAlpha/bestGainBeta)/(bestLossAlpha/bestLossBeta)<<endl);
	1571	//LOGnOUT(4,<<" Expectancy(Gain/Loss) ratio by computation = "<< computeExpectationOfGainLossRatio(gainDist, lossDist)<<endl);
	1572	LOGnOUT(4,<<" Expectancy(Gain)/Expectancy(Loss) by computation = "<< computeExpOfGainByExpOfLossRatio(gainDist, lossDist)<<endl);
	1573
	1574	if (gainLossOptions::_isRootFreqEQstationary){
	1575	MDOUBLE estimatedStationaryFreq = computeExpectationOfStationaryFrequency(gainDist,lossDist);
	1576	LOGnOUT(4,<<" Stationary '1' Freq at the root - for each stochastic process g/(g+l), with expectation of "<<estimatedStationaryFreq<<endl);}
	1577	else{
	1578	switch (gainLossOptions::_characterFreqEval){
	1579	case (gainLossOptions::FiftyFifty):
	1580	LOGnOUT(4,<<"frequencies were set to FiftyFifty "<<static_cast<gainLossModel>((spVVec[0][0]).getPijAccelerator()->getReplacementModel())->getTheta()<<endl);
	1581	break;
	1582	case (gainLossOptions::LeavesAve):
	1583	LOGnOUT(4,<<"frequencies are based on LeavesAve (-F option) "<<static_cast<gainLossModel>((spVVec[0][0]).getPijAccelerator()->getReplacementModel())->getTheta()<<endl);
	1584	break;
	1585	case (gainLossOptions::optimizeOverTree):
	1586	LOGnOUT(4,<<"Theta "<<static_cast<gainLossModel>((spVVec[0][0]).getPijAccelerator()->getReplacementModel())->getTheta() <<endl);
	1587	break;
	1588	}
	1589	}
	1590	LOGnOUT(4,<<" Total branch lengths:"<<tr.getAllBranchesLengthSum() <<endl);
	1591	}
	1592
	1593	/********************************************************************************************
	1594	*********************************************************************************************/
	1595	void gainLoss::printModellValuesOfParams(stochasticProcess* sp, tree& tr)
	1596	{
	1597	if(isAlphaOptimization(sp->distr()))LOGnOUT(4,<<" AlphaRate "<<getRateAlpha(sp->distr()) <<endl);
	1598	if(isBetaOptimization(sp->distr()))LOGnOUT(4,<<" BetaRate "<<getRateBeta(sp->distr()) <<endl);
	1599	MDOUBLE gain = static_cast<gainLossModel>((sp).getPijAccelerator()->getReplacementModel())->getMu1();
	1600	LOGnOUT(4,<<" Gain "<<gain <<endl);
	1601	MDOUBLE loss;
	1602	if(!gainLossOptions::_isReversible){
	1603	loss = static_cast<gainLossModelNonReversible>((sp).getPijAccelerator()->getReplacementModel())->getMu2();
	1604	LOGnOUT(4,<<" Loss "<<loss<<endl);
	1605	LOGnOUT(4,<<" Gain/Loss ratio= "<< gain/loss<<endl);
	1606	}
	1607	else{
	1608	loss = static_cast<gainLossModel>((sp).getPijAccelerator()->getReplacementModel())->getMu2();
	1609	}
	1610	if (gainLossOptions::_isRootFreqEQstationary){
	1611	LOGnOUT(4,<<" Stationary '1' Freq at the root (g/(g+l) = "<<gain/(gain+loss) <<endl);}
	1612	else{
	1613	switch (gainLossOptions::_characterFreqEval){
	1614	case (gainLossOptions::FiftyFifty):
	1615	LOGnOUT(4,<<"frequencies were set to FiftyFifty "<<static_cast<gainLossModel>((sp).getPijAccelerator()->getReplacementModel())->getTheta()<<endl);
	1616	break;
	1617	case (gainLossOptions::LeavesAve):
	1618	LOGnOUT(4,<<"frequencies are based on LeavesAve (-F option) "<<static_cast<gainLossModel>((sp).getPijAccelerator()->getReplacementModel())->getTheta()<<endl);
	1619	break;
	1620	case (gainLossOptions::optimizeOverTree):
	1621	LOGnOUT(4,<<" Theta ('1' at root):"<<static_cast<gainLossModel>((sp).getPijAccelerator()->getReplacementModel())->getTheta() <<endl);
	1622	break;
	1623	}
	1624	}
	1625	LOGnOUT(4,<<" Total branch lengths:"<<tr.getAllBranchesLengthSum() <<endl);
	1626	}
	1627
	1628
	1629
	1630
	1631	/********************************************************************************************
	1632	*********************************************************************************************/
	1633	void gainLoss::optimizationsManyStarts(const MDOUBLE epsilonOptimization, const int numIterations){
	1634	int bestModel=0;
	1635	MDOUBLE epsilonOptimizationCorrected = min(epsilonOptimization, abs(_logL)*gainLossOptions::_percentOfImprovManySarts);
	1636	LOGnOUT(4,<<"\n\n --- start optimizationsManyStarts for "<<gainLossOptions::_numberOfRandPointsInOptimization<<" rand points, with epsilonIteration "<<epsilonOptimizationCorrected<<endl);
	1637
	1638	Vdouble likeVecOpt;
	1639	likeVecOpt.resize(gainLossOptions::_numberOfRandPointsInOptimization);
	1640	vector<stochasticProcess*> spVecOpt;
	1641	spVecOpt.resize(gainLossOptions::_numberOfRandPointsInOptimization);
	1642	vector<tree> trVecOpt;
	1643	trVecOpt.resize(gainLossOptions::_numberOfRandPointsInOptimization);
	1644
	1645	for(int i=0; i<gainLossOptions::_numberOfRandPointsInOptimization; ++i){
	1646	LOGnOUT(4,<<"\n\n-------startOptimization "<<i+1<<endl);
	1647	stochasticProcess* sp = _sp->clone();
	1648	tree tr = _tr;
	1649	unObservableData* currUnObs;
	1650	if(_unObservableData_p)
	1651	currUnObs = _unObservableData_p->clone();
	1652	else
	1653	currUnObs = NULL;
	1654
	1655	// initialize
	1656	initParamsAtRandPoints(gainLossOptions::_numberOfRandStartPoints,sp,currUnObs);
	1657	// optimize
	1658	//cout<<"before: "<<currUnObs->getlogLforMissingData()<<endl;
	1659	bool isbblLSWhenbblEMdontImprove = false;
	1660	gainLossOptimizer glOpt(tr,sp,_scUniqPatterns,
	1661	epsilonOptimizationCorrected,numIterations,
	1662	epsilonOptimizationCorrected*gainLossOptions::_epsilonFactor_Model,
	1663	(int)floor(numIterations*gainLossOptions::_numIterationsFactor_Model),
	1664	epsilonOptimizationCorrected*gainLossOptions::_epsilonFactor_BBL,
	1665	(int)floor(numIterations*gainLossOptions::_numIterationsFactor_BBL),
	1666	_weightsUniqPatterns,
	1667	currUnObs,(bool)Parameters::getInt("_performOptimizationsBBLManyStarts"), isbblLSWhenbblEMdontImprove);
	1668	//cout<<"after: "<<currUnObs->getlogLforMissingData()<<endl;
	1669
	1670	tr = glOpt.getOptTree();
	1671	spVecOpt[i]=sp;
	1672	trVecOpt[i]=tr;
	1673	likeVecOpt[i]=glOpt.getBestL();
	1674	//if(currUnObs){
	1675	// currUnObs->setLforMissingData(tr,sp);
	1676	//}
	1677	MDOUBLE estL = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(tr,_scUniqPatterns,*sp,_weightsUniqPatterns,currUnObs);
	1678	if(!DEQUAL(likeVecOpt[i],estL)){
	1679	LOGnOUT(3,<<" --- error: different likelihood after optimizeGainLossModel,diff= "<<likeVecOpt[i]-estL <<"\n");
	1680	}
	1681	if(likeVecOpt[i]>likeVecOpt[bestModel])
	1682	bestModel = i;
	1683	LOGnOUT(4,<<"-------L= "<<likeVecOpt[i]<<endl);
	1684	if(currUnObs) delete currUnObs;
	1685	}
	1686	_sp = spVecOpt[bestModel];
	1687	_tr = trVecOpt[bestModel];
	1688	if(Parameters::getInt("_accountForMissingData"))
	1689	_unObservableData_p->setLforMissingData(_tr,_sp);
	1690
	1691	LOGnOUT(4,<<" --- likelihood of All models: "<<endl);
	1692	for(int i=0; i<gainLossOptions::_numberOfRandPointsInOptimization; ++i){
	1693	LOGnOUT(4,<<"likelihood of model "<<i+1<<"\t"<<likeVecOpt[i]<<endl);
	1694	if((spVecOpt[i]) && (i!=bestModel)) {delete spVecOpt[i];}
	1695	}
	1696	LOGnOUT(4,<<"likelihood of Best model "<<bestModel+1<<"\t"<<likeVecOpt[bestModel]<<endl);
	1697	MDOUBLE lll = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(_tr,_scUniqPatterns,*_sp,_weightsUniqPatterns,_unObservableData_p);
	1698	if(!DEQUAL(likeVecOpt[bestModel],lll)){
	1699	LOGnOUT(3,<<"ERROR: re-computed likelihood is diff by- "<<likeVecOpt[bestModel]<<lll<<endl);
	1700	}
	1701	}
	1702
	1703
	1704	/********************************************************************************************
	1705	*********************************************************************************************/
	1706	void gainLoss::optimizationsVVManyStarts(const MDOUBLE epsilonOptimization, const int numIterations){
	1707	int bestModel=0;
	1708	Vdouble likeVecOpt;
	1709	MDOUBLE epsilonOptimizationCorrected = min(epsilonOptimization, abs(_logL)*gainLossOptions::_percentOfImprovManySarts);
	1710	LOGnOUT(4,<<"\n\n --- start optimizationsVVManyStarts for "<<gainLossOptions::_numberOfRandPointsInOptimization<<" rand points, with epsilonIteration "<<epsilonOptimizationCorrected<<endl);
	1711
	1712	likeVecOpt.resize(gainLossOptions::_numberOfRandPointsInOptimization);
	1713	vector<vector<vector<stochasticProcess*> > > spVVVecOpt;
	1714	spVVVecOpt.resize(gainLossOptions::_numberOfRandPointsInOptimization);
	1715	vector<distribution*> gainDistVecOpt;
	1716	gainDistVecOpt.resize(gainLossOptions::_numberOfRandPointsInOptimization);
	1717	vector<distribution*> lossDistVecOpt;
	1718	lossDistVecOpt.resize(gainLossOptions::_numberOfRandPointsInOptimization);
	1719	vector<tree> trVecOpt;
	1720	trVecOpt.resize(gainLossOptions::_numberOfRandPointsInOptimization);
	1721
	1722	for(int i=0; i<gainLossOptions::_numberOfRandPointsInOptimization; ++i){
	1723	LOGnOUT(4,<<"\n\n-------startOptimization "<<i+1<<endl);
	1724	tree tr = _tr;
	1725	distribution* gainDist =_gainDist->clone();
	1726	distribution* lossDist =_lossDist->clone();
	1727	vector<vector<stochasticProcess*> > spVVec;
	1728	spVVec.resize(_gainDist->categories());
	1729	for (int gainCategor=0; gainCategor<_gainDist->categories(); gainCategor++){
	1730	spVVec[gainCategor].resize(_lossDist->categories());
	1731	for (int lossCategor=0; lossCategor<_lossDist->categories(); lossCategor++){
	1732	spVVec[gainCategor][lossCategor] = _spVVec[gainCategor][lossCategor]->clone();
	1733	}
	1734	}
	1735	//stochasticProcess* sp = _sp->clone();
	1736	unObservableData* currUnObs;
	1737	if(_unObservableData_p)
	1738	currUnObs = _unObservableData_p->clone();
	1739	else
	1740	currUnObs = NULL;
	1741	//initialize random
	1742	initParamsAtRandPointsSPvv(gainLossOptions::_numberOfRandStartPoints,spVVec,gainDist,lossDist,currUnObs);
	1743	bool isbblLSWhenbblEMdontImprove = false;
	1744	gainLossOptimizer glOpt(tr,spVVec,gainDist,lossDist,_scUniqPatterns,
	1745	epsilonOptimizationCorrected,numIterations,
	1746	epsilonOptimizationCorrected*gainLossOptions::_epsilonFactor_Model,
	1747	(int)floor(numIterations*gainLossOptions::_numIterationsFactor_Model),
	1748	epsilonOptimizationCorrected*gainLossOptions::_epsilonFactor_BBL,
	1749	(int)floor(numIterations*gainLossOptions::_numIterationsFactor_BBL),
	1750	_weightsUniqPatterns, currUnObs,(bool)Parameters::getInt("_performOptimizationsBBLManyStarts"), isbblLSWhenbblEMdontImprove);
	1751	tr = glOpt.getOptTree();
	1752	spVVVecOpt[i]=spVVec;
	1753	gainDistVecOpt[i]=gainDist;
	1754	lossDistVecOpt[i]=lossDist;
	1755	trVecOpt[i]=tr;
	1756	likeVecOpt[i]=glOpt.getBestL();
	1757	if(likeVecOpt[i]>likeVecOpt[bestModel])
	1758	bestModel = i;
	1759	LOGnOUT(4,<<"-------L= "<<likeVecOpt[i]<<endl);
	1760	if(currUnObs) delete currUnObs;
	1761	}
	1762
	1763	_spVVec = spVVVecOpt[bestModel];
	1764	_gainDist = gainDistVecOpt[bestModel];
	1765	_lossDist = lossDistVecOpt[bestModel];
	1766	_tr = trVecOpt[bestModel];
	1767	if(_unObservableData_p) _unObservableData_p->setLforMissingData(_tr,_spVVec,_gainDist,_lossDist);
	1768
	1769	for(int i=0; i<gainLossOptions::_numberOfRandPointsInOptimization; ++i){
	1770	LOGnOUT(4,<<"likelihood of model "<<i+1<<"\t"<<likeVecOpt[i]<<endl);
	1771	if((gainDistVecOpt[i]) && (i!=bestModel)) {delete gainDistVecOpt[i];}
	1772	if((lossDistVecOpt[i]) && (i!=bestModel)) {delete lossDistVecOpt[i];}
	1773	if((spVVVecOpt[i][0][0]) && (i!=bestModel)){
	1774	for (int gainCategor=0; gainCategor<_gainDist->categories(); gainCategor++){
	1775	for (int lossCategor=0; lossCategor<_lossDist->categories(); lossCategor++){
	1776	delete spVVVecOpt[i][gainCategor][lossCategor];
	1777	}
	1778	}
	1779	}
	1780	}
	1781	LOGnOUT(4,<<"likelihood of Best model "<<bestModel+1<<"\t"<<likeVecOpt[bestModel]<<endl);
	1782
	1783	}
	1784
	1785	/********************************************************************************************
	1786	initParamsAtIntervalPoints
	1787	*********************************************************************************************/
	1788	//void gainLoss::initParamsAtIntervalPoints(int pointIndex,int numOfPoints, stochasticProcess* sp, unObservableData* currUnObs, ostream& out){
	1789	// int numberOfParameters = 1;
	1790	// bool optimizeAlpha = isAlphaOptimization(sp->distr());
	1791	// bool optimizeBeta = isBetaOptimization(sp->distr());
	1792	// bool optimizeMixture = isMixOptimization(sp->distr());
	1793	// bool probInvariant = isInvariantOptimization(sp->distr());
	1794	// bool evalTheta = isThetaOptimization();
	1795	// if(optimizeAlpha)
	1796	// ++numberOfParameters;
	1797	// if(optimizeBeta)
	1798	// ++numberOfParameters;
	1799	// if(evalTheta)
	1800	// ++numberOfParameters;
	1801	// if(probInvariant)
	1802	// ++numberOfParameters;
	1803	// if(optimizeMixture)
	1804	// ++numberOfParameters;
	1805	// if (!gainLossOptions::_isReversible)
	1806	// ++numberOfParameters;
	1807	//
	1808	// MDOUBLE numOfPointsPerParam = (MDOUBLE)numOfPoints/numberOfParameters;
	1809	//
	1810	//
	1811	//}
	1812
	1813
	1814
	1815	/********************************************************************************************
	1816	initParamsAtRandPoints
	1817	*********************************************************************************************/
	1818	void gainLoss::initParamsAtRandPoints(int numOfRandPoints, stochasticProcess* sp, unObservableData* currUnObs, ostream& out){
	1819	time_t t1;
	1820	time(&t1);
	1821	time_t t2;
	1822
	1823	LOGnOUT(4,<<"Starting initParamsAtRandPoints with: numOfRandPoints="<<numOfRandPoints<<endl);
	1824	bool optimizeAlpha = isAlphaOptimization(sp->distr());
	1825	bool optimizeBeta = isBetaOptimization(sp->distr());
	1826	//bool optimizeMixture = isMixOptimization(sp->distr());
	1827	bool probInvariant = isInvariantOptimization(sp->distr());
	1828	bool evalTheta = isThetaOptimization();
	1829
	1830	MDOUBLE bestL = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(_tr,_scUniqPatterns,*_sp,_weightsUniqPatterns,currUnObs);
	1831	MDOUBLE bestM1 =1;
	1832	MDOUBLE bestM2 =1;
	1833	MDOUBLE bestAlpha =1;
	1834	MDOUBLE bestBeta =1;
	1835	MDOUBLE bestTheta =0.5;
	1836	MDOUBLE bestprobInvariantRate =0.05;
	1837	bool isImprovedRandPoint = false;
	1838
	1839	MDOUBLE L =VERYSMALL;
	1840	MDOUBLE currM1;
	1841	MDOUBLE currM2;
	1842	MDOUBLE currAlpha;
	1843	MDOUBLE currBeta;
	1844	MDOUBLE currTheta;
	1845	MDOUBLE currprobInvariantRate;
	1846	int i;
	1847	for (i = 0; i < numOfRandPoints ; ++i) {
	1848	currM1 =talRandom::giveRandomNumberBetweenTwoPoints(gainLossOptions::_userGainMin, gainLossOptions::_userGainMax);
	1849	if (!gainLossOptions::_isReversible) currM2=talRandom::giveRandomNumberBetweenTwoPoints(gainLossOptions::_userLossMin, gainLossOptions::_userLossMax);
	1850	if(optimizeAlpha) currAlpha=talRandom::giveRandomNumberBetweenTwoPoints(gainLossOptions::_userAlphaRateMin, gainLossOptions::_userAlphaRateMax);
	1851	if(optimizeBeta) currBeta=talRandom::giveRandomNumberBetweenTwoPoints(gainLossOptions::_userBetaRateMin, gainLossOptions::_userBetaRateMax);
	1852	if(evalTheta) currTheta=talRandom::giveRandomNumberBetweenTwoPoints(gainLossOptions::_userThetaMin, gainLossOptions::_userThetaMax);
	1853	if(probInvariant) currprobInvariantRate =talRandom::giveRandomNumberBetweenTwoPoints(gainLossOptions::_userProbInvariantRateMin, gainLossOptions::_userProbInvariantRateMax);
	1854	static_cast<gainLossModel*>(sp->getPijAccelerator()->getReplacementModel())->setMu1(currM1, gainLossOptions::_isReversible);
	1855	if (!gainLossOptions::_isReversible){
	1856	static_cast<gainLossModelNonReversible*>(sp->getPijAccelerator()->getReplacementModel())->setMu2(currM2); }
	1857	if(optimizeAlpha){
	1858	setRateAlpha(sp->distr(),currAlpha);; }
	1859	if(optimizeBeta){
	1860	setRateBeta(sp->distr(),currBeta); }
	1861	if(evalTheta){
	1862	static_cast<gainLossModel*>(sp->getPijAccelerator()->getReplacementModel())->setTheta(currTheta); }
	1863	if(probInvariant){
	1864	static_cast<generalGammaDistributionPlusInvariant*>(sp->distr())->setInvProb(currprobInvariantRate); }
	1865
	1866	// compute Likelihood
	1867	MDOUBLE sumPijQij = normalizeQ(sp);
	1868	if(currUnObs) currUnObs->setLforMissingData(_tr,sp);
	1869	L = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(_tr,_scUniqPatterns,*sp,_weightsUniqPatterns,currUnObs);
	1870
	1871	//print
	1872	LOG(7,<<"--paramsSet: "<<i<<endl);
	1873	LOG(7,<<"mu1="<<currM1<<endl);
	1874	if (!gainLossOptions::_isReversible) LOG(7,<<"mu2="<<currM2<<endl);
	1875	if(optimizeAlpha) LOG(7,<<"AlphaRate="<<currAlpha<<endl);
	1876	if(optimizeBeta) LOG(7,<<"AlphaBeta="<<currBeta<<endl);
	1877	LOG(7,<<"likelihood is "<<L<<endl);
	1878
	1879
	1880	if(bestL < L){
	1881	bestM1 = currM1;
	1882	if (!gainLossOptions::_isReversible)bestM2 = currM2;
	1883	if(optimizeAlpha)bestAlpha = currAlpha;
	1884	if(optimizeBeta)bestBeta = currBeta;
	1885	if(evalTheta)bestTheta = currTheta;
	1886	if(probInvariant) bestprobInvariantRate = currprobInvariantRate;
	1887	bestL = L;
	1888	isImprovedRandPoint = true;
	1889	LOGnOUT(4,<<" logL improved = "<<L<<" at rand point "<<i<<endl);
	1890	}
	1891	if(isImprovedRandPoint && i>10) // The loop is break after improvement and numOfRandPoints/2
	1892	break;
	1893
	1894	}
	1895	// set best params after all rand points were calculated
	1896	static_cast<gainLossModel>((sp).getPijAccelerator()->getReplacementModel())->setMu1(bestM1,gainLossOptions::_isReversible);
	1897	if (!gainLossOptions::_isReversible)
	1898	static_cast<gainLossModelNonReversible>((sp).getPijAccelerator()->getReplacementModel())->setMu2(bestM2);
	1899	if(optimizeAlpha)
	1900	setRateAlpha((*sp).distr(),bestAlpha);
	1901	if(optimizeBeta)
	1902	setRateBeta((*sp).distr(),bestBeta);
	1903	if(evalTheta)
	1904	static_cast<gainLossModel>((sp).getPijAccelerator()->getReplacementModel())->setTheta(bestTheta);
	1905	if(probInvariant)
	1906	static_cast<generalGammaDistributionPlusInvariant*>(sp->distr())->setInvProb(bestprobInvariantRate);
	1907	if(currUnObs) currUnObs->setLforMissingData(_tr,sp);
	1908	L = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(_tr,_scUniqPatterns,*sp,_weightsUniqPatterns,currUnObs);
	1909	time(&t2);
	1910	LOGnOUT(4,<<"End initParamsAtRandPoints after "<<i<<" randPoints with:\n bestGain "<<bestM1<<"\n bestLoss "<<bestM2<<
	1911	"\n bestAlpha "<<bestAlpha<<"\n bestBeta "<<bestBeta <<"\n bestTheta "<<bestTheta <<"\n bestlogL "<<L<<endl);
	1912	LOGnOUT(4,<<"initParamsAtRandPoints RUNNING TIME = "<<(t2-t1)/60.0<<" minutes"<<endl);
	1913	}
	1914
	1915
	1916	/********************************************************************************************
	1917	*********************************************************************************************/
	1918	void gainLoss::initParamsAtRandPointsSPvv(int numOfRandPoints, vector<vector<stochasticProcess> >& spVVec, distribution gainDist, distribution * lossDist,unObservableData* currUnObs, ostream& out){
	1919	time_t t1;
	1920	time(&t1);
	1921	time_t t2;
	1922
	1923	LOGnOUT(4,<<"Starting initParamsAtRandPointsSPvv with: numOfRandPoints="<<numOfRandPoints<<endl);
	1924	MDOUBLE bestL = likelihoodComputationGL::getTreeLikelihoodAllPosAlphTheSame(_tr,_scUniqPatterns,_spVVec, _gainDist,_lossDist,_weightsUniqPatterns,currUnObs);
	1925	stochasticProcess* sp = spVVec[0][0];
	1926
	1927	MDOUBLE bestGainAlpha=1; //Gain
	1928	MDOUBLE bestGainBeta=1;
	1929	MDOUBLE bestGainProbInvariant = 0.5;
	1930	MDOUBLE bestLossAlpha=1; // Loss (for non-reversible model only)
	1931	MDOUBLE bestLossBeta=1;
	1932	MDOUBLE bestLossProbInvariant = 0.5;
	1933	MDOUBLE bestAlpha=1; //Rate
	1934	MDOUBLE bestTheta = 0.5;
	1935	bool isImprovedRandPoint = false;
	1936
	1937	MDOUBLE L;
	1938	MDOUBLE currGainAlpha; //Gain
	1939	MDOUBLE currGainBeta;
	1940	MDOUBLE currGainProbInvariant;
	1941	MDOUBLE currLossAlpha; // Loss (for non-reversible model only)
	1942	MDOUBLE currLossBeta;
	1943	MDOUBLE currLossProbInvariant;
	1944	MDOUBLE currAlpha; //Rate
	1945	MDOUBLE currTheta;
	1946
	1947	bool optimizeAlpha = isAlphaOptimization((sp->distr()));
	1948	bool optimizeBetaGain = isBetaOptimization(gainDist);
	1949	bool optimizeBetaLoss = isBetaOptimization(lossDist);
	1950	bool probInvariant = isInvariantOptimization(gainDist); //for both
	1951	bool evalTheta = isThetaOptimization();
	1952
	1953	int i;
	1954	for (i = 0; i < numOfRandPoints ; ++i) {
	1955	//rand make
	1956	currGainAlpha = talRandom::giveRandomNumberBetweenTwoPoints(gainLossOptions::_userAlphaGainMin, gainLossOptions::_userAlphaGainMax);
	1957	if(optimizeBetaGain) currGainBeta = talRandom::giveRandomNumberBetweenTwoPoints(gainLossOptions::_userBetaGainMin, gainLossOptions::_userBetaGainMax);
	1958	if(probInvariant) currGainProbInvariant = talRandom::giveRandomNumberBetweenTwoPoints(gainLossOptions::_userProbInvariantGainMin, gainLossOptions::_userProbInvariantGainMax);
	1959	if (!gainLossOptions::_isReversible){
	1960	currLossAlpha= talRandom::giveRandomNumberBetweenTwoPoints(gainLossOptions::_userAlphaLossMin, gainLossOptions::_userAlphaLossMax);// Loss (for non-reversible model only)
	1961	if(optimizeBetaLoss) currLossBeta= talRandom::giveRandomNumberBetweenTwoPoints(gainLossOptions::_userBetaLossMin, gainLossOptions::_userBetaLossMax);
	1962	if(probInvariant) currLossProbInvariant= talRandom::giveRandomNumberBetweenTwoPoints(gainLossOptions::_userProbInvariantLossMin, gainLossOptions::_userProbInvariantLossMax);
	1963	}
	1964	if(optimizeAlpha)
	1965	currAlpha = talRandom::giveRandomNumberBetweenTwoPoints(gainLossOptions::_userAlphaRateMin, gainLossOptions::_userAlphaRateMax);
	1966	if(evalTheta)
	1967	//currTheta = talRandom::giveRandomNumberBetweenTwoPoints(max(0.0,(currTheta-0.1)), min(1.0,(currTheta+0.1)));
	1968	currTheta = talRandom::giveRandomNumberBetweenTwoPoints(gainLossOptions::_userThetaMin, gainLossOptions::_userThetaMax);
	1969
	1970	//set params
	1971	updateGainAlpha(currGainAlpha,spVVec,gainDist,lossDist);
	1972	if(optimizeBetaGain)
	1973	updateGainBeta(currGainBeta,spVVec,gainDist,lossDist);
	1974	if(probInvariant)
	1975	updateGainProbInvariant(currGainProbInvariant,gainDist);
	1976	if (!gainLossOptions::_isReversible){
	1977	updateLossAlpha(currLossAlpha,spVVec,gainDist,lossDist);
	1978	if(optimizeBetaLoss)
	1979	updateLossBeta(currLossBeta,spVVec,gainDist,lossDist);
	1980	if(probInvariant)
	1981	updateLossProbInvariant(currLossProbInvariant,lossDist);
	1982	}
	1983	if(optimizeAlpha)
	1984	updateRateAlpha(currAlpha,spVVec,gainDist,lossDist);
	1985	if (evalTheta)
	1986	updateTheta(currTheta,spVVec,gainDist,lossDist);
	1987
	1988	// compute Likelihood
	1989	MDOUBLE sumPijQij = normalizeQ(spVVec,gainDist,lossDist);
	1990	if(currUnObs) currUnObs->setLforMissingData(_tr,spVVec,gainDist,lossDist);
	1991	L = likelihoodComputationGL::getTreeLikelihoodAllPosAlphTheSame(_tr,_scUniqPatterns,spVVec, gainDist,lossDist,_weightsUniqPatterns,currUnObs);
	1992
	1993	//print
	1994	LOG(7,<<"--paramsSet: "<<i<<endl);
	1995	LOG(7,<<"GainAlpha is "<<currGainAlpha<<endl);
	1996	if(optimizeBetaGain) LOG(7,<<"GainBeta is "<<currGainBeta<<endl);
	1997	if(probInvariant) LOG(7,<<"GainProbInvariant is "<<currGainProbInvariant<<endl);
	1998	if (!gainLossOptions::_isReversible){
	1999	LOG(7,<<"LossAlpha is "<<currLossAlpha<<endl);
	2000	if(optimizeBetaLoss) LOG(7,<<"LossBeta is "<<currLossBeta<<endl);
	2001	if(probInvariant) LOG(7,<<"LossProbInvariant is "<<currLossProbInvariant<<endl);
	2002	}
	2003	if(optimizeAlpha) LOG(7,<<"Alpha is "<<currAlpha<<endl);
	2004	if(evalTheta) LOG(7,<<"Theta is "<<currTheta<<endl);
	2005	LOG(7,<<"likelihood is "<<L<<endl);
	2006
	2007
	2008	if(bestL < L){
	2009	bestGainAlpha=currGainAlpha;
	2010	if(optimizeBetaGain) bestGainBeta=currGainBeta;
	2011	if(probInvariant) bestGainProbInvariant = currGainProbInvariant;
	2012	if (!gainLossOptions::_isReversible){
	2013	bestLossAlpha=currLossAlpha;
	2014	if(optimizeBetaLoss) bestLossBeta=currLossBeta;
	2015	if(probInvariant) bestLossProbInvariant = currLossProbInvariant;
	2016	}
	2017	if(optimizeAlpha) bestAlpha=currAlpha;
	2018	if(evalTheta) bestTheta = currTheta;
	2019	bestL = L;
	2020	isImprovedRandPoint = true;
	2021	LOGnOUT(4,<<" logL improved = "<<L<<" at rand point "<<i<<endl);
	2022	}
	2023	if(isImprovedRandPoint && i>10) // The loop is break after improvement and numOfRandPoints/2
	2024	break;
	2025
	2026	}
	2027	// set best params after all rand points were calculated
	2028	updateGainAlpha(bestGainAlpha,spVVec,gainDist,lossDist);
	2029	if(optimizeBetaGain) updateGainBeta(bestGainBeta,spVVec,gainDist,lossDist);
	2030	if(probInvariant) updateGainProbInvariant(bestGainProbInvariant,gainDist);
	2031	if (!gainLossOptions::_isReversible){
	2032	updateLossAlpha(bestLossAlpha,spVVec,gainDist,lossDist);
	2033	if(optimizeBetaLoss) updateLossBeta(bestLossBeta,spVVec,gainDist,lossDist);
	2034	if(probInvariant) updateLossProbInvariant(bestLossProbInvariant,lossDist);
	2035	}
	2036	if(optimizeAlpha)
	2037	updateRateAlpha(bestAlpha,spVVec,gainDist,lossDist);
	2038	if(evalTheta)
	2039	updateTheta(bestTheta,spVVec,gainDist,lossDist);
	2040	if(currUnObs) currUnObs->setLforMissingData(_tr,spVVec,gainDist,lossDist);
	2041	L = likelihoodComputationGL::getTreeLikelihoodAllPosAlphTheSame(_tr,_scUniqPatterns,spVVec, gainDist,lossDist,_weightsUniqPatterns,currUnObs);
	2042	time(&t2);
	2043	LOGnOUT(4,<<"End initParamsAtRandPointsSPvv after "<<i<<" randPoints with:\n bestGainAlpha "<<bestGainAlpha<<"\n bestGainBeta "<<bestGainBeta<<
	2044	"\n bestLossAlpha "<<bestLossAlpha<<"\n bestLossBeta "<<bestLossBeta <<"\n bestTheta "<<bestTheta <<"\n bestlogL "<<L<<endl);
	2045	LOGnOUT(4,<<"initParamsAtRandPointsSPvv RUNNING TIME = "<<(t2-t1)/60.0<<" minutes"<<endl);
	2046	}
	2047
	2048
	2049
	2050
	2051
	2052	// computations
	2053	/********************************************************************************************
	2054	*********************************************************************************************/
	2055	void gainLoss::startRate4Site(sequenceContainer& sc, tree& tr, stochasticProcess* sp, string& outDir, unObservableData* unObservableData_p)
	2056	{
	2057	LOGnOUT(4,<<endl<<"Starting rate4site..."<<endl);
	2058	time_t t1,t2;
	2059	time(&t1);
	2060
	2061	rate4siteGL r4s(sc,tr,sp,outDir, unObservableData_p);
	2062	r4s.run();
	2063
	2064	r4s.printRates();
	2065	r4s.printRatesNormalized();
	2066
	2067	_postProbPerCatPerPos = r4s.getLpostPerCat();
	2068	_rates = r4s.getRates();
	2069	_normalizedRates = r4s.getNormalizedRates();
	2070
	2071	time(&t2);
	2072	LOGnOUT(4,<<"TIME = "<<(t2-t1)/60.0<<" minutes"<<endl<<endl);
	2073	}
	2074	/********************************************************************************************
	2075	*********************************************************************************************/
	2076	void gainLoss::startGainLoss4Site(sequenceContainer& sc, tree& tr, vector<vector<stochasticProcess> > spVVec,distribution gainDist,distribution* lossDist,
	2077	string& outDir, unObservableData* unObservableData_p)
	2078	{
	2079	LOGnOUT(4,<<endl<<"Starting gain4site and loss4site..."<<endl);
	2080	time_t t1;
	2081	time(&t1);
	2082	time_t t2;
	2083
	2084	gainLoss4site gl4s(sc,tr,spVVec,gainDist,lossDist,outDir,unObservableData_p);
	2085	gl4s.computeGain4Site();
	2086	gl4s.computeLoss4Site();
	2087	gl4s.printGain4Site();
	2088	gl4s.printLoss4Site();
	2089
	2090	_postProbPerSpPerCatPerPos = gl4s.getLpostPerSpPerCat();
	2091
	2092	time(&t2);
	2093	LOGnOUT(4,<<"computeEB_EXP_GL4Site RUNNING TIME = "<<(t2-t1)/60.0<<" minutes"<<endl);
	2094	}
	2095
	2096	/********************************************************************************************
	2097	*********************************************************************************************/
	2098	void gainLoss::computePosteriorExpectationOfChangeRunOnly()
	2099	{
	2100	LOGnOUT(4,<<endl<<"Starting computePosteriorExpectationOfChangeRunOnly..."<<endl);
	2101	time_t t1,t2;
	2102	time(&t1);
	2103
	2104	computeCountsGL* countsGL =NULL;
	2105
	2106	if(!gainLossOptions::_gainLossDist){
	2107	if(_postProbPerCatPerPos.size()==0 )
	2108	{
	2109	//resizeMatrix(LpostPerCat,sp->categories(),sc.seqLen()) ; // Not needed done with vector "=" sign later
	2110	if(_sp->categories()>1){ // to fill LpostPerCat - run computeRate4site()
	2111	LOGnOUT(4,<<endl<<"The required LpostPerCat is empty - run Rate4Site to compute."<<endl);
	2112	rate4siteGL r4s(_sc,_tr,_sp,gainLossOptions::_outDir, _unObservableData_p);
	2113	r4s.run();
	2114	_postProbPerCatPerPos = r4s.getLpostPerCat();
	2115	}
	2116	else{
	2117	_postProbPerCatPerPos.resize(1);
	2118	_postProbPerCatPerPos[0].resize(_sc.seqLen());
	2119	oneMatrix(_postProbPerCatPerPos);
	2120	}
	2121	}
	2122	countsGL = new computeCountsGL(_sc,_tr,_sp,gainLossOptions::_outDir,_postProbPerCatPerPos, _distanceFromNearestOTUForRecent); //_distanceFromRootForRecent
	2123	}
	2124	else{
	2125	if(_postProbPerSpPerCatPerPos.size()==0 )
	2126	{
	2127	LOGnOUT(4,<<endl<<"The required LpostPerSpPerCat is empty - run computeGain4Site to compute."<<endl);
	2128	gainLoss4site gl4s(_sc,_tr,_spVVec,_gainDist,_lossDist,gainLossOptions::_outDir,_unObservableData_p);
	2129	gl4s.computeGain4Site();
	2130	//gl4s.computeLoss4Site(); // No need to run both
	2131	_postProbPerSpPerCatPerPos = gl4s.getLpostPerSpPerCat();
	2132	}
	2133	countsGL = new computeCountsGL(_sc,_tr,_spVVec,_gainDist,_lossDist,gainLossOptions::_outDir,_postProbPerSpPerCatPerPos,_distanceFromNearestOTUForRecent); //_distanceFromRootForRecent
	2134	}
	2135	countsGL->run();
	2136	_jointProb_PosNodeXY = countsGL->getJointProb();
	2137	if(countsGL) delete countsGL;
	2138	time(&t2);
	2139	LOGnOUT(4,<<"TIME = "<<(t2-t1)/60.0<<" minutes"<<endl<<endl);
	2140	}
	2141
	2142	/********************************************************************************************
	2143	Main version, if used with other data (not the gainLoss _sc,_tr,_sp,.. members) other version required
	2144	*********************************************************************************************/
	2145	void gainLoss::startComputePosteriorExpectationOfChange()
	2146	{
	2147	LOGnOUT(4,<<endl<<"Starting calculePosteriorExpectationOfChange..."<<endl);
	2148	time_t t1,t2;
	2149	time(&t1);
	2150
	2151	computeCountsGL* countsGL =NULL;
	2152	rate4siteGL* r4s=NULL;
	2153	gainLoss4site* gl4s=NULL;
	2154
	2155	if(!gainLossOptions::_gainLossDist){
	2156	if(_postProbPerCatPerPos.size()==0 )
	2157	{
	2158	//resizeMatrix(LpostPerCat,sp->categories(),sc.seqLen()) ; // Not needed done with vector "=" sign later
	2159	if(_sp->categories()>1){ // to fill LpostPerCat - run computeRate4site()
	2160	LOGnOUT(4,<<endl<<"The required LpostPerCat is empty - run Rate4Site to compute."<<endl);
	2161	r4s = new rate4siteGL(_sc,_tr,_sp,gainLossOptions::_outDir, _unObservableData_p);
	2162	r4s->run();
	2163	_postProbPerCatPerPos = r4s->getLpostPerCat();
	2164	if(r4s) delete r4s;
	2165	}
	2166	else{
	2167	_postProbPerCatPerPos.resize(1);
	2168	_postProbPerCatPerPos[0].resize(_sc.seqLen());
	2169	oneMatrix(_postProbPerCatPerPos);
	2170	}
	2171	}
	2172	countsGL = new computeCountsGL(_sc,_tr,_sp,gainLossOptions::_outDir,_postProbPerCatPerPos, _distanceFromNearestOTUForRecent); //_distanceFromRootForRecent
	2173	}
	2174	else{
	2175	if(_postProbPerSpPerCatPerPos.size()==0 )
	2176	{
	2177	LOGnOUT(4,<<endl<<"The required LpostPerSpPerCat is empty - run computeGain4Site to compute."<<endl);
	2178	gl4s = new gainLoss4site(_sc,_tr,_spVVec,_gainDist,_lossDist,gainLossOptions::_outDir,_unObservableData_p);
	2179	gl4s->computeGain4Site();
	2180	//gl4s.computeLoss4Site(); // No need to run both
	2181	_postProbPerSpPerCatPerPos = gl4s->getLpostPerSpPerCat();
	2182	if(gl4s) delete gl4s;
	2183	}
	2184	countsGL = new computeCountsGL(_sc,_tr,_spVVec,_gainDist,_lossDist,gainLossOptions::_outDir,_postProbPerSpPerCatPerPos,_distanceFromNearestOTUForRecent); //_distanceFromRootForRecent
	2185	}
	2186	countsGL->run();
	2187	countsGL->printProbExp(); // Expectation and Probability PerPos
	2188	countsGL->produceExpectationPerBranch(); // required before printExpectationPerBranch
	2189	countsGL->printExpectationPerBranch(); // sum over all pos
	2190	countsGL->updateTreeByGainLossExpectationPerBranch(_trGain,0,1);
	2191	countsGL->updateTreeByGainLossExpectationPerBranch(_trLoss,1,0);
	2192	countsGL->printProbabilityPerPosPerBranch(); // with probCutOff
	2193
	2194	if(gainLossOptions::_isFewCutOffCounts)
	2195	countsGL->printProbExpPerPosPerBranchFewCutOffs(gainLossOptions::_probCutOffPrintEvent);
	2196	else
	2197	countsGL->printProbExpPerPosPerBranch(gainLossOptions::_probCutOffPrintEvent,gainLossOptions::_probCutOffCounts);
	2198
	2199	if(gainLossOptions::_printPropExpOfChangeFullData){
	2200	MDOUBLE probCutOffPrintEvent = 0; // if <0.05 results with a huge file
	2201	countsGL->printProbExpPerPosPerBranch(probCutOffPrintEvent ,gainLossOptions::_probCutOffCounts);
	2202	}
	2203	if(gainLossOptions::_printExpPerPosPerBranchMatrix){
	2204	countsGL->printExpPerPosPerBranchMatrix(0,1);
	2205	countsGL->printExpPerPosPerBranchMatrix(1,0);
	2206	}
	2207	if(gainLossOptions::_printTreesWithExpectationValuesAsBP){
	2208	countsGL->printTreesWithExpectationValuesAsBP();
	2209	}
	2210	if(gainLossOptions::_printTreesWithProbabilityValuesAsBP){
	2211	countsGL->printTreesWithProbabilityValuesAsBP();
	2212	}
	2213	//if(gainLossOptions::_saveProbChanges_PosNodeXY){ // the computedProbChanges_PosNodeXY is saved to be used
	2214	resizeVVVV(_sc.seqLen(),_tr.getNodesNum(),_sp->alphabetSize(),_sp->alphabetSize(),_jointProb_PosNodeXY);
	2215	_jointProb_PosNodeXY = countsGL->getJointProb();
	2216	//}
	2217	_SMPerPos = countsGL->get_expV();
	2218	_expChanges_PosNodeXY = countsGL->getExpChanges();
	2219
	2220	_gainPerPos =countsGL-> get_expV01();
	2221	_lossPerPos = countsGL-> get_expV10();
	2222
	2223	_meanGain = computeAverage(countsGL-> get_expV01());
	2224	_meanLoss = computeAverage(countsGL-> get_expV10());
	2225	_medianGain = computeMedian(countsGL-> get_expV01());
	2226	_medianLoss = computeMedian(countsGL-> get_expV10());
	2227	LOGnOUT(4,<<"Mean values Gain="<<_meanGain<<"\tLoss="<<_meanLoss<<endl);
	2228	LOGnOUT(4,<<"Median values Gain="<<_medianGain<<"\tLoss="<<_medianLoss<<endl<<endl);
	2229	if(countsGL) delete countsGL;
	2230
	2231	time(&t2);
	2232	LOGnOUT(4,<<"TIME = "<<(t2-t1)/60.0<<" minutes (calculePosteriorExpectationOfChange)"<<endl<<endl);
	2233	}
	2234
	2235	/********************************************************************************************
	2236
	2237	*********************************************************************************************/
	2238	void gainLoss::startComputeAmongSitesCorrelations()
	2239	{
	2240	LOGnOUT(4,<<endl<<"Starting computeAmongSitesCorrelations..."<<endl);
	2241	time_t t1,t2;
	2242	time(&t1);
	2243
	2244	_scEvolvingSites = _sc;
	2245	if(gainLossOptions::_isCorrelationsBasedOnMaxParsimonyMapping){
	2246	_expChanges_PosNodeXYSampledData = _MP_PosNodeXY;
	2247	_gainPerPosCorr = _gainMPPerPos;
	2248	_lossPerPosCorr = _lossMPPerPos;
	2249	}
	2250	else{
	2251	_expChanges_PosNodeXYSampledData = _expChanges_PosNodeXY;
	2252	_gainPerPosCorr = _gainPerPos;
	2253	_lossPerPosCorr = _lossPerPos;
	2254	}
	2255
	2256	if(Parameters::getInt("_isUpdateminNumOfMPEvent2RemoveSimulatedPositions")){
	2257	MDOUBLE minNumOfMPEvent2RemoveSimulatedPositions = Parameters::getFloat("_minNumOfMPEvent2RemoveSimulatedPositions");
	2258	MDOUBLE addedMinNumOfMPEvent = (int)sqrt((double)_sc.numberOfSeqs())/5;
	2259	if(addedMinNumOfMPEvent>0){
	2260	Parameters::updateParameter("_minNumOfMPEvent2RemoveSimulatedPositions",double2string(addedMinNumOfMPEvent+minNumOfMPEvent2RemoveSimulatedPositions).c_str());
	2261	LOGnOUT(4,<<"Update _minNumOfMPEvent2RemoveSimulatedPositions to "<<addedMinNumOfMPEvent+minNumOfMPEvent2RemoveSimulatedPositions<<" from "<<minNumOfMPEvent2RemoveSimulatedPositions<<" with respect to "<<_sc.numberOfSeqs()<< " species"<<endl);
	2262	}
	2263	}
	2264	if(gainLossOptions::_isRemoveSimulatedPositionsBasedOnMP){
	2265	LOGnOUT(4,<<endl<<"Based on Maximum parsimony Remove positions from Real data (remove positions with no events)..."<<endl);
	2266	vector<int> posToRemove(_scEvolvingSites.seqLen(),false);
	2267	MDOUBLE minExpT_MP = Parameters::getFloat("_minNumOfMPEvent2RemoveSimulatedPositions")/2;//
	2268	MDOUBLE Nmin = 0;
	2269	LOGnOUT(4,<<"min Number Of Max Parsimony Event to consider a Position is "<<minExpT_MP*2<<endl);
	2270	int numOfRemovedPos=0;
	2271	for (int pos = 0; pos<_sc.seqLen(); ++pos){
	2272	MDOUBLE Gain = _MPPerPos[pos][0][1];
	2273	MDOUBLE Loss = _MPPerPos[pos][1][0];
	2274	if(gainLossOptions::_isOnlyCorrelateWithBoth)
	2275	Nmin = computeNminRforCorrelWithGainAndLoss(Gain,Loss);
	2276	else
	2277	Nmin = max(Gain,Loss); // thus, position are removed only if both their gain and loss values are below minT
	2278	if(Nmin < minExpT_MP){
	2279	posToRemove[pos] = true;
	2280	_expChanges_PosNodeXYSampledData.erase(_expChanges_PosNodeXYSampledData.begin() + pos-numOfRemovedPos );
	2281	numOfRemovedPos++;
	2282	}else{
	2283	_evolvingSites.push_back(pos);
	2284	_numOfGapsTillSite.push_back(numOfRemovedPos);
	2285	}
	2286	}
	2287	_scEvolvingSites.removePositions(posToRemove);
	2288	LOGnOUT(4,<<"removed="<<numOfRemovedPos<<endl);
	2289	int numOfSimulatedPositionsAboveMinRate = _scEvolvingSites.seqLen();
	2290	LOGnOUT(4,<<"After remove numOfPositions="<<numOfSimulatedPositionsAboveMinRate<<endl);
	2291	}else{
	2292	for (int pos = 0; pos<_sc.seqLen(); ++pos){
	2293	_evolvingSites.push_back(pos);
	2294	_numOfGapsTillSite.push_back(0);
	2295	}
	2296	}
	2297	computeCorrelations* computeCorrel =NULL; // required for correlation computation
	2298	if(_expChanges_PosNodeXYSampledData.size()==0)
	2299	errorMsg::reportError("ERROR: Correlation request with empty mapping vector");
	2300	else
	2301	computeCorrel = new computeCorrelations(_tr, gainLossOptions::_outDir, &_expChanges_PosNodeXYSampledData);
	2302
	2303
	2304	if(gainLossOptions::_printComputedCorrelationsAllSites \|\| gainLossOptions::_selectedSitesForCorrelation==""){
	2305	LOGnOUT(4,<<"Correlate all sites (all-against-all, STRING style print)"<<endl);
	2306	//readIntegersFromFileIntoVector(_selectedSites,_scEvolvingSites.seqLen(), 0, NULL); // all sites in range
	2307	_selectedSites = _evolvingSites;
	2308	}else{
	2309	LOGnOUT(4,<<"printComputedCorrelations... read positions from "<<gainLossOptions::_selectedSitesForCorrelation<<endl);
	2310	readIntegersFromFileIntoVector(_selectedSites,_sc.seqLen()-1, 0, &gainLossOptions::_selectedSitesForCorrelation, &_evolvingSites); // fill _selectedSites
	2311	Vint numOfGapsTillSiteSelected; // fill numOfGapsTillSiteSelected => _numOfGapsTillSite
	2312	for(int i=0;i<_selectedSites.size();++i){
	2313	for(int j=0;j<_evolvingSites.size();++j){
	2314	if(_selectedSites[i]==_evolvingSites[j])
	2315	numOfGapsTillSiteSelected.push_back(_numOfGapsTillSite[j]);
	2316	}
	2317	}
	2318	_numOfGapsTillSite = numOfGapsTillSiteSelected;
	2319	}
	2320
	2321	//bool correlationForZscore = false;
	2322	//LOGnOUT(4,<<"Warning: isNormalizeForBranch is by branch length. correlationForZscore false by Default. Both with and withour branch"<<endl);
	2323	computeCorrel->runComputeCorrelations(_selectedSites, _numOfGapsTillSite, gainLossOptions::_isNormalizeForBranchExpInCorrCompute);
	2324
	2325	// required before print. Can't be done before - out of vec: _expChanges_PosNodeXYSampledData index
	2326	if(gainLossOptions::_isPrintCorrelationsOfAllPairs_Corr)
	2327	computeCorrel->printComputedCorrelations(_selectedSites,_evolvingSites, gainLossOptions::_isNormalizeForBranchExpInCorrCompute);
	2328
	2329	//if(gainLossOptions::_performParametricBootstapCorrelation){ // later use these values to print rank according to simulations
	2330	_correlationsPerSitePerPosVec = computeCorrel->getcorrelationPerSitePerPosVec();
	2331	_correlationsPerSitePerPosVecSampledData = _correlationsPerSitePerPosVec;
	2332	//}
	2333	//else{ // else we'll print it later, while taking into account simulations
	2334	// computeCorrel->printComputedCorrelations(selectedSites, true/, correlationForZscore/);
	2335	//}
	2336	if(computeCorrel) delete computeCorrel;
	2337	time(&t2);
	2338	LOGnOUT(4,<<"TIME = "<<(t2-t1)/60.0<<" minutes (computeAmongSitesCorrelations)"<<endl<<endl);
	2339	}
	2340
	2341	/********************************************************************************************
	2342	*********************************************************************************************/
	2343	void gainLoss::startComputePosteriorExpectationOfChange(sequenceContainer& sc, tree& tr, stochasticProcess* sp
	2344	, VVdouble LpostPerCat, unObservableData* unObservableData_p, string& outDir,MDOUBLE distanceFromNearestOTUForRecent ,bool isUpdateMPPerPos)
	2345	{
	2346	LOGnOUT(4,<<endl<<"Starting calculePosteriorExpectationOfChange..."<<endl);
	2347	time_t t1,t2;
	2348	time(&t1);
	2349
	2350	computeCountsGL* countsGL;
	2351	if(LpostPerCat.size()==0 )
	2352	{
	2353	LOGnOUT(4,<<endl<<"The required LpostPerCat is empty - run Rate4Site to compute."<<endl);
	2354	//resizeMatrix(LpostPerCat,sp->categories(),sc.seqLen()) ; // Not needed done with vector "=" sign later
	2355	if(sp->categories()>1){ // to fill LpostPerCat - run computeRate4site()
	2356	rate4siteGL r4s(sc,tr,sp,outDir, unObservableData_p);
	2357	r4s.run();
	2358	LpostPerCat = r4s.getLpostPerCat();
	2359	}
	2360	else{
	2361	oneMatrix(LpostPerCat);
	2362	}
	2363	}
	2364	countsGL = new computeCountsGL(sc,tr,sp,outDir,LpostPerCat,distanceFromNearestOTUForRecent); //_distanceFromRootForRecent
	2365
	2366	countsGL->run();
	2367	countsGL->printProbExp(); // Expectation and Probability PerPos
	2368	countsGL->produceExpectationPerBranch(); // required before printExpectationPerBranch
	2369	countsGL->printExpectationPerBranch(); // sum over all pos
	2370	countsGL->updateTreeByGainLossExpectationPerBranch(_trGain,0,1);
	2371	countsGL->updateTreeByGainLossExpectationPerBranch(_trLoss,1,0);
	2372	countsGL->printProbabilityPerPosPerBranch(); // with probCutOff
	2373	if(gainLossOptions::_isFewCutOffCounts)
	2374	countsGL->printProbExpPerPosPerBranchFewCutOffs(gainLossOptions::_probCutOffPrintEvent);
	2375	else
	2376	countsGL->printProbExpPerPosPerBranch(gainLossOptions::_probCutOffPrintEvent,gainLossOptions::_probCutOffCounts);
	2377
	2378	if(gainLossOptions::_printPropExpOfChangeFullData){
	2379	MDOUBLE probCutOffPrintEvent = 0.0; // if <0.05 results with a huge file
	2380	countsGL->printProbExpPerPosPerBranch(probCutOffPrintEvent ,gainLossOptions::_probCutOffCounts);
	2381	}
	2382	if(gainLossOptions::_printExpPerPosPerBranchMatrix){
	2383	countsGL->printExpPerPosPerBranchMatrix(0,1);
	2384	countsGL->printExpPerPosPerBranchMatrix(1,0);
	2385	}
	2386	if(gainLossOptions::_printTreesWithExpectationValuesAsBP){
	2387	countsGL->printTreesWithExpectationValuesAsBP();
	2388	}
	2389	if(gainLossOptions::_printTreesWithProbabilityValuesAsBP){
	2390	countsGL->printTreesWithProbabilityValuesAsBP();
	2391	}
	2392	if(isUpdateMPPerPos)
	2393	_SMPerPos = countsGL->get_expV();
	2394	if(countsGL) delete countsGL;
	2395	time(&t2);
	2396	LOGnOUT(4,<<"TIME = "<<(t2-t1)/60.0<<" minutes"<<endl<<endl);
	2397	}
	2398	/********************************************************************************************
	2399	*********************************************************************************************/
	2400	void gainLoss::startComputePosteriorExpectationOfChange(sequenceContainer& sc, tree& tr, vector<vector<stochasticProcess> >& spVVec, distribution gainDist, distribution* lossDist
	2401	, VVVdouble& LpostPerSpPerCat,unObservableData* unObservableData_p, string& outDir,MDOUBLE distanceFromNearestOTUForRecent,bool isUpdateMPPerPos)
	2402	{
	2403	LOGnOUT(4,<<endl<<"Starting calculePosteriorExpectationOfChange..."<<endl);
	2404	time_t t1,t2;
	2405	time(&t1);
	2406	computeCountsGL* countsGL;
	2407	if(LpostPerSpPerCat.size()==0 )
	2408	{
	2409	LOGnOUT(4,<<endl<<"The required LpostPerSpPerCat is empty - run computeGain4Site to compute."<<endl);
	2410	gainLoss4site gl4s(sc,tr,spVVec,gainDist,lossDist,outDir,unObservableData_p);
	2411	gl4s.computeGain4Site();
	2412	//gl4s.computeLoss4Site(); // No need to run both
	2413	LpostPerSpPerCat = gl4s.getLpostPerSpPerCat();
	2414
	2415	}
	2416	countsGL = new computeCountsGL(sc,tr,spVVec,gainDist,lossDist,outDir,LpostPerSpPerCat,distanceFromNearestOTUForRecent);
	2417
	2418	countsGL->run();
	2419	countsGL->printProbExp(); // Expectation and Probability PerPos
	2420	countsGL->produceExpectationPerBranch(); // required before printExpectationPerBranch
	2421	countsGL->printExpectationPerBranch(); // sum over all pos
	2422	countsGL->updateTreeByGainLossExpectationPerBranch(_trGain,0,1);
	2423	countsGL->updateTreeByGainLossExpectationPerBranch(_trLoss,1,0);
	2424	countsGL->printProbabilityPerPosPerBranch(); // with probCutOff
	2425	if(gainLossOptions::_isFewCutOffCounts)
	2426	countsGL->printProbExpPerPosPerBranchFewCutOffs(gainLossOptions::_probCutOffPrintEvent);
	2427	else
	2428	countsGL->printProbExpPerPosPerBranch(gainLossOptions::_probCutOffPrintEvent,gainLossOptions::_probCutOffCounts);
	2429
	2430	if(gainLossOptions::_printPropExpOfChangeFullData){
	2431	MDOUBLE probCutOffPrintEvent = 0.0; // if <0.05 results with a huge file
	2432	countsGL->printProbExpPerPosPerBranch(probCutOffPrintEvent ,gainLossOptions::_probCutOffCounts);
	2433	}
	2434	if(gainLossOptions::_printExpPerPosPerBranchMatrix){
	2435	countsGL->printExpPerPosPerBranchMatrix(0,1);
	2436	countsGL->printExpPerPosPerBranchMatrix(1,0);
	2437	}
	2438	if(gainLossOptions::_printTreesWithExpectationValuesAsBP){
	2439	countsGL->printTreesWithExpectationValuesAsBP();
	2440	}
	2441	if(gainLossOptions::_printTreesWithProbabilityValuesAsBP){
	2442	countsGL->printTreesWithProbabilityValuesAsBP();
	2443	}
	2444	if(isUpdateMPPerPos)
	2445	_SMPerPos = countsGL->get_expV();
	2446	if(countsGL) delete countsGL;
	2447	time(&t2);
	2448	LOGnOUT(4,<<"TIME = "<<(t2-t1)/60.0<<" minutes"<<endl<<endl);
	2449	}
	2450
	2451
	2452	/********************************************************************************************
	2453	A version used for simulated sequences
	2454	Good only for Gamma distribution
	2455	Unused!
	2456	*********************************************************************************************/
	2457	void gainLoss::computeCoEvolutionScoresBasedOnSimulatedData(sequenceContainer& scSimulated)
	2458	{
	2459	LOGnOUT(4,<<endl<<" computeCoEvolutionScoresBasedOnSimulatedData..."<<endl);
	2460	errorMsg::reportError("ERROR: computeCoEvolutionScoresBasedOnSimulatedData Not update. Check code or re-run");
	2461	time_t t1,t2;
	2462	time(&t1);
	2463	VVdouble postProbPerCatPerPos;
	2464
	2465	computeCountsGL* countsGL;
	2466	if(!gainLossOptions::_gainLossDist){
	2467	if(_sp->categories()>1){ // to fill LpostPerCat - run computeRate4site()
	2468	LOGnOUT(4,<<endl<<"The required LpostPerCat is empty - run Rate4Site to compute."<<endl);
	2469	rate4siteGL r4s(scSimulated,_tr,_sp,gainLossOptions::_outDir, _unObservableData_p);
	2470	r4s.run();
	2471	postProbPerCatPerPos = r4s.getLpostPerCat();
	2472	}
	2473	else{
	2474	postProbPerCatPerPos.resize(1);
	2475	postProbPerCatPerPos[0].resize(scSimulated.seqLen());
	2476	oneMatrix(postProbPerCatPerPos);
	2477	}
	2478	countsGL = new computeCountsGL(scSimulated,_tr,_sp,gainLossOptions::_outDir,postProbPerCatPerPos, _distanceFromNearestOTUForRecent); //_distanceFromRootForRecent
	2479	}
	2480	else{
	2481	LOGnOUT(4,<<"ERROR - mixture model not supported in co-evolution by parametric bootstrap"<<endl);
	2482	}
	2483	countsGL->run();
	2484	VVVVdouble expChanges_PosNodeXY_Sim = countsGL->getExpChanges(); // simulated data mapping
	2485
	2486
	2487	////////// Correlations
	2488	computeCorrelations* computeCorrel;
	2489	Vint selectedSites;
	2490	computeCorrel = new computeCorrelations(_tr, gainLossOptions::_outDir, &_expChanges_PosNodeXY, &expChanges_PosNodeXY_Sim);
	2491	if(gainLossOptions::_printComputedCorrelationsAllSites \|\| gainLossOptions::_selectedSitesForCorrelation==""){
	2492	LOGnOUT(4,<<"Correlate all sites (all-against-all, STRING style print)"<<endl);
	2493	readIntegersFromFileIntoVector(selectedSites,_sc.seqLen(), 0, NULL); // all sites in range
	2494	}else{
	2495	LOGnOUT(4,<<" printComputedCorrelations... read positions from "<<gainLossOptions::_selectedSitesForCorrelation<<endl);
	2496	readIntegersFromFileIntoVector(selectedSites,_sc.seqLen(), 0, &gainLossOptions::_selectedSitesForCorrelation);
	2497	}
	2498	LOGnOUT(4,<<"Warning: isNormalizeForBranch is by branch length. correlationForZscore false by Default. Both with and withour branch"<<endl);
	2499	computeCorrel->runComputeCorrelations(selectedSites,_numOfGapsTillSite, gainLossOptions::_isNormalizeForBranchExpInCorrCompute);
	2500
	2501	//computeCorrel->printComputedCorrelations(selectedSites, true/, correlationForZscore/); // DEB
	2502
	2503	VVVdouble correlationsPerSitePerPosVecSim = computeCorrel->getcorrelationPerSitePerPosVec();
	2504	VVVdouble corPvalPerPos = _correlationsPerSitePerPosVec; //instead of resize
	2505	computeCorrel->computedCorrelationsRankBasedOnSimulatedData(selectedSites, _correlationsPerSitePerPosVec,correlationsPerSitePerPosVecSim, corPvalPerPos);
	2506	computeCorrel->produceSymeticMatrix(corPvalPerPos);
	2507	bool correlationForZscore = false;
	2508	computeCorrel->printComputedCorrelations(selectedSites,_evolvingSites, gainLossOptions::_isNormalizeForBranchExpInCorrCompute,correlationForZscore,&corPvalPerPos);
	2509
	2510	if(countsGL) delete countsGL;
	2511	if(computeCorrel) delete computeCorrel;
	2512	time(&t2);
	2513	LOGnOUT(4,<<"TIME = "<<(t2-t1)/60.0<<" minutes (mapping+correlations with simulated data)"<<endl<<endl);
	2514	}
	2515
	2516
	2517	/********************************************************************************************
	2518	*********************************************************************************************/
	2519	void gainLoss::startParametricBootstapCorrelation(){
	2520	LOGnOUT(4,<<"\n\n startParametricBootstapCorrelation \n");
	2521
	2522	gainLossAlphabet alph;
	2523	sequenceContainer scSimulated;
	2524	//_expChanges_PosNodeXYSampledData = _expChanges_PosNodeXY; // may be reduced, for BootStrap computation
	2525	//_correlationsPerSitePerPosVecSampledData = _correlationsPerSitePerPosVec;
	2526
	2527
	2528	if(Parameters::getInt("_isUpdateMinExpThresholdGivenRealDataQuantile") && Parameters::getFloat("_minExpThresholdForPValComputationForCorrelatingPair")>0){
	2529	MDOUBLE gainQuantil = computeQuantileFrac(_gainPerPosCorr,gainLossOptions::_updateMinExpThresholdGivenRealDataQuantileVal);
	2530	MDOUBLE lossQuantil = computeQuantileFrac(_lossPerPosCorr,gainLossOptions::_updateMinExpThresholdGivenRealDataQuantileVal);
	2531	MDOUBLE qNminOfSimData = computeNminRforCorrelWithGainAndLoss(gainQuantil,lossQuantil);
	2532	MDOUBLE minExpT = (double)Parameters::getFloat("_minExpThresholdForPValComputationForCorrelatingPair");
	2533	if(minExpT < qNminOfSimData){
	2534	Parameters::updateParameter("_minExpThresholdForPValComputationForCorrelatingPair",double2string(qNminOfSimData).c_str());
	2535	LOGnOUT(4,<<"Update Nmin MinExpThreshold Given Read data quantile= "<<qNminOfSimData<<endl);
	2536	}else{
	2537	LOGnOUT(4,<<"No update Nmin Given Read data quantile= "<<qNminOfSimData<<" is smaller than current val="<<minExpT<<endl);
	2538	}
	2539	}
	2540
	2541	MDOUBLE qNminOfRealData; // used for convergence
	2542	if(_gainPerPosCorr.size()>3){
	2543	MDOUBLE gainQuantil = computeQuantile(_gainPerPosCorr,gainLossOptions::_percentileOfNminWithCorr1RequiredForLastIteration);
	2544	MDOUBLE lossQuantil = computeQuantile(_lossPerPosCorr,gainLossOptions::_percentileOfNminWithCorr1RequiredForLastIteration);
	2545	qNminOfRealData = computeNminRforCorrelWithGainAndLoss(gainQuantil,lossQuantil);
	2546	}else{
	2547	qNminOfRealData = computeNminRforCorrelWithGainAndLoss(_meanGain,_meanLoss);
	2548	}
	2549	LOGnOUT(4,<<"\nStart Parametric bootstrap simulations. With up to "<<gainLossOptions::_numberOfIterations2simulate<<" iterations or till pair with Corr=1 simulated for Rate="<<qNminOfRealData<<"\n");
	2550
	2551	MDOUBLE T_BH_prev = 0;
	2552	string simCorrel = gainLossOptions::_outDir + "//" + "simCorrelationsFrequencies.txt";
	2553	ofstream* simCorrelStream = new ofstream(simCorrel.c_str());
	2554	int totalNumberOfSimulatedPairsAboveNmin= 0;
	2555	bool isLastIteration = false;
	2556	int numOfpairsWithRateAboveMinRequiredExp = (int)1E09; // temp init. the number of "hypothesis" tested, to be filled by CoMap
	2557
	2558	////////////////////////////////////////////////////////////////////////////////
	2559	int i =0;
	2560	for(; i<gainLossOptions::_numberOfIterations2simulate && !isLastIteration ; ++i){
	2561	LOGnOUT(4,<<"\n Parametric bootstrap iteration "<<i+1<<"\n");
	2562	*simCorrelStream<<"iteration num "<<i+1<<"\n";
	2563	if(i==gainLossOptions::_numberOfIterations2simulate-1) // last, without convergence (median Bin with Corr=1)
	2564	isLastIteration = true;
	2565
	2566	tree trSampled = _tr;
	2567	sequenceContainer scSampled = _scEvolvingSites;
	2568	if( gainLossOptions::_usePosSpecificSimulations){
	2569	startSimultePosteriorExpectationOfChange(gainLossOptions::_numberOfPositions2simulate,gainLossOptions::_numberOfSequences2simulate);
	2570	string strSeqNum = gainLossOptions::_outDir + "//" + "SimulatedPostExp1" + "//"+ "seqAll"+ "//" + "seq" + ".fa";
	2571	ifstream in(strSeqNum.c_str());
	2572	scSimulated = recognizeFormat::read(in,&alph);
	2573	}
	2574	else{
	2575	scSimulated = simulateSequencesForParametricBootstrap(gainLossOptions::_numberOfPositions2simulate,scSampled,trSampled );
	2576	}
	2577
	2578	// sample leaves 2-remove. Not final. Need to sample from Real data and re-compute correlaion. Note: the _usePosSpecificSimulations is not updated
	2579	bool isSample = false;
	2580	if(isSample){
	2581	MDOUBLE fractionOfSeq2Sample = 0.75;
	2582	vector<int> seqIDs2remove;
	2583	vector<tree::nodeP> nodes2remove;
	2584
	2585	treeIterDownTopConst tIt(trSampled);
	2586	for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
	2587	if (mynode->isInternal())
	2588	continue;
	2589	MDOUBLE randV = talRandom::giveRandomNumberBetweenZeroAndEntry(1.0);
	2590	if(randV>fractionOfSeq2Sample && !mynode->father()->isRoot())
	2591	nodes2remove.push_back(mynode);
	2592	}
	2593	LOGnOUT(3,<<" In sampling, "<<nodes2remove.size()<<" sequences are removed"<<endl);
	2594
	2595	for(int node=0; i<nodes2remove.size(); ++node){
	2596	cout<<nodes2remove[node]->name()<<"\n";
	2597	if(nodes2remove[node]->name()=="A")
	2598	cout<<nodes2remove[node]->name()<<"\n";
	2599
	2600	trSampled.removeLeaf(nodes2remove[node]);
	2601	}
	2602	//sequenceContainer::constTaxaIterator myseq=scSimulated.constTaxaBegin();
	2603	//for (;myseq != scSimulated.constTaxaEnd(); ++myseq){
	2604	// if(talRandom::giveRandomNumberBetweenZeroAndEntry(1.0)<fractionOfSeq2Sample)
	2605	// seqIDs2remove.push_back(myseq->id());
	2606	//}
	2607	//for(int i=0; i<scSimulated.numberOfSeqs(); ++i){
	2608	// if(talRandom::giveRandomNumberBetweenZeroAndEntry(1.0)<fractionOfSeq2Sample)
	2609	// seqIDs2remove.push_back(i);
	2610	//}
	2611	//for(int i=0; i<seqIDs2remove.size(); ++i){
	2612	// scSimulated.remove(seqIDs2remove[i]);
	2613	//}
	2614	intersectNamesInTreeAndSequenceContainer(trSampled,scSimulated);
	2615
	2616	// Write seq and tree (required for re-labeling IDs
	2617	string strSeqNum = gainLossOptions::_outDir + "//" + "seqSim."+ int2string(i) + ".fa";
	2618	ofstream seq_out(strSeqNum.c_str());
	2619	fastaFormat:: write(seq_out,scSimulated);
	2620	string treeSampled = gainLossOptions::_outDir + "//" + "TheTree." + int2string(i) + ".ph";
	2621	ofstream treeStream(treeSampled.c_str());
	2622	trSampled.output(treeStream);
	2623
	2624	// re-Read
	2625	ifstream in(strSeqNum.c_str());
	2626	scSimulated = recognizeFormat::read(in,&alph);
	2627	trSampled= tree(treeSampled);
	2628	}
	2629
	2630	if(Parameters::getInt("_isRemoveSimulatedPositionsWithExpectedLowNminBasedOnOccur")){
	2631	LOGnOUT(3,<<" Remove simulated position with too low/high occur to save later computation time (quick and (very) dirty)"<<endl);
	2632	int minNumOfOnes = (int)Parameters::getFloat("_minExpThresholdForPValComputationForCorrelatingPair");
	2633	int minNumOfZeros = (int)Parameters::getFloat("_minExpThresholdForPValComputationForCorrelatingPair");
	2634	bool isRemovePosNotWithinMinMax = true;
	2635	checkMinNumOfOnesOrZeros(scSimulated,minNumOfOnes,minNumOfZeros, isRemovePosNotWithinMinMax);
	2636	}
	2637	if(Parameters::getInt("_accountForMissingData")){
	2638	int minNumOfOnes = Parameters::getInt("_minNumOfOnes");
	2639	int minNumOfZeros = Parameters::getInt("_minNumOfZeros");
	2640	bool isRemovePosNotWithinMinMax = true;
	2641	checkMinNumOfOnesOrZeros(scSimulated,minNumOfOnes,minNumOfZeros, isRemovePosNotWithinMinMax);
	2642	}
	2643
	2644	// version of startComputePosteriorExpectationOfChange with simulated seq. 1) get mapping vectors for each simulated seq 2) run correlation with both A,B
	2645	if(totalNumberOfSimulatedPairsAboveNmin > numOfpairsWithRateAboveMinRequiredExp*10000.0 && totalNumberOfSimulatedPairsAboveNmin>1E09){
	2646	isLastIteration = true; // in case there are 10000 more simulated pairs then tested pairs, last.
	2647	LOGnOUT(4,<<"\n Last iteration of simulations, with sufficient simulated pairs "<<totalNumberOfSimulatedPairsAboveNmin<<" compared with tested pairs "<<numOfpairsWithRateAboveMinRequiredExp<<endl);
	2648	}
	2649	if((i+1) % gainLossOptions::_numberOfIterationsForPrintResults == 0 ) isLastIteration = true; //only for print
	2650	totalNumberOfSimulatedPairsAboveNmin += computeCoEvolutionScoresBasedOnSimulatedDataCoMap(scSimulated,trSampled ,qNminOfRealData, isLastIteration ,numOfpairsWithRateAboveMinRequiredExp, T_BH_prev, simCorrelStream);
	2651	if((i+1) % gainLossOptions::_numberOfIterationsForPrintResults == 0 ) isLastIteration = false; //revert back
	2652
	2653	if(totalNumberOfSimulatedPairsAboveNmin < numOfpairsWithRateAboveMinRequiredExp*1000.0){
	2654	isLastIteration = false; // revert to false, in case it was changed to 'true' due to simulation of Corr=1
	2655	LOGnOUT(4,<<"More iterations of simulations required, with too few simulated pairs "<<totalNumberOfSimulatedPairsAboveNmin<<" compared with tested pairs "<<numOfpairsWithRateAboveMinRequiredExp<<" after "<<i+1<<" iterations\n");
	2656	}
	2657	}
	2658	LOGnOUT(4,<<"total NumberOf Pairs simulated and AboveNmin ="<<totalNumberOfSimulatedPairsAboveNmin<<" after "<<i+1<<" iterations\n");
	2659	}
	2660
	2661
	2662
	2663	/********************************************************************************************
	2664	A version used for simulated sequences
	2665	Good only for Gamma distribution
	2666	*********************************************************************************************/
	2667	int gainLoss::computeCoEvolutionScoresBasedOnSimulatedDataCoMap(sequenceContainer& scSimulated, tree& trSampled, MDOUBLE qNminOfRealData, bool& isLastIteration, int& numOfpairsWithRateAboveMinRequiredExp, MDOUBLE& T_BH_prev, ofstream* simCorrelStream)
	2668	{
	2669	LOGnOUT(4,<<endl<<"Compute: Mapping + Correlation. CoMap Algorithm"<<endl);
	2670	time_t t1,t2;
	2671	time(&t1);
	2672	VVdouble postProbPerCatPerPos;
	2673	VVVdouble postProbPerSpPerCatPerPos;
	2674
	2675	Vdouble rate4siteSim;
	2676	Vdouble rate4siteReal;
	2677	Vdouble gainSim;
	2678	Vdouble lossSim;
	2679
	2680	computeCountsGL* countsGL = NULL;
	2681	sankoffReconstructGL* sankoffReconstructMP = NULL;
	2682	rate4siteGL* r4s = NULL;
	2683	gainLoss4site* gl4s = NULL;
	2684
	2685	if(gainLossOptions::_isRemoveSimulatedPositionsBasedOnMP){
	2686	LOGnOUT(4,<<endl<<"MaxParsimonyChange for simulated data (remove positions with no events)..."<<endl);
	2687	createDir(gainLossOptions::_outDir, "MPsimulations");
	2688	string dirMP = gainLossOptions::_outDir + "//" + "MPsimulations";
	2689	sankoffReconstructGL sankoffReconstructMP(scSimulated, trSampled, dirMP,gainLossOptions::_costMatrixGainLossRatio, _distanceFromNearestOTUForRecent);
	2690	VVVdouble MPPerPos = sankoffReconstructMP.getMPPerPos();
	2691	vector<int> posToRemove(scSimulated.seqLen(),false);
	2692	MDOUBLE minExpT_MP =Parameters::getFloat("_minNumOfMPEvent2RemoveSimulatedPositions")/2; //Parameters::getFloat("_minExpThresholdForPValComputationForCorrelatingPair")/2.0;
	2693	MDOUBLE Nmin = 0;
	2694	LOGnOUT(4,<<"min Number Of Max Parsimony Event to consider a Position is "<<minExpT_MP*2<<endl);
	2695	int numOfRemovedPos=0;
	2696	for (int pos = 0; pos<scSimulated.seqLen(); ++pos){
	2697	MDOUBLE Gain = MPPerPos[pos][0][1];
	2698	MDOUBLE Loss = MPPerPos[pos][1][0];
	2699	if(gainLossOptions::_isOnlyCorrelateWithBoth)
	2700	Nmin = computeNminRforCorrelWithGainAndLoss(Gain,Loss);
	2701	else
	2702	Nmin = max(Gain,Loss); // thus, position are removed only if both their gain and loss values are below minT
	2703	if(Nmin < minExpT_MP){
	2704	posToRemove[pos] = true;
	2705	numOfRemovedPos++;
	2706	}
	2707	}
	2708	scSimulated.removePositions(posToRemove);
	2709	LOGnOUT(4,<<"removed="<<numOfRemovedPos<<endl);
	2710	int numOfSimulatedPositionsAboveMinRate = scSimulated.seqLen();
	2711	LOGnOUT(4,<<"After remove numOfPositions="<<numOfSimulatedPositionsAboveMinRate<<endl);
	2712	}
	2713
	2714	MDOUBLE meanGain, meanLoss, medianGain, medianLoss;
	2715	VVVVdouble expChanges_PosNodeXY_Sim;
	2716	if(gainLossOptions::_isCorrelationsBasedOnMaxParsimonyMapping){
	2717	sankoffReconstructMP = new sankoffReconstructGL(scSimulated, trSampled, gainLossOptions::_outDir,gainLossOptions::_costMatrixGainLossRatio, _distanceFromNearestOTUForRecent);
	2718	expChanges_PosNodeXY_Sim = sankoffReconstructMP->getMPPerPosPerNode();
	2719	gainSim = sankoffReconstructMP-> getGainMPPerPos();
	2720	lossSim = sankoffReconstructMP-> getLossMPPerPos();
	2721	}else{
	2722	if(!gainLossOptions::_gainLossDist){
	2723	if(_sp->categories()>1){ // to fill LpostPerCat - run computeRate4site()
	2724	LOGnOUT(4,<<endl<<"The required LpostPerCat is empty - run Rate4Site to compute..."<<endl);
	2725	r4s = new rate4siteGL(scSimulated,trSampled,_sp,gainLossOptions::_outDir, _unObservableData_p);
	2726	r4s->run();
	2727	postProbPerCatPerPos = r4s->getLpostPerCat();
	2728	if(gainLossOptions::_isUseRateForSiteAsNminForCorrelations){
	2729	rate4siteSim = r4s->getRates();
	2730	rate4siteReal = _rates;
	2731	}
	2732	if(r4s) delete r4s;
	2733	}
	2734	else{
	2735	postProbPerCatPerPos.resize(1);
	2736	postProbPerCatPerPos[0].resize(scSimulated.seqLen());
	2737	oneMatrix(postProbPerCatPerPos);
	2738	}
	2739	countsGL = new computeCountsGL(scSimulated,trSampled,_sp,gainLossOptions::_outDir,postProbPerCatPerPos, _distanceFromNearestOTUForRecent); //_distanceFromRootForRecent
	2740	}
	2741	else{
	2742	LOGnOUT(4,<<endl<<"The required LpostPerSpPerCat is empty - run computeGain4Site to compute..."<<endl);
	2743	gl4s = new gainLoss4site(scSimulated,trSampled,_spVVec,_gainDist,_lossDist,gainLossOptions::_outDir,_unObservableData_p);
	2744	gl4s->computeGain4Site();
	2745	postProbPerSpPerCatPerPos = gl4s->getLpostPerSpPerCat();
	2746	if(gl4s) delete gl4s;
	2747	countsGL = new computeCountsGL(scSimulated,trSampled,_spVVec,_gainDist,_lossDist,gainLossOptions::_outDir,postProbPerSpPerCatPerPos,_distanceFromNearestOTUForRecent); //_distanceFromRootForRecent
	2748	}
	2749	countsGL->run();
	2750	expChanges_PosNodeXY_Sim = countsGL->getExpChanges(); // simulated data mapping
	2751	gainSim = countsGL-> get_expV01();
	2752	lossSim = countsGL-> get_expV10();
	2753	}
	2754	meanGain = computeAverage(gainSim);
	2755	meanLoss = computeAverage(lossSim);
	2756	medianGain = computeMedian(gainSim);
	2757	medianLoss = computeMedian(lossSim);
	2758	LOGnOUT(4,<<"Mean values Gain="<<meanGain<<"\tLoss="<<meanLoss<<endl);
	2759	LOGnOUT(4,<<"Median values Gain="<<medianGain<<"\tLoss="<<medianLoss<<endl<<endl);
	2760
	2761	if(Parameters::getInt("_isUpdateMinExpThresholdGivenSimulaitonsQuantile") && Parameters::getFloat("_minExpThresholdForPValComputationForCorrelatingPair")>0){
	2762	MDOUBLE quantileVal = 0.1;
	2763	MDOUBLE gainQuantil = computeQuantileFrac(gainSim,quantileVal);
	2764	MDOUBLE lossQuantil = computeQuantileFrac(lossSim,quantileVal);
	2765	MDOUBLE qNminOfSimData = computeNminRforCorrelWithGainAndLoss(gainQuantil,lossQuantil);
	2766	MDOUBLE qNminOfSimDataPrev = qNminOfSimData;
	2767	while( qNminOfSimData-qNminOfSimDataPrev < 0.1 ){
	2768	qNminOfSimDataPrev = qNminOfSimData;
	2769	gainQuantil = computeQuantileFrac(gainSim,quantileVal);
	2770	lossQuantil = computeQuantileFrac(lossSim,quantileVal);
	2771	qNminOfSimData = computeNminRforCorrelWithGainAndLoss(gainQuantil,lossQuantil);
	2772	quantileVal += 0.1;
	2773	}
	2774
	2775	MDOUBLE minExpT = (double)Parameters::getFloat("_minExpThresholdForPValComputationForCorrelatingPair");
	2776	if(minExpT < qNminOfSimData){
	2777	Parameters::updateParameter("_minExpThresholdForPValComputationForCorrelatingPair",double2string(qNminOfSimData).c_str());
	2778	LOGnOUT(4,<<"Update MinExpThreshold GivenSimulaitonsQuantile= "<<qNminOfSimData <<" with respect to simulation quantile ="<<quantileVal<<endl);
	2779	}else{
	2780	LOGnOUT(4,<<"No update MinExpThreshold GivenSimulaitonsQuantile= "<<qNminOfSimData<<" is smaller than current val="<<minExpT<<endl);
	2781	}
	2782	Parameters::updateParameter("_isUpdateMinExpThresholdGivenSimulaitonsQuantile","0"); // Done once
	2783	}
	2784	if(countsGL) delete countsGL;
	2785	if(sankoffReconstructMP) delete sankoffReconstructMP;
	2786
	2787	// can remove only positions that are below Threshold in all correlation types
	2788	MDOUBLE minExpT = Parameters::getFloat("_minExpThresholdForPValComputationForCorrelatingPair");
	2789	LOGnOUT(4,<<"Remove simulated positions below Nmin="<<minExpT<<endl);
	2790	LOGnOUT(4,<<"Before remove numOfPositions="<<scSimulated.seqLen()<<endl);
	2791	vector<int> posToRemove(scSimulated.seqLen(),false);
	2792	int numOfRemovedPos = 0;
	2793	VVVVdouble expChanges_PosNodeXY_SimFinal;
	2794	VVVdouble expChanges_PosXY_Sim;
	2795	computeRateValPerPos(expChanges_PosNodeXY_Sim, expChanges_PosXY_Sim);
	2796	for (int pos = 0; pos<scSimulated.seqLen(); ++pos){
	2797	MDOUBLE Nmin = 0;
	2798	MDOUBLE Gain = expChanges_PosXY_Sim[pos][0][1];
	2799	MDOUBLE Loss = expChanges_PosXY_Sim[pos][1][0];
	2800
	2801	if(gainLossOptions::_isOnlyCorrelateWithBoth)
	2802	Nmin = computeNminRforCorrelWithGainAndLoss(Gain,Loss);
	2803	else
	2804	Nmin = max(Gain,Loss); // thus, position are removed only if both their gain and loss values are below minT
	2805
	2806	if(Nmin < minExpT){
	2807	posToRemove[pos] = true;
	2808	numOfRemovedPos++;
	2809	}
	2810	else
	2811	expChanges_PosNodeXY_SimFinal.push_back(expChanges_PosNodeXY_Sim[pos]);
	2812
	2813	}
	2814	scSimulated.removePositions(posToRemove);
	2815	LOGnOUT(4,<<"removed="<<numOfRemovedPos<<endl);
	2816	int numOfSimulatedPositionsAboveMinRate = scSimulated.seqLen();
	2817	LOGnOUT(4,<<"After remove numOfPositions="<<numOfSimulatedPositionsAboveMinRate<<endl);
	2818
	2819
	2820	//// Correlations with simulated data
	2821	computeCorrelations* computeCorrel =NULL;
	2822	Vint selectedSitesSim;
	2823	Vint numOfGapsTillSite;
	2824	computeCorrel = new computeCorrelations(trSampled, gainLossOptions::_outDir, &expChanges_PosNodeXY_SimFinal);
	2825	readIntegersFromFileIntoVector(selectedSitesSim, numOfSimulatedPositionsAboveMinRate-1, 0, NULL,NULL); // all sites in range
	2826	numOfGapsTillSite.resize(selectedSitesSim.size(),0);
	2827
	2828	//LOGnOUT(4,<<"Warning: isNormalizeForBranch is by branch length. correlationForZscore false by Default. Both with and without branch"<<endl);
	2829	// Compute correlations of Sim data
	2830	computeCorrel->runComputeCorrelations(selectedSitesSim,numOfGapsTillSite, gainLossOptions::_isNormalizeForBranchExpInCorrCompute);
	2831
	2832	// sort Corr vector of Sim data
	2833	computeCorrel->produceSortedVectorsOfAllCorrelations(rate4siteSim); // maybe of size=0
	2834
	2835	// produce Bins of Sim data
	2836	int numberOfHighCorrInSimulationOfMedianNminBin = computeCorrel->produceSortedVectorsOfCorrelationsBinedByRate(qNminOfRealData, simCorrelStream);
	2837	if(numberOfHighCorrInSimulationOfMedianNminBin >= 1 && gainLossOptions::_percentileOfNminWithCorr1RequiredForLastIteration<100){ // use 100 for no "Corr=1 based convergence"
	2838	isLastIteration = true; // convergence (median Bin with Corr=1)
	2839	LOGnOUT(4,<<"\n Last iteration of simulations, reached 'convergence' - simulated "<<numberOfHighCorrInSimulationOfMedianNminBin<<" pairs with Corr~=1 for bin with Rate in bin of "<<qNminOfRealData<<endl);
	2840	}
	2841
	2842	// compute correlations between real data (input _correlationsPerSitePerPosVec, _expChanges_PosNodeXY)
	2843	// and simulated data (according to Rate bins, already part of object)
	2844	// fill corPvalPerPos and _correlationsData
	2845	resizeMatrix( _isComputePairWithRateAboveNim, _correlationsPerSitePerPosVecSampledData[0].size(),_correlationsPerSitePerPosVecSampledData[0][0].size()); // _isComputePairWithRateAboveNim - bool vector
	2846	VVVdouble corPvalPerPos = _correlationsPerSitePerPosVecSampledData; //instead of resize - the vector of all-against-all Correlation coefficient determines the size of the vector PVals
	2847	if(_correlationsPerSitePerPosVecSampledData.size() == 0 \|\| _expChanges_PosNodeXYSampledData.size()==0)
	2848	errorMsg::reportError("Real data correlation and expectation data is missing, can't compute simulation-based pVal");
	2849	else
	2850	numOfpairsWithRateAboveMinRequiredExp = computeCorrel->computedCorrelationsPValBasedOnSimulatedDataCoMapBins(_correlationsPerSitePerPosVecSampledData,_isComputePairWithRateAboveNim,_expChanges_PosNodeXYSampledData,corPvalPerPos
	2851	, _correlationsData, rate4siteReal ,_selectedSites,_numOfGapsTillSite,_evolvingSites, isLastIteration); // fill corPvalPerPos
	2852
	2853	if(isLastIteration){ // compute FDR and print results
	2854	bool correlationForZscore = false;
	2855	//Vint selectedSites;
	2856	//readIntegersFromFileIntoVector(selectedSites,_sc.seqLen(), 0, NULL); // all sites in range
	2857
	2858	string printType = "pVal";
	2859	if(gainLossOptions::_isPrintCorrelationsOfAllPairs_pVal)
	2860	computeCorrel->printComputedCorrelations(_selectedSites,_evolvingSites, gainLossOptions::_isNormalizeForBranchExpInCorrCompute,correlationForZscore,&corPvalPerPos,&printType);
	2861
	2862	if(gainLossOptions::_isFDRcorrectionForPValInCorrelation && _correlationsData.size()>0){
	2863	Vdouble T_BH(corPvalPerPos.size()); // to be filled, for each corr type
	2864	// FDR
	2865	if(gainLossOptions::_isComputeQVals){
	2866	VVVdouble corQvalPerPos = computeCorrel-> pVals2qVals (corPvalPerPos,_correlationsData,_isComputePairWithRateAboveNim, T_BH, _selectedSites,_evolvingSites);
	2867	string printType = "qVal";
	2868	computeCorrel->printComputedCorrelations(_selectedSites,_evolvingSites, gainLossOptions::_isNormalizeForBranchExpInCorrCompute,correlationForZscore,&corQvalPerPos,&printType);
	2869	}else
	2870	computeCorrel-> pVals2qVals (corPvalPerPos,_correlationsData,_isComputePairWithRateAboveNim, T_BH, _selectedSites,_evolvingSites);
	2871
	2872	computeCorrel->printComputedCorrelationsData(gainLossOptions::_isNormalizeForBranchExpInCorrCompute,correlationForZscore,_correlationsData, T_BH);
	2873
	2874	if(gainLossOptions::_isPrintAllPairsOfCorrelatedSitesIncludingPValsAboveBH){
	2875	Vdouble minPValForPrint(corPvalPerPos.size(),gainLossOptions::_pValueCutOffForBootStrap); // same non-FDR min pVal for all correlation types
	2876	computeCorrel->printComputedCorrelationsData(gainLossOptions::_isNormalizeForBranchExpInCorrCompute,correlationForZscore,_correlationsData, minPValForPrint,gainLossOptions::_isPrintAllPairsOfCorrelatedSitesIncludingPValsAboveBH);
	2877	}
	2878
	2879	// Convergence of BH, less than 0.01% change
	2880	MDOUBLE T_BH_currentMinuslast = T_BH[0]-T_BH_prev;
	2881	LOGnOUT(4,<<" Convergence of BH: current Minus last="<<T_BH_currentMinuslast<<endl);
	2882	T_BH_prev = T_BH[0]; // updated
	2883
	2884	}else{
	2885	LOGnOUT(4,<<" Note: No computation of FDR corrData (pVal significant) size="<<_correlationsData.size()<<endl);
	2886	}
	2887	}
	2888
	2889	if(computeCorrel) delete computeCorrel;
	2890	time(&t2);
	2891	LOGnOUT(4,<<"TIME = "<<(t2-t1)/60.0<<" minutes (mapping+correlations with simulated data)"<<endl);
	2892	int numOfSimulatedPairsAboveMinRate = (numOfSimulatedPositionsAboveMinRate*(1+numOfSimulatedPositionsAboveMinRate))/2 - numOfSimulatedPositionsAboveMinRate;
	2893	return numOfSimulatedPairsAboveMinRate;
	2894	}
	2895
	2896
	2897	/********************************************************************************************
	2898	*********************************************************************************************/
	2899	void gainLoss::startMaxParsimonyChange(bool isUpdateMPPerPos)
	2900	{
	2901	LOGnOUT(4,<<endl<<"Starting MaxParsimonyChange..."<<endl);
	2902	time_t t1,t2;
	2903	time(&t1);
	2904	sankoffReconstructGL sankoffReconstructMP(_sc, _tr, gainLossOptions::_outDir,gainLossOptions::_costMatrixGainLossRatio, _distanceFromNearestOTUForRecent);
	2905	if(isUpdateMPPerPos){
	2906	_MPPerPos = sankoffReconstructMP.getMPPerPos();
	2907	_MP_PosNodeXY = sankoffReconstructMP.getMPPerPosPerNode();
	2908	_gainMPPerPos = sankoffReconstructMP.getGainMPPerPos();
	2909	_lossMPPerPos = sankoffReconstructMP.getLossMPPerPos();
	2910	_CostOfTreeMP = sankoffReconstructMP.getNumOfGainEvnetsMP() + sankoffReconstructMP.getNumOfLossEvnetsMP() ;
	2911	}
	2912	time(&t2);
	2913	LOGnOUT(4,<<"TIME = "<<(t2-t1)/60.0<<" minutes"<<endl<<endl);
	2914	}
	2915
	2916
	2917	/********************************************************************************************
	2918	*********************************************************************************************/
	2919	void gainLoss::startMaxParsimonyChange(sequenceContainer& sc, tree& tr, string& outDir,MDOUBLE costMatrixGainLossRatio, MDOUBLE distanceFromNearestOTUForRecent,bool isUpdateMPPerPos)
	2920	{
	2921	LOGnOUT(4,<<endl<<"Starting MaxParsimonyChange..."<<endl);
	2922	time_t t1,t2;
	2923	time(&t1);
	2924	sankoffReconstructGL sankoffReconstructMP(sc, tr, outDir,costMatrixGainLossRatio, distanceFromNearestOTUForRecent);
	2925	if(isUpdateMPPerPos)
	2926	_MPPerPos = sankoffReconstructMP.getMPPerPos();
	2927	time(&t2);
	2928	LOGnOUT(4,<<"TIME = "<<(t2-t1)/60.0<<" minutes"<<endl<<endl);
	2929	}
	2930
	2931
	2932
	2933	/********************************************************************************************
	2934	ancestralReconstructStates
	2935	*********************************************************************************************/
	2936	void gainLoss::ancestralReconstructor()
	2937	{
	2938	LOGnOUT(4,<<endl<<"Starting ancestralReconstructor..."<<endl);
	2939	time_t t1,t2;
	2940	time(&t1);
	2941
	2942	if(_alphVecDist.size()>_sc.getAlphabet()->size() && _alphVecDist[_sc.getAlphabet()->size()]>0)
	2943	LOGnOUT(2,<<"\nWARNING !!! : ancestralReconstruct is not fully functional with missing data.\n Assume missing data indicates absence (indels)."<<endl<<endl);
	2944
	2945	VVint statesV; // the vector with the states of the nodes, to be filled with reconstructed states (max over joint)
	2946	ancestralReconstructStates ancestralReconst(_tr,_sc,_sp); // Per POS,CAT
	2947
	2948	// compute joint reconstruction (?)
	2949	VVVdouble upL;
	2950	VVVint backtrack;
	2951	VVVint transitionTypeCount;
	2952	ancestralReconst.traverseUpML(upL, backtrack);
	2953	Vdouble LofJointV = ancestralReconst.traverseDownML(upL, backtrack, transitionTypeCount);
	2954	statesV = ancestralReconst.getStates();
	2955
	2956	treeIterDownTopConst tIt(_tr); // iterator used by following loops
	2957
	2958	// sum over positions - joint
	2959	Vint statesSum; // the vector with the Sum states of the nodes (joint)
	2960	statesSum.resize(_tr.getNodesNum());
	2961	for (int pos = 0; pos <_sc.seqLen(); ++pos) {
	2962	for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()){
	2963	statesSum[mynode->id()]+=statesV[pos][mynode->id()]; // Sum over positions
	2964	}
	2965	}
	2966
	2967	string AncestralReonstructSum = gainLossOptions::_outDir + "//" + "AncestralReconstructSumJoint.txt";
	2968	ofstream AncestralReonstructSumStream(AncestralReonstructSum.c_str());
	2969	AncestralReonstructSumStream<<"Node"<<"\t"<<"Sum"<<endl;
	2970	for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
	2971	AncestralReonstructSumStream<<mynode->name()<<"\t"<<statesSum[mynode->id()]<<endl;
	2972	}
	2973
	2974	// printAncestralReconstructFullData (joint)
	2975	if(gainLossOptions::_printAncestralReconstructFullData){
	2976	string AncestralReonstruct = gainLossOptions::_outDir + "//" + "AncestralReonstruct.txt";
	2977	ofstream AncestralReonstructStream(AncestralReonstruct.c_str());
	2978	AncestralReonstructStream<<"POS"<<"\t"<<"Node"<<"\t"<<"State"<<endl;
	2979	for (int pos = 0; pos <_sc.seqLen(); ++pos) {
	2980	for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()){
	2981	AncestralReonstructStream<<pos+1<<"\t"<<mynode->name()<<"\t"<<statesV[pos][mynode->id()]<<endl;
	2982	}
	2983	}
	2984	}
	2985	// print Trees
	2986	if(gainLossOptions::_printTreesWithAncestralReconstructAsBP){
	2987	createDir(gainLossOptions::_outDir, "TreesWithAncestralReonstruct");
	2988	for (int pos = 0; pos <_sc.seqLen(); ++pos){
	2989	string strTreeNum = gainLossOptions::_outDir + "//" + "TreesWithAncestralReonstruct" + "//" + "TreeAncRec" + int2string(pos+1) + ".ph";
	2990	ofstream tree_out(strTreeNum.c_str());
	2991	printTreeStatesAsBPValues(tree_out,statesV[pos],_tr);
	2992	}
	2993	}
	2994	time(&t2);
	2995	LOGnOUT(4,<<"TIME = "<<(t2-t1)/60.0<<" minutes"<<endl<<endl);
	2996	}
	2997
	2998
	2999	/********************************************************************************************
	3000	ancestralReconstructorBasedOnJoint (Posterior)
// Interested in the set of all the hypothetical taxonomic unit (HTU) sequences (joint reconstruction),
// as opposed to a specific HTU whose sequence we would like to estimate (marginal reconstruction).
	3003	*********************************************************************************************/
	3004	void gainLoss::ancestralReconstructorBasedOnJoint()
	3005	{
	3006	LOGnOUT(4,<<endl<<"Starting ancestralReconstructorBasedOnJoint..."<<endl);
	3007	time_t t1,t2;
	3008	time(&t1);
	3009
	3010	VVint statesV; // the vector with the states of the nodes, to be filled with reconstructed states (max over joint)
	3011	VVVdouble ancestralProbsPerPosNodeState; // the vector with the probabilities of the nodes states, to be filled with reconstructed states (posterior)
	3012
	3013	ancestralReconstructStates ancestralReconst(_tr,_sc,_sp); // Per POS,CAT
	3014
	3015	// compute posterior reconstruction
	3016	// Prob(N=x\|Data) = sum{fatherState}[P(N=x, father(N)=y\|D)]}
	3017	if(_jointProb_PosNodeXY.size()==0){
	3018	computePosteriorExpectationOfChangeRunOnly(); // this phase will also fill _jointProb_PosNodeXY
	3019	}
	3020	ancestralReconst.computeAncestralPosterior(_jointProb_PosNodeXY);
	3021	ancestralProbsPerPosNodeState = ancestralReconst.getAncestralProbs(); // VVVdouble[pos][node][state] ancestralProbsPerPosNodeState
	3022
	3023	treeIterDownTopConst tIt(_tr); // iterator used by following loops
	3024	// printAncestralReconstructFullData (posterior)
	3025	if(gainLossOptions::_printAncestralReconstructPosterior){
	3026	string AncestralReonstructPosterior = gainLossOptions::_outDir + "//" + "AncestralReconstructPosterior.txt";
	3027	ofstream AncestralReonstructPosteriorStream(AncestralReonstructPosterior.c_str());
	3028	AncestralReonstructPosteriorStream.precision(PRECISION);
	3029
	3030	AncestralReonstructPosteriorStream<<"POS"<<"\t"<<"Node"<<"\t"<<"State"<<"\t"<<"Prob"<<endl;
	3031	for (int pos = 0; pos <_sc.seqLen(); ++pos) {
	3032	for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()){
	3033	int state=1;
	3034	AncestralReonstructPosteriorStream<<pos+1<<"\t"<<mynode->name();
	3035	//for (int state = 0; state <_sp->alphabetSize(); ++state){ // only state=1 is printed
	3036	AncestralReonstructPosteriorStream<<"\t"<<state<<"\t"<<ancestralProbsPerPosNodeState[pos][mynode->id()][state];
	3037	//}
	3038	AncestralReonstructPosteriorStream<<endl;
	3039	}
	3040	}
	3041	}
	3042	// sum over positions - posterior
	3043	VVdouble probStatesSum; // the vector with the Sum probes of the nodes (posterior)
	3044	resizeMatrix(probStatesSum,_tr.getNodesNum(),_sp->alphabetSize());
	3045	Vdouble probOnesSum; // the vector with the Sum ones probes of the nodes (posterior) - good for {0,1}
	3046	probOnesSum.resize(_tr.getNodesNum());
	3047	for (int pos = 0; pos <_sc.seqLen(); ++pos) {
	3048	for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()){
	3049	for (int state = 0; state <_sp->alphabetSize(); ++state){
	3050	probStatesSum[mynode->id()][state]+=ancestralProbsPerPosNodeState[pos][mynode->id()][state]; // Sum over positions
	3051	probOnesSum[mynode->id()] +=ancestralProbsPerPosNodeState[pos][mynode->id()][state]*state; // if state==0 Nothing added
	3052	}
	3053	}
	3054	}
	3055	// print Sum: Table, Tree
	3056	string AncestralReonstructPosteriorSum = gainLossOptions::_outDir + "//" + "AncestralReconstructPosteriorSum.txt";
	3057	ofstream AncestralReonstructPosteriorSumStream(AncestralReonstructPosteriorSum.c_str());
	3058	AncestralReonstructPosteriorSumStream.precision(PRECISION);
	3059
	3060	AncestralReonstructPosteriorSumStream<<"Node"<<"\t"<<"State"<<"\t"<<"ProbSum"<<"\t"<<"Father"<<"\t"<<"StateFather"<<"\t"<<"ProbSumFather"<<endl;
	3061	int state=1;
	3062	//for (int state = 0; state <_sp->alphabetSize(); ++state){ // only state=1 is printed
	3063	for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
	3064	AncestralReonstructPosteriorSumStream<<mynode->name()<<"\t"<<state<<"\t"<<probStatesSum[mynode->id()][state]<<"\t";
	3065	if(!mynode->isRoot())
	3066	AncestralReonstructPosteriorSumStream<<mynode->father()->name()<<"\t"<<state<<"\t"<<probStatesSum[mynode->father()->id()][state]<<endl;
	3067	else
	3068	AncestralReonstructPosteriorSumStream<<"NoFather2Root"<<"\t"<<"NA"<<"\t"<<"NA"<<endl;
	3069
	3070	}
	3071	//}
	3072
	3073	// print Tree
	3074	string TreeAncRecSum = gainLossOptions::_outDir + "//" + "TreeAncRecSum" + ".ph";
	3075	ofstream TreeAncRecSumStr(TreeAncRecSum.c_str());
	3076	TreeAncRecSumStr.precision(LOW_PRECISION);
	3077	printTreeStatesAsBPValues(TreeAncRecSumStr,probOnesSum,_tr);
	3078
	3079	time(&t2);
	3080	LOGnOUT(4,<<"TIME = "<<(t2-t1)/60.0<<" minutes"<<endl<<endl);
	3081	}
	3082
	3083
	3084
	3085
	3086	/********************************************************************************************
	3087	*********************************************************************************************/
	3088	void gainLoss::computeBranchLegthDiffFactor(ostream& out){
	3089	LOGnOUT(4,<<endl<<"Starting computeBranchLegthDiffFactor..."<<endl);
	3090	LOGnOUT(4,<<" Likelihood reference (computed after BBL)="<<_logL<<endl);
	3091
	3092	MDOUBLE percentOfLogLDiffTolerance = 0.01;
	3093	MDOUBLE logLOrig;
	3094	MDOUBLE branchLegthOrig;
	3095	MDOUBLE branchLegthAfterBBL;
	3096	tree treeComp = _tr; // copy the tree
	3097	treeIterTopDownConst tIt(treeComp);
	3098
	3099	out<<"branch"<<"\t"<<"length@orginal"<<"\t"<<"lengthAfterBBL"<<"\t"<<"factor"<<"\t"<<"Diff"<<"\t"<<"logL@orginal"<<"\t"<<"logLAfterBBL"<<"\t"<<"logL_Diff"<<endl;
	3100	for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
	3101	if(mynode->isRoot()) continue;
	3102
	3103	branchLegthOrig = _trOrig.findNodeByName(mynode->name())->dis2father();
	3104	branchLegthAfterBBL = _tr.findNodeByName(mynode->name())->dis2father();
	3105
	3106	treeComp.findNodeByName(mynode->name())->setDisToFather(branchLegthOrig); // set BL to original
	3107
	3108	if(_unObservableData_p){
	3109	if(!gainLossOptions::_gainLossDist){_unObservableData_p->setLforMissingData(treeComp,_sp);}
	3110	else{_unObservableData_p->setLforMissingData(treeComp,_spVVec,_gainDist,_lossDist);}
	3111	}
	3112	if(!gainLossOptions::_gainLossDist){logLOrig = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(treeComp,_scUniqPatterns,*_sp,_weightsUniqPatterns,_unObservableData_p);}
	3113	else{logLOrig = likelihoodComputationGL::getTreeLikelihoodAllPosAlphTheSame(treeComp,_scUniqPatterns,_spVVec,_gainDist,_lossDist,_weightsUniqPatterns,_unObservableData_p);}
	3114
	3115	treeComp.findNodeByName(mynode->name())->setDisToFather(branchLegthAfterBBL); // set BL back
	3116
	3117	if(logLOrig > _logL+((percentOfLogLDiffTolerance/100.0)*abs(_logL)) ){
	3118	LOGnOUT(4,<<"WARN... logL with estimated BL=" <<_logL<<" is lower than original BL="<<logLOrig<<endl);
	3119	}
	3120	LOGnOUT(6,<<"Likelihood for branch Diff="<<branchLegthAfterBBL-branchLegthOrig<<"\t previous logL ="<<logLOrig<<endl);
	3121	out<<mynode->name()<<"\t"
	3122	<<branchLegthOrig<<"\t"
	3123	<<branchLegthAfterBBL<<"\t"
	3124	<<branchLegthAfterBBL / branchLegthOrig <<"\t"
	3125	<<branchLegthAfterBBL - branchLegthOrig<<"\t"
	3126	<<logLOrig <<"\t"<<_logL<<"\t"<<_logL-logLOrig<<endl;
	3127	}
	3128	}
	3129
	3130
	3131
	3132
	3133
	3134
	3135	/********************************************************************************************
	3136	*********************************************************************************************/
	3137	void gainLoss::startSimulateSequences(int numOfSequenceSets, int seqLengthInSeq)
	3138	{
	3139	LOGnOUT(4,<<endl<< "simulating sequences with the same rate as _rates. numOfSequenceSets="<<numOfSequenceSets<<endl);
	3140	simulateSequences(numOfSequenceSets, seqLengthInSeq, gainLossOptions::_writeSeqSim,
	3141	gainLossOptions::_useTheSameSpForSim, gainLossOptions::_isReversibleSim, gainLossOptions::_gainEQlossSim,gainLossOptions::_rateDistributionTypeSim);
	3142
	3143
	3144	if(!gainLossOptions::_gainLossDist&&gainLossOptions::_calculateRate4siteSim){
	3145	for(int i=0; i<numOfSequenceSets; ++i){
	3146	//re-open seq
	3147	gainLossAlphabet alph;
	3148	string strSeqNum = gainLossOptions::_outDir + "//" + "SimulatedSequences" + "//" + "seq" + int2string(i+1) + ".fa";
	3149	ifstream in(strSeqNum.c_str());
	3150	sequenceContainer seqReOpened = recognizeFormat::read(in,&alph);
	3151
	3152	string outDirSeq = gainLossOptions::_outDir + "//" + "SimulatedSequences" + "//" + "seq" + int2string(i+1);
	3153	createDir(gainLossOptions::_outDir + "//" + "SimulatedSequences", "seq" + int2string(i+1));
	3154	rate4siteGL r4s(seqReOpened,_tr,_sp, outDirSeq,_unObservableData_p);
	3155	r4s.run();
	3156
	3157	}
	3158	}
	3159	}
	3160
	3161	/********************************************************************************************
	3162	simulateSequences
	3163	*********************************************************************************************/
	3164	vector<sequenceContainer> gainLoss::simulateSequences(int numOfSequenceSets, int seqLengthInSet, bool writeSeq,
	3165	bool useTheSame, bool isReversible, bool isGeqL, gainLossOptions::distributionType rateDistributionTypeSim)
	3166	{
	3167	int numOfSitesInSeq= seqLengthInSet;
	3168	LOGnOUT(4,<< "simulating numOfSitesInSeq="<<numOfSitesInSeq<<endl);
	3169	time_t t1,t2;
	3170	time(&t1);
	3171
	3172	gainLossAlphabet alph;
	3173	vector<sequenceContainer> scV;
	3174	scV.resize(numOfSequenceSets);
	3175
	3176	tree trForSim;
	3177	stochasticProcess* spForSim =NULL;
	3178
	3179	if(useTheSame){
	3180	LOGnOUT(4,<< "simulating sequences with the same stochastic proess"<<endl);
	3181	spForSim = _sp;
	3182	trForSim = _tr;
	3183	printModellValuesOfParams(spForSim, trForSim);
	3184	}
	3185	else{
	3186	LOGnOUT(4,<< "simulating sequences with the NEW stochastic proess"<<endl);
	3187	LOGnOUT(4,<< "simulating sequences with a Reversible stochastic proess="<<isReversible<<endl);
	3188	LOGnOUT(4,<< "simulating sequences with _gainEQlossSim="<<isGeqL<<endl);
	3189	if(isGeqL){
	3190	LOGnOUT(4,<< "WARNING: _gainLossRateAreFreq is overwritten with"<<isGeqL<<endl);
	3191	//Parameters::updateParameter("_gainEQloss","1"); // override to previous value assumed to be false
	3192	gainLossOptions::_gainEQloss =1; // override to previous value assumed to be false
	3193	//Parameters::updateParameter("_characterFreqEval","FiftyFifty");
	3194	gainLossOptions::_characterFreqEval = gainLossOptions::FiftyFifty;
	3195	//Parameters::updateParameter("_isReversible","1");
	3196	gainLossOptions::_isReversible =1;
	3197	}
	3198	spForSim = startStochasticProcessGeneric(rateDistributionTypeSim, isReversible);
	3199	//gainLossOptimizer glOpt(_tr,spForSim,_sc,
	3200	// gainLossOptions::_epsilonOptimizationIterationCycle,gainLossOptions::_maxNumOfIterations,
	3201	// gainLossOptions::_epsilonOptimizationModel,gainLossOptions::_maxNumOfIterationsModel,
	3202	// gainLossOptions::_epsilonOptimizationBBL,gainLossOptions::_maxNumOfIterationsBBL,_pLforMissingDataPerCat);
	3203	bool isBBL = true; // in optimizer also check gainLossOptions::_isBBL. This if to differ from manyStarts
	3204	gainLossOptimizer glOpt(_tr,spForSim,_scUniqPatterns,
	3205	gainLossOptions::_epsilonOptimizationIterationCycle,gainLossOptions::_maxNumOfIterations,
	3206	gainLossOptions::_epsilonOptimizationModel,gainLossOptions::_maxNumOfIterationsModel,
	3207	gainLossOptions::_epsilonOptimizationBBL,gainLossOptions::_maxNumOfIterationsBBL,_weightsUniqPatterns,_unObservableData_p,
	3208	isBBL ,gainLossOptions::_isbblLSWhenbblEMdontImprove);
	3209	trForSim = glOpt.getOptTree();
	3210
	3211	}
	3212
	3213	if(_rates.size()==0 && gainLossOptions::_rateDistributionType==gainLossOptions::GAMMA){ // to fill _LpostPerCat - run computeRate4site()
	3214	rate4siteGL r4s(_sc,_tr,_sp, gainLossOptions::_outDir,_unObservableData_p);
	3215	r4s.run();
	3216	_postProbPerCatPerPos = r4s.getLpostPerCat();
	3217	_rates = r4s.getRates();
	3218	}
	3219	if(writeSeq)
	3220	createDir(gainLossOptions::_outDir, "SimulatedSequences");
	3221	for(int i=0; i<numOfSequenceSets; ++i){
	3222	LOGnOUT(4,<< "simulating set="<<i<<endl);
	3223	simulateTree st(trForSim, *spForSim, &alph);
	3224	//st.generate_seqWithRateVector(_rates, _sc.seqLen());
	3225	st.generate_seq(numOfSitesInSeq);
	3226	scV[i] = st.toSeqDataWithoutInternalNodes();
	3227	if(writeSeq){
	3228	string strSeqNum = gainLossOptions::_outDir + "//" + "SimulatedSequences" + "//" + "seq" + int2string(i+1) + ".fa";
	3229	ofstream seq_out(strSeqNum.c_str());
	3230	fastaFormat:: write(seq_out,scV[i]);
	3231	}
	3232	}
	3233	time(&t2);
	3234	LOGnOUT(4,<<"TIME = "<<(t2-t1)/60.0<<" minutes"<<endl<<endl);
	3235	return scV;
	3236	}
	3237
	3238	/********************************************************************************************
	3239	simulateSequencesForParametricBootstrap
	3240	*********************************************************************************************/
	3241	sequenceContainer gainLoss::simulateSequencesForParametricBootstrap(int seqLengthInSet, sequenceContainer& scSampled, tree& trSampled, bool writeSeq,
	3242	bool useTheSame)
	3243	{
	3244	LOGnOUT(4,<< "simulateSequencesForParametricBootstrap numOfSitesInSeq="<<seqLengthInSet<<endl);
	3245
	3246	time_t t1,t2;
	3247	time(&t1);
	3248
	3249	MDOUBLE lowRateFactor = 0.1; // not clear it's working...
	3250	MDOUBLE fractionOfPosForLowRate = 0.1;
	3251	int numOfPos2SimulateLowRate = (int)(fractionOfPosForLowRate*seqLengthInSet);
	3252	if(gainLossOptions::_isAddSimulationsWithLowRate){
	3253	seqLengthInSet -= numOfPos2SimulateLowRate; // keep the entire number of positions fixed.
	3254	LOGnOUT(4,<<"Simulate low rate for "<<numOfPos2SimulateLowRate<<" positions (with tree branches multiplied by "<<lowRateFactor<< ")"<<endl);
	3255	}
	3256	gainLossAlphabet alph;
	3257	sequenceContainer sc;
	3258	//if(gainLossOptions::_accountForMissingData)
	3259	//sc.startZeroSequenceContainerGL(scSampled,alph,gainLossOptions::_minNumOfZeros,gainLossOptions::_minNumOfOnes); // reverse from the Zero sequence
	3260	sc.startZeroSequenceContainerGL(scSampled,alph,0,0); // Just as precurasor for next concat.
	3261	//fastaFormat::write(cout,sc); // DEBUG
	3262
	3263	tree trForSim;
	3264	tree trForSimForLowRate;
	3265	stochasticProcess* spForSim =NULL;
	3266
	3267	trForSim = trSampled;
	3268	if(!gainLossOptions::_gainLossDist){
	3269	spForSim = _sp;
	3270	printModellValuesOfParams(spForSim, trForSim);
	3271
	3272	simulateTree st(trForSim, *spForSim, &alph);
	3273	//st.generate_seqWithRateVector(_rates, scSampled.seqLen());
	3274	st.generate_seq(seqLengthInSet);
	3275	sequenceContainer scTemp = st.toSeqDataWithoutInternalNodes();
	3276	/if(sc.seqLen()>0)/
	3277	sc.concatenate(scTemp);
	3278	/*else
	3279	sc = scTemp;*/
	3280	}else{
	3281	int numOfSpGain = _spVVec.size();
	3282	int numOfSpLoss = _spVVec[0].size();
	3283	int numOfSps = numOfSpGain*numOfSpLoss;
	3284	int numOfPos2SimulatePerSp = seqLengthInSet/numOfSps;
	3285	for (int gainCategor=0; gainCategor<numOfSpGain; gainCategor++){
	3286	for (int lossCategor=0; lossCategor<numOfSpLoss; lossCategor++){
	3287	spForSim = _spVVec[gainCategor][lossCategor];
	3288	simulateTree st(trForSim, *spForSim, &alph);
	3289	//st.generate_seqWithRateVector(_rates, scSampled.seqLen());
	3290	st.generate_seq(numOfPos2SimulatePerSp);
	3291	sequenceContainer scTemp = st.toSeqDataWithoutInternalNodes();
	3292	/if(sc.seqLen()>0)/
	3293	sc.concatenate(scTemp);
	3294	/*else
	3295	sc = scTemp;*/
	3296	}
	3297	}
	3298	spForSim = _spSimple;
	3299	}
	3300	if(gainLossOptions::_isAddSimulationsWithLowRate){
	3301	trForSimForLowRate = trSampled;
	3302	trForSimForLowRate.multipleAllBranchesByFactor(lowRateFactor);
	3303	simulateTree stLowRate(trForSimForLowRate, *spForSim, &alph);
	3304	stLowRate.generate_seq(numOfPos2SimulateLowRate); // add 10% low rate simulations
	3305	sequenceContainer scLowRate = stLowRate.toSeqDataWithoutInternalNodes();
	3306	sc.concatenate(scLowRate);
	3307	}
	3308	if(writeSeq){
	3309	string strSeqNum = gainLossOptions::_outDir + "//" + "simulatedSeq" + ".fa";
	3310	ofstream seq_out(strSeqNum.c_str());
	3311	fastaFormat:: write(seq_out,sc);
	3312	}
	3313	time(&t2);
	3314	LOGnOUT(4,<<"TIME = "<<(t2-t1)/60.0<<" minutes"<<endl<<endl);
	3315	return sc;
	3316	}
	3317
	3318
	3319
	3320	/********************************************************************************************
	3321	Co-Evolution
	3322	*********************************************************************************************/
	3323	void gainLoss::findCoEvolvingSites(const int numberOfSequences2simulateForCoEvol) {
	3324	// 1. get the observed Vi array
	3325	if(_postProbPerCatPerPos.size()==0){ // to fill LpostPerCat - run computeRate4site()
	3326	rate4siteGL r4s(_sc,_tr,_sp,gainLossOptions::_outDir, _unObservableData_p);
	3327	r4s.run();
	3328	_rates = r4s.getRates();
	3329	_postProbPerCatPerPos = r4s.getLpostPerCat();
	3330	}
	3331	computeCountsGL countsGL(_sc,_tr,_sp,gainLossOptions::_outDir,_postProbPerCatPerPos,_distanceFromNearestOTUForRecent);
	3332	countsGL.run();
	3333	VVVVdouble posteriorsGivenTerminals; // probChangesForBranch[pos][nodeID][x][y]
	3334	resizeVVVV(_sc.seqLen(),_tr.getNodesNum(),_sp->alphabetSize(),_sp->alphabetSize(),posteriorsGivenTerminals);
	3335	posteriorsGivenTerminals = countsGL.getExpChanges();
	3336
	3337
	3338	// 2. get the simulated Vi arrays
	3339	LOGnOUT(4,<<endl<< "simulating sequences with the same rate as _rates. numOfSequenceSets="<<numberOfSequences2simulateForCoEvol<<endl);
	3340	createDir(gainLossOptions::_outDir, "SimulatedSequences");
	3341
	3342	simulateSequences(numberOfSequences2simulateForCoEvol,_sc.seqLen(), gainLossOptions::_writeSeqSim,
	3343	gainLossOptions::_useTheSameSpForSim, gainLossOptions::_isReversibleSim, gainLossOptions::_gainEQlossSim,gainLossOptions::_rateDistributionTypeSim);
	3344
	3345	VVVVVdouble posteriorsGivenTerminalsSim; // posteriorsGivenTerminalsSim[Seq][pos][nodeID][x][y]
	3346	posteriorsGivenTerminalsSim.resize(numberOfSequences2simulateForCoEvol);
	3347	bool isSilent = true;
	3348	for(int i=0; i<numberOfSequences2simulateForCoEvol; ++i){
	3349	//re-open seq
	3350	gainLossAlphabet alph;
	3351	string strSeqNum = gainLossOptions::_outDir + "//" + "SimulatedSequences" + "//" + "seq" + int2string(i+1) + ".fa";
	3352	ifstream in(strSeqNum.c_str());
	3353	sequenceContainer seqReOpened = recognizeFormat::read(in,&alph);
	3354
	3355	string outDirSeq = gainLossOptions::_outDir + "//" + "SimulatedSequences" + "//" + "seq" + int2string(i+1);
	3356	createDir(gainLossOptions::_outDir + "//" + "SimulatedSequences", "seq" + int2string(i+1));
	3357
	3358	computeCountsGL countsGL(seqReOpened,_tr,_sp,outDirSeq,_postProbPerCatPerPos, isSilent);
	3359	countsGL.run();
	3360	//countsGL.printExpectationPerBranch();
	3361	//countsGL.printProbabilityPerPosPerBranch();
	3362	//countsGL.printProbExp();
	3363
	3364	resizeVVVV(_sc.seqLen(),_tr.getNodesNum(),_sp->alphabetSize(),_sp->alphabetSize(),posteriorsGivenTerminalsSim[i]);
	3365	posteriorsGivenTerminalsSim[i] = countsGL.getExpChanges();
	3366	}
	3367
	3368	// 3. Call a general class the finds co-evolving sites based on these VI arrays.
	3369	VVdouble correlations; //[pos][pos]. The correlation between position i and position j.
	3370	correlations.resize(_sc.seqLen());
	3371	for (int k=0; k < correlations.size(); ++k) correlations[k].resize(_sc.seqLen());
	3372
	3373	for (int i=0; i < posteriorsGivenTerminals.size() ; ++i) {
	3374	for (int j=i+1; j < posteriorsGivenTerminals.size() ; ++j) {
	3375	correlations[i][j] = computeCorrelationBetweenVis(posteriorsGivenTerminals[i],posteriorsGivenTerminals[j]);
	3376	}
	3377	}
	3378
	3379	// computing the correlations between the simulated sequences
	3380	VVVdouble correlationsSim; //[sim][pos][pos]
	3381	resizeVVV(numberOfSequences2simulateForCoEvol,_sc.seqLen(),_sc.seqLen(),correlationsSim);
	3382	for (int k=0; k < correlationsSim.size(); ++k) {
	3383	for (int i=0; i < posteriorsGivenTerminals.size() ; ++i) {
	3384	for (int j=i+1; j < posteriorsGivenTerminals.size() ; ++j) {
	3385	correlationsSim[k][i][j] = computeCorrelationBetweenVis(posteriorsGivenTerminalsSim[k][i],posteriorsGivenTerminalsSim[k][j]);
	3386	}
	3387	}
	3388	}
	3389
	3390	// sort and find where the actual corr is with respect to the simualted sequences.
	3391
	3392	// CoEvol glCoEvo(
	3393	// LOGnOUT(3,<<" starting to compute co evolving sites... "<<endl);
	3394	}
	3395
	3396	/********************************************************************************************
	3397	*********************************************************************************************/
	3398	MDOUBLE gainLoss::computeCorrelationBetweenVis(const VVVdouble & VIpos_i, const VVVdouble & VIpos_j
	3399	//, char corrType
	3400	)
	3401	{
	3402	// corrType will be 0 for correlations of 0>1 in both (the two positions underand
	3403	// the function gets as input two vectors of substitutions - one for position i and one for position j.
	3404	// it then computes the correlation between these two vectors by computing cov (vi, vj)/(sd(vi),sd(vj)).
	3405	// VIpos_i has the general structur [nodeId][char][char]
	3406	// 1. computing e(x,y)
	3407	MDOUBLE corr = 0.0;
	3408	MDOUBLE EXY = 0.0;
	3409	MDOUBLE EX = 0.0;
	3410	MDOUBLE EY = 0.0;
	3411	for (int i=0; i < VIpos_i.size(); ++i) {// going over all nodes
	3412	MDOUBLE tmp1 = VIpos_i[i][0][1]-VIpos_i[i][1][0];
	3413	MDOUBLE tmp2 = VIpos_j[i][0][1]-VIpos_j[i][1][0];
	3414	EX += tmp1;
	3415	EY += tmp2;
	3416	EXY += (tmp1*tmp2);
	3417	}
	3418	EXY /= VIpos_i.size();
	3419	EX /= VIpos_i.size();
	3420	EY /= VIpos_i.size();
	3421	corr = EXY-EX*EY;
	3422	return corr;
	3423	}
	3424
	3425	/********************************************************************************************
	3426	FlatSpBeforeOpt
	3427	*********************************************************************************************/
	3428	void gainLoss::FlatSpBeforeOpt(stochasticProcess& sp , unObservableData* unObservableData_p){
	3429	LOGnOUT(4,<<"WARNING: FlatSpBeforeOpt.. "<<endl);
	3430	bool isReversible = gainLossOptions::_isReversible;
	3431	bool optimizeAlpha = isAlphaOptimization(sp.distr());
	3432	bool optimizeBeta = isBetaOptimization(sp.distr());
	3433	//bool optimizeMixture = isMixOptimization(sp.distr());
	3434	bool probInvariant = isInvariantOptimization(sp.distr());
	3435	bool evalTheta = isThetaOptimization();
	3436
	3437	static_cast<gainLossModel*>(sp.getPijAccelerator()->getReplacementModel())->setMu1(1,isReversible);
	3438	if (!isReversible){
	3439	static_cast<gainLossModelNonReversible*>(sp.getPijAccelerator()->getReplacementModel())->setMu2(1); }
	3440	if(optimizeAlpha){
	3441	setRateAlpha(sp.distr(),0.7); }
	3442	if(optimizeBeta){
	3443	setRateBeta(sp.distr(),0.7); }
	3444	if(evalTheta){
	3445	static_cast<gainLossModel*>(sp.getPijAccelerator()->getReplacementModel())->setTheta(0.5);}
	3446	if(probInvariant){
	3447	static_cast<generalGammaDistributionPlusInvariant*>(sp.distr())->setInvProb(0.01);}
	3448	if(gainLossOptions::_isNormalizeQ)
	3449	normalizeQ(&sp);
	3450	if(unObservableData_p)
	3451	unObservableData_p->setLforMissingData(_tr,&sp);
	3452	}
	3453	/********************************************************************************************
	3454	*********************************************************************************************/
	3455	void gainLoss::FlatSpBeforeOpt(vector<vector<stochasticProcess> >& spVVec,distribution gainDist, distribution * lossDist, unObservableData* unObservableData_p){
	3456	LOGnOUT(4,<<"WARNING: FlatSpBeforeOpt.. "<<endl);
	3457	bool isReversible = gainLossOptions::_isReversible;
	3458	bool optimizeBetaGain = isBetaOptimization(gainDist);
	3459	bool optimizeBetaLoss = isBetaOptimization(lossDist);
	3460	bool optimizeGLProbInvariant = isInvariantOptimization(gainDist); // for both gain and loss
	3461	bool evalTheta = isThetaOptimization();
	3462
	3463	stochasticProcess sp = *spVVec[0][0];
	3464	bool optimizeRateAlpha = isAlphaOptimization((sp.distr()));
	3465	bool optimizeRateProbInvariant = isInvariantOptimization((sp.distr()));
	3466
	3467	updateGainAlpha(1,spVVec,gainDist,lossDist,false);
	3468	if(optimizeBetaGain) updateGainBeta(1,spVVec,gainDist,lossDist,false);
	3469	if(optimizeGLProbInvariant) {
	3470	updateGainProbInvariant(0.01,gainDist);
	3471	}
	3472	// Loss
	3473	if (!isReversible){
	3474	updateLossAlpha(1,spVVec,gainDist,lossDist,false);
	3475	if(optimizeBetaLoss) updateLossBeta(1,spVVec,gainDist,lossDist,false);
	3476	if(optimizeGLProbInvariant) {
	3477	updateGainProbInvariant(0.01,lossDist);
	3478	}
	3479	}
	3480	// overall rate
	3481	if(optimizeRateAlpha) updateRateAlpha(0.7,spVVec,gainDist,lossDist,false);
	3482	if(optimizeRateProbInvariant) updateRateProbInvariant(0.01,spVVec,gainDist,lossDist,false);
	3483
	3484	if(evalTheta) updateTheta(0.5,spVVec,gainDist,lossDist,false);
	3485	normalizeQ(spVVec,gainDist,lossDist);
	3486	if(unObservableData_p)
	3487	_unObservableData_p->setLforMissingData(_tr,spVVec,gainDist,lossDist);
	3488	}
	3489
	3490
	3491
	3492
	3493
	3494
	3495	// prints
	3496	/********************************************************************************************
	3497	printOptionParameters
	3498	*********************************************************************************************/
	3499	void gainLoss::printOptionParameters(ostream & out) {
	3500	LOGnOUT(4,<<"\n ---------------------- THE PARAMETERS ----------------------------"<<endl);
	3501	if(gainLossOptions::_gainEQloss)
	3502	LOGnOUT(4,<<"gain=loss model is used. =>freq(0)=freq(1)."<<endl);
	3503	if(Parameters::getInt("_accountForMissingData")){
	3504	LOGnOUT(4,<<"Likelihood computation is performed while acounting for un-oberved data"<<endl);
	3505	LOGnOUT(4,<<"With min number of presences('1's)= "<<Parameters::getInt("_minNumOfOnes")<<endl);
	3506	LOGnOUT(4,<<"With min number of absences('0's)= "<<Parameters::getInt("_minNumOfZeros")<<endl);
	3507	}
	3508	if(!gainLossOptions::_isReversible && !gainLossOptions::_isRootFreqEQstationary){
	3509	LOGnOUT(4,<<"Fixed-Root ('Non-Rev') model is used"<<endl);}
	3510	else{
	3511	LOGnOUT(4,<<"'Reversible'(Root.freq==stationary.freq) model is used"<<endl);}
	3512
	3513	if(gainLossOptions::_isRootFreqEQstationary){
	3514	LOGnOUT(4,<<"RootFreq EQ stationary (taken from each sp - gain/(gain+loss) )"<<endl);
	3515	}
	3516	else
	3517	{
	3518	switch (gainLossOptions::_characterFreqEval){
	3519	case (gainLossOptions::FiftyFifty):
	3520	LOGnOUT(4,<<"frequencies were set to FiftyFifty "<<endl);
	3521	break;
	3522	case (gainLossOptions::LeavesAve):
	3523	LOGnOUT(4,<<"frequencies are based on LeavesAve (-F option) "<<endl);
	3524	break;
	3525	case (gainLossOptions::optimizeOverTree):
	3526	LOGnOUT(4,<<"frequencies (root '1'(Theta)/'0' freq) are model-based"<<endl);
	3527	break;
	3528	}
	3529	}
	3530	if (gainLossOptions::_treeFile.size()>0) LOGnOUT(4,<<"inTree file: "<< gainLossOptions::_treeFile<<endl);
	3531	LOGnOUT(4,<<"inSeq file: "<<gainLossOptions::_seqFile<<endl);
	3532	if (strcmp(gainLossOptions::_referenceSeq.c_str(),"non")!=0) LOGnOUT(4,<<"reference sequence is: "<<gainLossOptions::_referenceSeq<<endl);
	3533	LOGnOUT(4,<<"log: "<<gainLossOptions::_logFile<<" with level= "<<gainLossOptions::_logValue<<endl);
	3534	//LOGnOUT(4,<<"outDir: "<<gainLossOptions::_outDir<<endl);
	3535
	3536	// _gainLossDist
	3537	if (gainLossOptions::_gainLossDist) {
	3538	if (gainLossOptions::_gainLossDistPlusInvariant) {
	3539	LOGnOUT(4,<<"gain , loss ~ GammaPlusInvariant(Alpha,Beta) "<<endl);
	3540	LOGnOUT(4,<<"gain - a Gamma prior distribution with: "<<gainLossOptions::_numberOfGainCategories+1<< " categories (1 Invariant)"<<endl);
	3541	LOGnOUT(4,<<"loss - a Gamma prior distribution with: "<<gainLossOptions::_numberOfLossCategories+1<< " categories (1 Invariant)"<<endl);
	3542	}
	3543	else {
	3544	LOGnOUT(4,<<"gain , loss ~ Gamma(Alpha,Beta) "<<endl);
	3545	LOGnOUT(4,<<"gain - a Gamma prior distribution with: "<<gainLossOptions::_numberOfGainCategories<< " categories"<<endl);
	3546	LOGnOUT(4,<<"loss - a Gamma prior distribution with: "<<gainLossOptions::_numberOfLossCategories<< " categories"<<endl);
	3547	}
	3548	}
	3549	// _performOptimizations
	3550	if(gainLossOptions::_performOptimizations){
	3551	LOGnOUT(4,<< "Optimization of the model parmeters is performed"<<endl);
	3552	if(gainLossOptions::_performOptimizationsManyStarts)
	3553	LOGnOUT(4,<< "performOptimizationsManyStarts with numStarts= "<< gainLossOptions::_numberOfRandPointsInOptimization<<endl);
	3554	switch (gainLossOptions::_rateEstimationMethod){
	3555	case (gainLossOptions::ebExp):
	3556	{
	3557	if(gainLossOptions::_rateDistributionType == gainLossOptions::GAMMA){
	3558	LOGnOUT(4,<< "rate inference method is: empirical Bayesian estimate"<<endl);
	3559	LOGnOUT(4,<< "using a Gamma prior distribution with: "<<gainLossOptions::_numberOfRateCategories<< " discrete categories"<<endl);
	3560	}
	3561	else if(gainLossOptions::_rateDistributionType == gainLossOptions::GAMMA_MIXTURE){
	3562	LOGnOUT(4,<< "rate inference method is: empirical Bayesian estimate"<<endl);
	3563	LOGnOUT(4,<< "using a GAMMA_MIXTURE distribution with: "<<gainLossOptions::_numberOfRateComponents<< " components, "<< gainLossOptions::_numberOfRateCategories<< " categories"<<endl);
	3564	if(gainLossOptions::_gammmaMixtureOptimizerAlg == gainLossOptions::EM) LOGnOUT(4,<< "Optimize the Alpha and Beta parameters with EM algorithm"<<endl);
	3565	if(gainLossOptions::_gammmaMixtureOptimizerAlg == gainLossOptions::ONE_DIM) LOGnOUT(4,<< "Optimize the Alpha and Beta parameters with ONE_DIM algorithm"<<endl);
	3566	}break;
	3567	}
	3568	case (gainLossOptions::mlRate):
	3569	LOGnOUT(4,<< "rate inference method is: maximum likelihood (ML) "<<endl); break;
	3570	}
	3571
	3572	if(Parameters::getInt("_performOptimizationsBBL")){
	3573	if(gainLossOptions::_isBblLS){
	3574	LOGnOUT(4,<<"branch lengths optimization is performed using 'Line-Search'"<<endl);}
	3575	else{
	3576	LOGnOUT(4,<<"branch lengths optimization is performed using 'BBL-EM'"<<endl);}
	3577	}
	3578	else{
	3579	LOGnOUT(4,<<"branch lengths are not optimized"<<endl); }
	3580	}
	3581	else{
	3582	LOGnOUT(4,<< "No optimization is performed"<<endl);
	3583	}
	3584
	3585
	3586	if(gainLossOptions::_isHGT_normal_Pij){
	3587	//LOGnOUT(4,<<"'Normal' model used with: P01 = gain/(-eigenvalue)-exp(eigenvalued)(1-loss/(-eigenvalue))"<<endl);
	3588	}
	3589	else {
	3590	LOGnOUT(4,<<"The replacement model not allows HGT: P01 = epsilon*d"<<endl); }
	3591	if(gainLossOptions::_isHGT_with_Q){
	3592	//LOGnOUT(4,<<"'Normal' replacement model is used with gain => 0"<<endl);
	3593	}
	3594	else {
	3595	LOGnOUT(4,<<"The replacement model with gain = 0"<<endl); }
	3596
	3597	if (gainLossOptions::_calculateRate4site) {
	3598	LOGnOUT(4,<<"rate4site is calculated "<<endl);
	3599	}
	3600	if (gainLossOptions::_gainLossDist&&gainLossOptions::_calculeGainLoss4site) {
	3601	LOGnOUT(4,<<"gain and loss 4site are calculated "<<endl);
	3602	}
	3603	if(gainLossOptions::_calculePosteriorExpectationOfChange){
	3604	if(gainLossOptions::_isAnaliticComputeJumps){
	3605	LOGnOUT(4,<<"calculePosteriorExpectationOfChange is done Analytically"<<endl);}
	3606	else{
	3607	LOGnOUT(4,<<"calculePosteriorExpectationOfChange is done with "<<gainLossOptions::_numOfSimulationsForPotExp<<" simulations"<<endl);}
	3608	}
	3609	LOGnOUT(4,<<"-------------------------------------------------------------------"<<endl);
	3610	}
	3611	/********************************************************************************************
	3612	printPij_t
	3613	*********************************************************************************************/
	3614	void gainLoss::printPij_t(MDOUBLE dist, ostream& out){
	3615	out<<"-------------------------------"<<endl<<"The Pij("<<dist<<"): Matrix:"<<endl;
	3616	if(gainLossOptions::_gainLossDist){
	3617	MDOUBLE spPij_t00=0;
	3618	MDOUBLE spPij_t01=0;
	3619	MDOUBLE spPij_t10=0;
	3620	MDOUBLE spPij_t11=0;
	3621	int numOfSPs = _gainDist->categories()*_lossDist->categories();
	3622	for (int i=0; i < numOfSPs; ++i) {
	3623	int gainIndex =fromIndex2gainIndex(i,_gainDist->categories(),_lossDist->categories());
	3624	int lossIndex =fromIndex2lossIndex(i,_gainDist->categories(),_lossDist->categories());
	3625	spPij_t00 += _spVVec[gainIndex][lossIndex]->Pij_t(0,0,dist)* _gainDist->ratesProb(gainIndex)*_lossDist->ratesProb(lossIndex);
	3626	spPij_t01 += _spVVec[gainIndex][lossIndex]->Pij_t(0,1,dist)* _gainDist->ratesProb(gainIndex)*_lossDist->ratesProb(lossIndex);
	3627	spPij_t10 += _spVVec[gainIndex][lossIndex]->Pij_t(1,0,dist)* _gainDist->ratesProb(gainIndex)*_lossDist->ratesProb(lossIndex);
	3628	spPij_t11 += _spVVec[gainIndex][lossIndex]->Pij_t(1,1,dist)* _gainDist->ratesProb(gainIndex)*_lossDist->ratesProb(lossIndex);
	3629	}
	3630	out<<"p0,0["<<dist<<"]: "<<spPij_t00<<endl;
	3631	out<<"p0,1["<<dist<<"]: "<<spPij_t01<<endl;
	3632	out<<"p1,0["<<dist<<"]: "<<spPij_t10<<endl;
	3633	out<<"p1,1["<<dist<<"]: "<<spPij_t11<<endl;
	3634	out<<endl;
	3635	}
	3636	else{
	3637	out<<"p0,0["<<dist<<"]: "<<_sp->Pij_t(0,0,dist)<<endl;
	3638	out<<"p0,1["<<dist<<"]: "<<_sp->Pij_t(0,1,dist)<<endl;
	3639	out<<"p1,0["<<dist<<"]: "<<_sp->Pij_t(1,0,dist)<<endl;
	3640	out<<"p1,1["<<dist<<"]: "<<_sp->Pij_t(1,1,dist)<<endl;
	3641	out<<endl;
	3642	}
	3643	}
	3644	/********************************************************************************************
	3645	printQ
	3646	*********************************************************************************************/
	3647	void gainLoss::printQ(ostream& out){
	3648	VVdouble Q;
	3649	out<<"-------------------------------"<<endl<<"The Q Matrix:"<<endl;
	3650	if(gainLossOptions::_gainLossDist){
	3651	MDOUBLE spPij_t00=0;
	3652	MDOUBLE spPij_t01=0;
	3653	MDOUBLE spPij_t10=0;
	3654	MDOUBLE spPij_t11=0;
	3655	int numOfSPs = _gainDist->categories()*_lossDist->categories();
	3656	for (int i=0; i < numOfSPs; ++i) {
	3657	int gainIndex =fromIndex2gainIndex(i,_gainDist->categories(),_lossDist->categories());
	3658	int lossIndex =fromIndex2lossIndex(i,_gainDist->categories(),_lossDist->categories());
	3659	Q = (static_cast<gainLossModel*>(_spVVec[gainIndex][lossIndex]->getPijAccelerator()->getReplacementModel())->getQ());
	3660	spPij_t00 += Q[0][0]* _gainDist->ratesProb(gainIndex)*_lossDist->ratesProb(lossIndex);
	3661	spPij_t01 += Q[0][1]* _gainDist->ratesProb(gainIndex)*_lossDist->ratesProb(lossIndex);
	3662	spPij_t10 += Q[1][0]* _gainDist->ratesProb(gainIndex)*_lossDist->ratesProb(lossIndex);
	3663	spPij_t11 += Q[1][1]* _gainDist->ratesProb(gainIndex)*_lossDist->ratesProb(lossIndex);
	3664	}
	3665	out<<"Q[0][0]= "<<spPij_t00<<endl;
	3666	out<<"Q[0][1]= "<<spPij_t01<<endl;
	3667	out<<"Q[1][0]= "<<spPij_t10<<endl;
	3668	out<<"Q[1][1]= "<<spPij_t11<<endl;
	3669	out<<endl;
	3670	}
	3671	else{
	3672	VVdouble Q = (static_cast<gainLossModel*>(_sp->getPijAccelerator()->getReplacementModel())->getQ());
	3673	//out<<"freq[0]Q[0][1]= "<<(1-static_cast<gainLossModel>(_sp->getPijAccelerator()->getReplacementModel())->getTheta())*Q[0][1]<<endl;
	3674	//out<<"freq[1]Q[1][0]= "<<(static_cast<gainLossModel>(_sp->getPijAccelerator()->getReplacementModel())->getTheta())*Q[1][0]<<endl;
	3675	out<<"Q[0][0]= "<<Q[0][0] <<endl;
	3676	out<<"Q[0][1]= "<<Q[0][1] <<endl;
	3677	out<<"Q[1][0]= "<<Q[1][0] <<endl;
	3678	out<<"Q[1][1]= "<<Q[1][1] <<endl;
	3679	out<<endl;
	3680	}
	3681	}
	3682	/********************************************************************************************
	3683	printTreeLikelihoodAllPosAlphTheSame
	3684	*********************************************************************************************/
	3685	void gainLoss::printTreeLikelihoodAllPosAlphTheSame(bool isLOGnOUT ,ostream& out)
	3686	{
	3687	MDOUBLE res;
	3688	if(!gainLossOptions::_gainLossDist){
	3689	res = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(_tr,_scUniqPatterns,*_sp,_weightsUniqPatterns,_unObservableData_p);
	3690	out.precision(9);
	3691	if(isLOGnOUT){
	3692	LOGnOUT(3,<<"The Tree Likelihood AllPosAlphTheSame is "<<res<<endl);}
	3693	else{
	3694	out<<"The Tree Likelihood AllPosAlphTheSame is "<<res<<endl;}
	3695	}
	3696	else{
	3697	res = likelihoodComputationGL::getTreeLikelihoodAllPosAlphTheSame(_tr,_scUniqPatterns,_spVVec,_gainDist,_lossDist,_weightsUniqPatterns,_unObservableData_p);
	3698	out.precision(9);
	3699	if(isLOGnOUT){
	3700	LOGnOUT(3,<<"The Tree Likelihood AllPosAlphTheSame is "<<res<<endl);}
	3701	else{
	3702	out<<"The Tree Likelihood AllPosAlphTheSame is "<<res<<endl;}
	3703	//res = likelihoodComputationGL::getTreeLikelihoodAllPosAlphTheSameNoComputeUp(_tr,_sc,_spVVec,_gainDist,_lossDist);
	3704	//out<<"The Tree Likelihood AllPosAlphTheSameNoComputeUp is "<<res<<endl;
	3705	}
	3706	_logL = res; // update the tree likelihood.
	3707	}
	3708
	3709
	3710	/********************************************************************************************
	3711	printLofPosBothModels
	3712	*********************************************************************************************/
	3713	void gainLoss::printLofPosBothModels(){
	3714	LOGnOUT(4,<<"Starting printLofPosBothModels..."<<endl);
	3715	string LofPosBothModels = gainLossOptions::_outDir + "//" + "printLofPosBothModels.txt";
	3716	ofstream likeOfPosBothModelsStream(LofPosBothModels.c_str());
	3717	likeOfPosBothModelsStream.precision(PRECISION);
	3718
	3719	MDOUBLE treeL = printLofPosBothModels(likeOfPosBothModelsStream);
	3720	LOGnOUT(4,<<"treeL= "<<treeL<<endl);
	3721	}
	3722
	3723	/********************************************************************************************
	3724	printLofPosBothModels
	3725	*********************************************************************************************/
	3726	MDOUBLE gainLoss::printLofPosBothModels(ostream& out){
	3727
	3728	//MDOUBLE mu1 = static_cast<gainLossModel*>(_sp->getPijAccelerator()->getReplacementModel())->getMu1();
	3729	MDOUBLE res =0;
	3730	unObservableData* unObservableData_p_0;
	3731	// single stochastic process
	3732	if(!gainLossOptions::_gainLossDist){
	3733	out<<"The likelihood of each pos when gain and loss are scalars"<<endl;
	3734
	3735	computePijGam piModel_0, piModel_1;
	3736	piModel_1.fillPij(_tr,*_sp);
	3737
	3738	stochasticProcess* spModel_0 = _sp->clone();
	3739	//static_cast<gainLossModel>((_sp).getPijAccelerator()->getReplacementModel())->setMu1(0.0,gainLossOptions::_isReversible);
	3740	static_cast<gainLossModel>((spModel_0).getPijAccelerator()->getReplacementModel())->setMu1(0.0,gainLossOptions::_isReversible); //NO NEED to update since the _sp is byRef
	3741
	3742	piModel_0.fillPij(_tr,*spModel_0);
	3743	if(_unObservableData_p){
	3744	unObservableData_p_0 = new unObservableData(_sc, spModel_0, gainLossAlphabet(),Parameters::getInt("_minNumOfOnes"), Parameters::getInt("_minNumOfZeros"));
	3745	unObservableData_p_0->setLforMissingData(_tr,spModel_0);
	3746	}
	3747	else
	3748	unObservableData_p_0 = NULL;
	3749
	3750
	3751	MDOUBLE LnofPos_Model_0, LnofPos_Model_1;
	3752	int k;
	3753	out<<"POS"<<"\t"<<"M_gain0"<<"\t"<<"M"<<"\t"<<"Diff"<<endl;
	3754	for (k=0; k < _scWithFullLength.seqLen(); ++k) {
	3755	LnofPos_Model_0 = log(likelihoodComputation::getLofPos(k,_tr,_scWithFullLength,piModel_0,*spModel_0));
	3756	LnofPos_Model_1 = log(likelihoodComputation::getLofPos(k,_tr,_scWithFullLength,piModel_1,*_sp));
	3757	if(_unObservableData_p){
	3758	LnofPos_Model_1 = LnofPos_Model_1 - log(1- exp(_unObservableData_p->getlogLforMissingData()));
	3759	LnofPos_Model_0 = LnofPos_Model_0 - log(1- exp(unObservableData_p_0->getlogLforMissingData()));
	3760	}
	3761	res += LnofPos_Model_0;
	3762	out<<k+1<<"\t"<<LnofPos_Model_0<<"\t"<<LnofPos_Model_1<<"\t"<<LnofPos_Model_1-LnofPos_Model_0<<endl;
	3763	}
	3764	if(unObservableData_p_0) delete unObservableData_p_0;
	3765	return res;
	3766	}
	3767	// multiple stochastic processes
	3768	else{
	3769	out<<"The likelihood of each pos when gain and loss ~Gamma(Alpha,Beta)"<<endl;
	3770	out<<"...Not implemented (yet)"<<endl;
	3771	return res;
	3772	}
	3773	}
	3774
	3775	/********************************************************************************************
	3776	printLofPos
	3777	*********************************************************************************************/
	3778	void gainLoss::printLofPos(){
	3779	LOGnOUT(4,<<"Starting printLofPos..."<<endl);
	3780	//ofstream likeOfPosStream(gainLossOptions::_outFileLikeofPos.c_str());
	3781	string g4s = gainLossOptions::_outDir + "//" + "likeOfPos.txt";
	3782	ofstream likeOfPosStream(g4s.c_str());
	3783	likeOfPosStream.precision(PRECISION);
	3784	MDOUBLE treeL = printLofPos(likeOfPosStream);
	3785	cout<<"Tree logL="<<treeL<<"\n";
	3786	}
	3787
	3788	/********************************************************************************************
	3789	*********************************************************************************************/
	3790	MDOUBLE gainLoss::printLofPos(ostream& out){
	3791
	3792	MDOUBLE res =0;
	3793	out<<"# log Likelihood of tree (entire data)="<<"\t"<<_logL<<endl;
	3794	out<<"POS"<<"\t"<<"logLofPos"<<endl;
	3795	if(!gainLossOptions::_gainLossDist){
	3796	//out<<"The likelihood of each pos when gain and loss are scalars"<<endl;
	3797	computePijGam pi;
	3798	pi.fillPij(_tr,*_sp);
	3799	MDOUBLE LnofPos;
	3800	for (int k=0; k < _scWithFullLength.seqLen(); ++k) {
	3801	LnofPos = log(likelihoodComputation::getLofPos(k,_tr,_scWithFullLength,pi,*_sp));
	3802	if(_unObservableData_p)
	3803	LnofPos = LnofPos - log(1- exp(_unObservableData_p->getlogLforMissingData()));
	3804	res += LnofPos;
	3805	out<<k+1<<"\t"<<LnofPos<<endl;
	3806	// DEB
	3807	if(LnofPos>0)
	3808	out<<k+1<<"\t"<<LnofPos<<endl;
	3809	}
	3810	return res;
	3811	}
	3812	else{
	3813	int numOfRateCategories = _spVVec[0][0]->categories();
	3814	vector<computePijGam> pi_vec(numOfRateCategories);
	3815	vector<suffStatGlobalGam> ssc_vec(numOfRateCategories);
	3816	vector<computeUpAlg> cup_vec(numOfRateCategories);
	3817	likelihoodComputationGL::fillPijAndUp(_tr,_sc, _spVVec,_gainDist,_lossDist,pi_vec,ssc_vec,cup_vec);
	3818	Vdouble posLike;
	3819	res = likelihoodComputationGL::getTreeLikelihoodFromUp2(_tr,_sc,_spVVec,ssc_vec,_gainDist, _lossDist,NULL,_unObservableData_p,&posLike);
	3820	for (int k=0; k < _sc.seqLen(); ++k) {
	3821	out<<k+1<<"\t"<<posLike[k]<<endl;
	3822	}
	3823	return res;
	3824	}
	3825	}
	3826	/********************************************************************************************
	3827	Util - printLikelihoodLandscape
	3828	*********************************************************************************************/
	3829	void gainLoss::printLikelihoodLandscape(stochasticProcess* sp){
	3830	LOGnOUT(4,<<"start printLikelihoodLandscape for: ..."<<endl);
	3831	if(gainLossOptions::_printLikelihoodLandscapeAlphaRate)
	3832	LOGnOUT(4,<<" AlphaRate"<<endl);
	3833	if(gainLossOptions::_printLikelihoodLandscapeGainLoss)
	3834	LOGnOUT(4,<<"Gain and Loss"<<endl);
	3835	if(gainLossOptions::_printLikelihoodLandscapeTheta)
	3836	LOGnOUT(4,<<"Theta"<<endl);
	3837
	3838	stochasticProcess* spTemp = sp->clone();
	3839	string LikelihoodLandscape = gainLossOptions::_outDir + "//" + "LikelihoodLandscape.txt";
	3840	ofstream LikelihoodLandscapeStream(LikelihoodLandscape.c_str());
	3841	LikelihoodLandscapeStream.precision(PRECISION);
	3842	LikelihoodLandscapeStream<<"Alpha"<<"\t"<<"Gain"<<"\t"<<"Loss"<<"\t"<<"Theta"<<"\t"<<"L"<<endl;
	3843	cout<<"Alpha"<<"\t"<<"Gain"<<"\t"<<"Loss"<<"\t"<<"Theta"<<"\t"<<"L"<<endl;
	3844
	3845	bool optimizeAlpha = isAlphaOptimization(_sp->distr());
	3846	//bool optimizeBeta = isBetaOptimization(_sp->distr());
	3847	//bool optimizeMixture = isMixOptimization(_sp->distr());
	3848	//bool probInvariant = isInvariantOptimization(_sp->distr());
	3849	bool evalTheta = isThetaOptimization();
	3850
	3851	MDOUBLE AlphaRate,Gain,Loss,Theta;
	3852	MDOUBLE Increment = 0.01;
	3853	int BigEnoughToEndLoop = 100000000;
	3854	MDOUBLE LL;
	3855
	3856	// get all original values
	3857	if(optimizeAlpha)
	3858	AlphaRate =static_cast<gammaDistribution*>(spTemp->distr())->getAlpha();
	3859	Gain = static_cast<gainLossModel>((spTemp).getPijAccelerator()->getReplacementModel())->getMu1();
	3860	if(!gainLossOptions::_isReversible)
	3861	Loss= static_cast<gainLossModelNonReversible>((spTemp).getPijAccelerator()->getReplacementModel())->getMu2();
	3862	if(evalTheta)
	3863	Theta= static_cast<gainLossModel>((spTemp).getPijAccelerator()->getReplacementModel())->getTheta();
	3864
	3865	// start the 1-3way loop for landscape
	3866	for (int i=1; i*Increment<=gainLossOptions::_userAlphaRateMax; i++){
	3867	if(gainLossOptions::_printLikelihoodLandscapeAlphaRate){
	3868	AlphaRate = i*Increment;
	3869	if(optimizeAlpha) setRateAlpha(spTemp->distr(),AlphaRate);
	3870	}
	3871	else
	3872	i=BigEnoughToEndLoop;
	3873	for (int j=1; j*Increment<=gainLossOptions::_userGainMax; j++){
	3874	if(gainLossOptions::_printLikelihoodLandscapeGainLoss){
	3875	Gain = j*Increment;
	3876	static_cast<gainLossModel*>(spTemp->getPijAccelerator()->getReplacementModel())->setMu1(Gain, gainLossOptions::_isReversible);
	3877	}
	3878	else
	3879	j=BigEnoughToEndLoop;
	3880	for (int k=1; k*Increment<=gainLossOptions::_userLossMax; k++){
	3881	if(gainLossOptions::_printLikelihoodLandscapeGainLoss){
	3882	Loss = k*Increment;
	3883	if (!gainLossOptions::_isReversible) static_cast<gainLossModelNonReversible*>(spTemp->getPijAccelerator()->getReplacementModel())->setMu2(Loss);
	3884	}
	3885	else
	3886	k=BigEnoughToEndLoop;
	3887	for (int l=1; l*Increment<=gainLossOptions::_userThetaMax; l++){
	3888	if(gainLossOptions::_printLikelihoodLandscapeTheta){
	3889	Theta = l*Increment;
	3890	if(evalTheta) static_cast<gainLossModel*>(spTemp->getPijAccelerator()->getReplacementModel())->setTheta(Theta);
	3891	}
	3892	else
	3893	l=BigEnoughToEndLoop;
	3894	LL = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(_tr,_sc,*spTemp,_weightsUniqPatterns,_unObservableData_p);
	3895	LikelihoodLandscapeStream<<AlphaRate<<"\t"<<Gain<<"\t"<<Loss<<"\t"<<Theta<<"\t"<<LL<<endl;
	3896	cout<<AlphaRate<<"\t"<<Gain<<"\t"<<Loss<<"\t"<<Theta<<"\t"<<LL<<endl;
	3897	}
	3898	}
	3899	}
	3900	}
	3901	delete spTemp;
	3902	}
	3903	/********************************************************************************************
	3904	printLikelihoodLandscapeStatFreqRatioAndRootFreqRatio
	3905	*********************************************************************************************/
	3906	void gainLoss::printLikelihoodLandscapeStatFreqRatioAndRootFreqRatio(){
	3907	bool isCloneEveryIteration = false; // otherwise same model in all iteration (other than the update)
	3908	stochasticProcess* spTemp=NULL;
	3909	vector<vector<stochasticProcess*> > spVVec;
	3910	unObservableData* unObservableData_p=NULL;
	3911	distribution* gainDist=NULL;
	3912	distribution* lossDist=NULL;
	3913	tree tempTree = _tr;
	3914	if(_unObservableData_p)
	3915	unObservableData_p = _unObservableData_p->clone();
	3916
	3917
	3918	if(gainLossOptions::_gainLossDist){
	3919	LOGnOUT(4,<<"start printLikelihoodLandscape for: gainLossDist (mixture) with gainLoss ratio and Theta (Root'1'Freq)"<<endl);
	3920	LOGnOUT(4,<<"increment="<<gainLossOptions::_likelihoodLandscapeIncrement<<endl);
	3921	//LOGnOUT(4,<<"WARNING: the _spVVec,_gainDist,_lossDist are overwritten"<<endl);
	3922	cloneSpVVec(_spVVec,spVVec);
	3923	gainDist = _gainDist->clone();
	3924	lossDist = _lossDist->clone();
	3925	if(gainLossOptions::_optBBL_LS_InIteration \|\| gainLossOptions::_optBBL_EM_InIteration){
	3926	errorMsg::reportError("Error: BBL not implemented with gainLossDist for printLikelihoodLandscape\n");
	3927	}
	3928	}
	3929	else{
	3930	if(!gainLossOptions::_gainLossRateAreFreq){
	3931	LOGnOUT(4,<<"WARNING:: choose _gainLossRateAreFreq for printLikelihoodLandscapeStatFreqRatioAndRootFreqRatio\n");
	3932	}
	3933	LOGnOUT(4,<<"start printLikelihoodLandscape for: Gain (Stationary'1'Freq) and Theta (Root'1'Freq)"<<endl);
	3934	LOGnOUT(4,<<"increment="<<gainLossOptions::_likelihoodLandscapeIncrement<<endl);
	3935	LOGnOUT(4,<<"gainLossOptions::_optAlphaInIteration="<<gainLossOptions::_optAlphaInIteration<<" optBBL_LS_InIteration="<<gainLossOptions::_optBBL_LS_InIteration<<" optBBL_EM_InIteration="<<gainLossOptions::_optBBL_EM_InIteration<<endl);
	3936	spTemp = _sp->clone();
	3937	}
	3938
	3939	bool optimizeAlpha=false;
	3940	if(!gainLossOptions::_gainLossDist)
	3941	optimizeAlpha= isAlphaOptimization((*spTemp).distr());
	3942	string LikelihoodLandscape = gainLossOptions::_outDir + "//" + "LikelihoodLandscape.txt";
	3943	ofstream LikelihoodLandscapeStream(LikelihoodLandscape.c_str());
	3944	LikelihoodLandscapeStream<<"StationaryFreq"<<"\t"<<"Theta"<<"\t"<<"Like"<<endl;
	3945	LOGnOUT(4,<<"StationaryFreq"<<"\t"<<"Theta"<<"\t"<<"Like"<<endl);
	3946	MDOUBLE AlphaGain,AlphaLoss, BetaGain,BetaLoss, gainLossRatioToCompleteByBeta, ratio =1;
	3947	if(gainLossOptions::_gainLossDist){
	3948	AlphaGain = getRateAlpha(_gainDist);
	3949	AlphaLoss = getRateAlpha(_lossDist);
	3950	}
	3951
	3952	MDOUBLE Gain,Theta=1;
	3953	MDOUBLE Increment = gainLossOptions::_likelihoodLandscapeIncrement;
	3954	MDOUBLE LL=1;
	3955
	3956	MDOUBLE optLike=1;
	3957	MDOUBLE AlphaRate=1;
	3958	MDOUBLE currAlpha=1;
	3959
	3960	MDOUBLE tollForPairwiseDist=0.01; // the BBL default, epsilon per branch (brent's value)
	3961	MDOUBLE bblEMfactor = 10;
	3962	int numberOfBranchs = _tr.getNodesNum();
	3963	MDOUBLE epsilonOptimizationIterFactor = numberOfBranchs/5; // (is 1.5) for 100 branches (~50 species) the epsilon for the entire iter is 50 times the one for branch
	3964	epsilonOptimizationIterFactor = max(5.0,epsilonOptimizationIterFactor);
	3965	MDOUBLE epsilonOptimizationBBLIter = gainLossOptions::_epsilonOptimizationBBL*epsilonOptimizationIterFactor/bblEMfactor; // The next iteration epsilon, multiply per-branch value
	3966
	3967	// get all original values
	3968	if(gainLossOptions::_optAlphaInIteration){
	3969	if(optimizeAlpha)
	3970	AlphaRate =static_cast<gammaDistribution*>(spTemp->distr())->getAlpha();
	3971	}
	3972
	3973
	3974	//////////////////////////////////////////////////////////////////////////
	3975	for (int j=1; j*Increment<=0.99999; j++){
	3976	Gain = j*Increment;
	3977	if(gainLossOptions::_gainLossDist){
	3978	ratio = Gain/(1-Gain);
	3979	gainLossRatioToCompleteByBeta = ratio*(AlphaLoss/AlphaGain);
	3980	BetaGain =sqrt(1/gainLossRatioToCompleteByBeta); // AlphaGain = 0.35
	3981	BetaLoss =sqrt(gainLossRatioToCompleteByBeta); // AlphaLoss = 0.9
	3982	updateGainBeta(BetaGain,spVVec,_gainDist,_lossDist);
	3983	updateLossBeta(BetaLoss,spVVec,_gainDist,_lossDist);
	3984	}
	3985	else{
	3986	if(gainLossOptions::_gainLossRateAreFreq)
	3987	static_cast<gainLossModel*>(spTemp->getPijAccelerator()->getReplacementModel())->setMu1(Gain, gainLossOptions::_isReversible);
	3988	else{
	3989	static_cast<gainLossModel*>(spTemp->getPijAccelerator()->getReplacementModel())->setMu1(Gain, gainLossOptions::_isReversible);
	3990	static_cast<gainLossModelNonReversible*>(spTemp->getPijAccelerator()->getReplacementModel())->setMu2((1-Gain));
	3991	}
	3992	}
	3993
	3994	//////////////////////////////////////////////////////////////////////////
	3995	for (int l=1; l*Increment<=0.99999; l++){
	3996	tree tempTree = _tr;
	3997	Theta = l*Increment;
	3998	if(gainLossOptions::_gainLossDist){
	3999	updateTheta(Theta,spVVec,_gainDist,_lossDist);
	4000	if(unObservableData_p) unObservableData_p->setLforMissingData(_tr,spVVec,_gainDist,_lossDist); // No need?
	4001	}
	4002	else{
	4003	static_cast<gainLossModel*>(spTemp->getPijAccelerator()->getReplacementModel())->setTheta(Theta);
	4004	if(unObservableData_p) unObservableData_p->setLforMissingData(_tr,_sp);
	4005	}
	4006
	4007	if(gainLossOptions::_gainLossDist){
	4008	LL = likelihoodComputationGL::getTreeLikelihoodAllPosAlphTheSame(_tr,_scUniqPatterns,spVVec,_gainDist,_lossDist,_weightsUniqPatterns,unObservableData_p);
	4009	}
	4010	else{
	4011	LL = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(_tr,_sc,*spTemp,_weightsUniqPatterns,unObservableData_p);
	4012	}
	4013	// BBL-LS
	4014	if(gainLossOptions::_optBBL_LS_InIteration){
	4015	bblLS bbl;
	4016	LL = bbl.optimizeBranches(tempTree,spTemp,_sc,_weightsUniqPatterns,unObservableData_p,1, gainLossOptions::_epsilonOptimizationBBL, gainLossOptions::_maxNumOfIterationsBBL,LL);
	4017	}
	4018	// BBL-EM
	4019	if(gainLossOptions::_optBBL_EM_InIteration){
	4020	bblEM bblEM1(tempTree, _sc, spTemp, NULL, (int)(gainLossOptions::_maxNumOfIterationsBBLbblEMfactor), epsilonOptimizationBBLIter,tollForPairwiseDist,unObservableData_p,&LL);
	4021	LL = bblEM1.getTreeLikelihood();
	4022	}
	4023	// optAlpha
	4024	if(optimizeAlpha && gainLossOptions::_optAlphaInIteration){
	4025	LL = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(_tr,_sc,*spTemp,_weightsUniqPatterns,unObservableData_p);
	4026	optLike = -brent(MINIMUM_ALPHA_PARAM,AlphaRate,MAXIMUM_ALPHA_PARAM
	4027	,C_evalParam(_tr,*spTemp,_sc,C_evalParam::rateAlpha,gainLossOptions::_isReversible,_weightsUniqPatterns,unObservableData_p),gainLossOptions::_epsilonOptimizationModel,&currAlpha);
	4028	if (optLike>LL)
	4029	setRateAlpha(spTemp->distr(),currAlpha);
	4030	}
	4031	LikelihoodLandscapeStream<<Gain<<"\t"<<Theta<<"\t"<<LL<<endl;
	4032	LOGnOUT(4,<<Gain<<"\t"<<Theta<<"\t"<<LL<<endl);
	4033	// clone every iteration (is needed?)
	4034	tempTree = _tr;
	4035	if(isCloneEveryIteration){
	4036	if(gainLossOptions::_gainLossDist){
	4037	deleteSpVVec(&spVVec);
	4038	cloneSpVVec(_spVVec,spVVec);
	4039	delete(gainDist);
	4040	gainDist = _gainDist->clone();
	4041	delete(lossDist);
	4042	lossDist = _lossDist->clone();
	4043	if(unObservableData_p){
	4044	delete unObservableData_p;
	4045	unObservableData_p = _unObservableData_p->clone();
	4046	}
	4047
	4048	}
	4049	else{
	4050	delete(spTemp);
	4051	spTemp = _sp->clone();
	4052	if(unObservableData_p){
	4053	delete unObservableData_p;
	4054	unObservableData_p = _unObservableData_p->clone();
	4055	}
	4056	}
	4057	}
	4058	}
	4059	}
	4060	// final deletions
	4061	if(spTemp)
	4062	delete(spTemp);
	4063	if(spVVec.size()>0)
	4064	deleteSpVVec(&spVVec);
	4065	if(gainDist)
	4066	delete gainDist;
	4067	if(lossDist)
	4068	delete lossDist;
	4069	if(unObservableData_p)
	4070	delete unObservableData_p;
	4071
	4072	}
	4073
	4074
	4075
	4076
	4077
	4078
	4079
	4080
	4081	/********************************************************************************************
	4082	*********************************************************************************************/
	4083	void gainLoss::initMixtureParams(Vdouble& initAlphaRates, Vdouble& initBetaRates, Vdouble& initCompProbRates, int numOfGammaComp,
	4084	MDOUBLE initAlphaRate, MDOUBLE initBetaRate, MDOUBLE initCompProbRate)
	4085	{
	4086	initAlphaRates.resize(numOfGammaComp);
	4087	initBetaRates.resize(numOfGammaComp);
	4088	initCompProbRates.resize(numOfGammaComp);
	4089	for (int i = 0; i < numOfGammaComp; ++i)
	4090	{
	4091	initAlphaRates[i] = initAlphaRate*(numOfGammaComp-i)/numOfGammaComp;
	4092	initBetaRates[i] = initBetaRate*(i+1)/numOfGammaComp;
	4093	initCompProbRates[i] = initCompProbRate/numOfGammaComp;
	4094	}
	4095	}
	4096
	4097
	4098	/********************************************************************************************
	4099	*********************************************************************************************/
	4100	void gainLoss::convertGainLossRatesToFreq(){
	4101	LOGnOUT(4,<<"Starting convertGainLossRatesToFreq..."<<endl);
	4102	MDOUBLE gainLossSum = 0.0;
	4103	if(!gainLossOptions::_gainLossDist){
	4104	MDOUBLE gain = static_cast<gainLossModelNonReversible*>(_sp->getPijAccelerator()->getReplacementModel())->getMu1();
	4105	MDOUBLE loss = static_cast<gainLossModelNonReversible*>(_sp->getPijAccelerator()->getReplacementModel())->getMu2();
	4106	gainLossSum = gain+loss;
	4107	static_cast<gainLossModelNonReversible*>(_sp->getPijAccelerator()->getReplacementModel())->setMu1(gain/gainLossSum,gainLossOptions::_isReversible);
	4108	static_cast<gainLossModelNonReversible*>(_sp->getPijAccelerator()->getReplacementModel())->setMu2(loss/gainLossSum);
	4109	}
	4110	else{
	4111	//gainLossSum = normalizeQ(_spVVec, _gainDist, _lossDist);
	4112	}
	4113	_tr.multipleAllBranchesByFactor(gainLossSum); //Needed in order to maintain the overall expected number of event
	4114	printTreeLikelihoodAllPosAlphTheSame();
	4115	}
	4116
	4117
	4118	/********************************************************************************************
	4119	Normalize the rates by setting the expected number of substitutions per site (per unit time) to 1:
	4120	setting Sum over i q_ii*freq_i = 1
	4121	*********************************************************************************************/
	4122	void gainLoss::normalizeQandTree(bool isComputeLikelihood, bool isMultipleAllBranchesByNormFactor){
	4123	LOGnOUT(4,<<"Starting normalizeQandTree...(so that sumQii=1 (or weighted ave. of sunOii's for many Qs))"<<endl);
	4124	MDOUBLE norm_factor = 0.0;
	4125	if(!gainLossOptions::_gainLossDist){
	4126	norm_factor = normalizeQ(_sp);
	4127	}
	4128	else{
	4129	norm_factor = normalizeQ(_spVVec, _gainDist, _lossDist);
	4130	}
	4131	LOGnOUT(4,<<"Q were multiplied by "<<1.0/norm_factor<<endl);
	4132	if(isMultipleAllBranchesByNormFactor){
	4133	_tr.multipleAllBranchesByFactor(norm_factor); ////Needed in order to maintain the overall expected number of event, Q was multi in 1/norm_factor
	4134	LOGnOUT(4,<<"Tree branches multi by "<<norm_factor<<endl);
	4135	}
	4136	if(isComputeLikelihood)
	4137	printTreeLikelihoodAllPosAlphTheSame();
	4138	}
	4139
	4140
	4141	/********************************************************************************************
	4142	This manipulation produces an un normalized Q matrices
	4143	*********************************************************************************************/
	4144	void gainLoss::AlphaEqBetaManipulation(){
	4145	LOGnOUT(4,<<"Starting AlphaEqBetaManipulation..."<<endl);
	4146	MDOUBLE lossAlpha = getRateAlpha(_lossDist);
	4147	MDOUBLE lossBeta = getRateBeta(_lossDist);
	4148	MDOUBLE factor2MultiplyBy = lossAlpha/lossBeta;
	4149	bool isNormalizeQ = false;
	4150
	4151	updateLossBeta(getRateBeta(_lossDist)*factor2MultiplyBy,_spVVec,_gainDist,_lossDist,isNormalizeQ);
	4152	updateGainBeta(getRateBeta(_gainDist)*factor2MultiplyBy,_spVVec,_gainDist,_lossDist,isNormalizeQ);
	4153	_tr.multipleAllBranchesByFactor(factor2MultiplyBy);
	4154	if(_unObservableData_p) _unObservableData_p->setLforMissingData(_tr,_spVVec,_gainDist,_lossDist); // No need?
	4155	LOGnOUT(4,<<"Finish AlphaEqBetaManipulation.");
	4156	printTreeLikelihoodAllPosAlphTheSame();
	4157	}
	4158
	4159	/********************************************************************************************
	4160	Aiming to classify branch specific event as either Recent or Ancient,
	4161	compute the distance from root cut-off
	4162	This is a basic method to compute the cut-off while finding a balance in total branch lengths
	4163	so to minimize "totalBranchLengthAncient - totalBranchLengthRecent"
	4164	This method don't consider "distance to OTU" - i.e., that some nodes will be recent by
	4165	*********************************************************************************************/
	4166	MDOUBLE gainLoss::computeDistanceFromRootForRecent(tree& tr)
	4167	{
	4168	MDOUBLE distanceFromRootForRecentCutOff;
	4169	MDOUBLE MeanDistanceFromRoot;
	4170	//MDOUBLE MeanDistanceFromNearestOTU;
	4171
	4172	MDOUBLE totalBranchLengthRecent = 0;
	4173	MDOUBLE totalBranchLengthAncient = 0;
	4174	MDOUBLE diffTotalBranchLengthRecentAncient = 0;
	4175
	4176	int numberOfNodes = tr.getNodesNum();
	4177	Vdouble DistanceFromRoot(numberOfNodes-1); // -1 because of Root
	4178	//Vdouble DistanceFromNearestOTU(numberOfNodes);
	4179	Vdouble Distance2father(numberOfNodes-1);
	4180
	4181	treeIterDownTopConst tIt(tr);
	4182	int i = 0;
	4183	for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
	4184	if(mynode->isRoot())
	4185	break;
	4186	//DistanceFromRoot.push_back(getDistance2ROOT(mynode));
	4187	//Distance2father.push_back(mynode->dis2father());
	4188	Distance2father[i] = mynode->dis2father();
	4189	DistanceFromRoot[i] = mynode->getDistance2ROOT();
	4190	//DistanceFromNearestOTU[i] = getMinimalDistance2OTU(mynode);
	4191	//cout<<mynode->name()<<" "<<DistanceFromRoot[i]<<endl; // DEBUG
	4192	++i;
	4193	}
	4194	MeanDistanceFromRoot = computeAverage(DistanceFromRoot, &Distance2father);
	4195
	4196	distanceFromRootForRecentCutOff = MeanDistanceFromRoot; // Starting point as Mean
	4197	for(i = 0; i<numberOfNodes; ++i){
	4198	if(DistanceFromRoot[i] < distanceFromRootForRecentCutOff)
	4199	totalBranchLengthAncient+= Distance2father[i];
	4200	else
	4201	totalBranchLengthRecent+= Distance2father[i];
	4202	}
	4203	diffTotalBranchLengthRecentAncient = totalBranchLengthAncient - totalBranchLengthRecent;
	4204	bool isRecentBiggerAncient = true;
	4205	if(totalBranchLengthAncient>totalBranchLengthRecent)
	4206	isRecentBiggerAncient = false;
	4207
	4208	bool isImprovedRecentEstimation = true;
	4209	int numberOfIterations = 0;
	4210	while(isImprovedRecentEstimation && (numberOfIterations<10000)){
	4211	MDOUBLE prevDiffTotalBranchLengthRecentAncient = diffTotalBranchLengthRecentAncient; // init
	4212	MDOUBLE prevDistanceFromRootForRecent = distanceFromRootForRecentCutOff;
	4213	MDOUBLE prevtotalBranchLengthAncient = totalBranchLengthAncient;
	4214	MDOUBLE prevtotalBranchLengthRecent = totalBranchLengthRecent;
	4215
	4216	distanceFromRootForRecentCutOff = distanceFromRootForRecentCutOff - diffTotalBranchLengthRecentAncient/(numberOfNodes*100); // cont. correction
	4217
	4218	for(i=0, totalBranchLengthAncient=0, totalBranchLengthRecent=0; i<numberOfNodes; ++i){
	4219	if(DistanceFromRoot[i] < distanceFromRootForRecentCutOff)
	4220	totalBranchLengthAncient+= Distance2father[i];
	4221	else
	4222	totalBranchLengthRecent+= Distance2father[i];
	4223	}
	4224	diffTotalBranchLengthRecentAncient = totalBranchLengthAncient - totalBranchLengthRecent;
	4225	if(abs(diffTotalBranchLengthRecentAncient) > abs(prevDiffTotalBranchLengthRecentAncient)
	4226	//&& ((totalBranchLengthAncient>totalBranchLengthRecent)*isRecentBiggerAncient) // to make sure that Ancient is not more than Recent, wait for "flip"
	4227	)
	4228	{
	4229	isImprovedRecentEstimation = false;
	4230	distanceFromRootForRecentCutOff = prevDistanceFromRootForRecent; // go back to last estimation.
	4231	totalBranchLengthAncient = prevtotalBranchLengthAncient;
	4232	totalBranchLengthRecent = prevtotalBranchLengthRecent;
	4233	}
	4234	//cout<<diffTotalBranchLengthRecentAncient<<" "<<distanceFromRootForRecentCutOff<<"\n"; // DEBUG
	4235	numberOfIterations++;
	4236	}
	4237	LOGnOUT(4,<<"The computed distanceFromRootForRecentCutOff="<<distanceFromRootForRecentCutOff<<" with TotalBranchLength Ancient="<<totalBranchLengthAncient<<" Recent="<<totalBranchLengthRecent<<" Converged "<<!isImprovedRecentEstimation<<endl);
	4238	return distanceFromRootForRecentCutOff;
	4239	}
	4240
	4241
	4242
	4243	/********************************************************************************************
	4244	Aiming to classify branch specific event as either Recent or Ancient,
	4245	compute the distance from Leaf
	4246	This is a basic method to compute the cut-off while finding a balance in total branch lengths
	4247	so to minimize "totalBranchLengthAncient - totalBranchLengthRecent"
	4248	*********************************************************************************************/
	4249	MDOUBLE gainLoss::computeDistanceNearestOTUforRecent(tree& tr)
	4250	{
	4251	MDOUBLE distance2NearestOTUForRecent;
	4252	MDOUBLE MeanDistanceFromNearestOTU;
	4253
	4254	MDOUBLE totalBranchLengthRecent = 0;
	4255	MDOUBLE totalBranchLengthAncient = 0;
	4256	MDOUBLE diffTotalBranchLengthRecentAncient = 0;
	4257
	4258	int numberOfNodes = tr.getNodesNum();
	4259	Vdouble DistanceFromNearestOTU(numberOfNodes-1);
	4260	Vdouble Distance2father(numberOfNodes-1);
	4261
	4262	treeIterDownTopConst tIt(tr);
	4263	int i = 0;
	4264	for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
	4265	if(mynode->isRoot())
	4266	break;
	4267	Distance2father[i] = mynode->dis2father();
	4268	DistanceFromNearestOTU[i] = mynode->getMinimalDistance2OTU();
	4269	//cout<<mynode->name()<<" "<<DistanceFromNearestOTU[i]<<"\n"; // DEBUG
	4270	++i;
	4271	}
	4272
	4273	MeanDistanceFromNearestOTU = computeAverage(DistanceFromNearestOTU, &Distance2father);
	4274	distance2NearestOTUForRecent = MeanDistanceFromNearestOTU; // Starting point
	4275	for(i = 0, totalBranchLengthAncient=0, totalBranchLengthRecent=0; i<numberOfNodes-1; ++i){
	4276	if(DistanceFromNearestOTU[i] > distance2NearestOTUForRecent)
	4277	totalBranchLengthAncient+= Distance2father[i];
	4278	else
	4279	totalBranchLengthRecent+= Distance2father[i];
	4280	}
	4281	diffTotalBranchLengthRecentAncient = totalBranchLengthAncient - totalBranchLengthRecent;
	4282	bool isRecentBiggerAncient = true;
	4283	if(totalBranchLengthAncient>totalBranchLengthRecent)
	4284	isRecentBiggerAncient = false;
	4285
	4286	bool isImprovedRecentEstimation = true;
	4287	int numberOfIterations = 0;
	4288	while(isImprovedRecentEstimation && (numberOfIterations<100000)){
	4289	MDOUBLE prevDiffTotalBranchLengthRecentAncient = diffTotalBranchLengthRecentAncient; // init
	4290	MDOUBLE prevDistance2NearestOTUForRecent = distance2NearestOTUForRecent;
	4291	MDOUBLE prevtotalBranchLengthAncient = totalBranchLengthAncient;
	4292	MDOUBLE prevtotalBranchLengthRecent = totalBranchLengthRecent;
	4293	distance2NearestOTUForRecent = distance2NearestOTUForRecent + diffTotalBranchLengthRecentAncient/(numberOfNodes*100000); // cont. correction
	4294
	4295	for(i = 0, totalBranchLengthAncient=0, totalBranchLengthRecent=0; i<numberOfNodes-1; ++i){
	4296	if(DistanceFromNearestOTU[i] > distance2NearestOTUForRecent)
	4297	totalBranchLengthAncient+= Distance2father[i];
	4298	else
	4299	totalBranchLengthRecent+= Distance2father[i];
	4300	}
	4301	diffTotalBranchLengthRecentAncient = totalBranchLengthAncient - totalBranchLengthRecent;
	4302
	4303	if(abs(diffTotalBranchLengthRecentAncient) > abs(prevDiffTotalBranchLengthRecentAncient)
	4304	//&& ((totalBranchLengthAncient>totalBranchLengthRecent)*isRecentBiggerAncient) // to make sure that Ancient is not more than Recent, wait for "flip"
	4305	)
	4306	{
	4307	isImprovedRecentEstimation = false;
	4308	distance2NearestOTUForRecent = prevDistance2NearestOTUForRecent; // go back to last estimation.
	4309	diffTotalBranchLengthRecentAncient = prevDiffTotalBranchLengthRecentAncient;
	4310	totalBranchLengthAncient = prevtotalBranchLengthAncient;
	4311	totalBranchLengthRecent = prevtotalBranchLengthRecent;
	4312	}
	4313	//cout<<diffTotalBranchLengthRecentAncient<<" "<<distance2NearestOTUForRecent<<"\n"; // DEBUG
	4314	numberOfIterations++;
	4315	}
	4316	LOGnOUT(4,<<"The computed distance2NearestOTUForRecent="<<distance2NearestOTUForRecent<<" with TotalBranchLength Ancient="<<totalBranchLengthAncient<<" Recent="<<totalBranchLengthRecent<<" Converged "<<!isImprovedRecentEstimation<<endl);
	4317	return distance2NearestOTUForRecent;
	4318	}
	4319
	4320	/********************************************************************************************
	4321	*********************************************************************************************/
	4322	void gainLoss::updateSetLofMissingData(){
	4323	if(!gainLossOptions::_gainLossDist)
	4324	_unObservableData_p->setLforMissingData(_tr,_sp);
	4325	else
	4326	_unObservableData_p->setLforMissingData(_tr,_spVVec,_gainDist,_lossDist);
	4327	}
	4328
	4329
	4330	void gainLoss::multipleAllBranchesByFactorAtStartByMaxParsimonyCost(int costOfTreeMP){
	4331	MDOUBLE branchLengthSum = _tr.getAllBranchesLengthSum();
	4332	MDOUBLE requiredBranchLengthSumByMaxParsimonyCost = (double)costOfTreeMP/_sc.seqLen();
	4333	MDOUBLE factorBL = requiredBranchLengthSumByMaxParsimonyCost / branchLengthSum;
	4334	_tr.multipleAllBranchesByFactor(factorBL);
	4335	MDOUBLE updatedBranchLengthSum = _tr.getAllBranchesLengthSum();
	4336	LOGnOUT(4,<<" multipleAllBranchesByFactorAtStartByMaxParsimonyCost Total branch lengths: "<<updatedBranchLengthSum <<" with respect to costOfTreeMP "<<costOfTreeMP<<endl);
	4337	}
	4338
	4339	/********************************************************************************************
	4340	brent doesn't work if the limits are "non-average-able".
	4341	e.g., if min=0.01 and max=100 ave =~50 and brent will not go towards the lower values
	4342	Solution: the exponent of log10, thus,
	4343	1 = 10^0,
	4344	0.1 = 10^-1
	4345	10 = 10^1
	4346	*********************************************************************************************/
	4347	void gainLoss::multipleAllBranchesByFactorAtStart(MDOUBLE epsilonOptimization){
	4348
	4349	//printTreeLikelihoodAllPosAlphTheSame(); // updates _logL
	4350	MDOUBLE branchLengthSum = _tr.getAllBranchesLengthSum();
	4351	MDOUBLE factorBL = 1;
	4352	LOGnOUT(4,<<" Start multipleAllBranchesByFactorAtStart use epsilonOptimization "<<epsilonOptimization<<" Total branch lengths:"<<branchLengthSum <<endl);
	4353	MDOUBLE minBranchProportionExponent = -8;
	4354	MDOUBLE maxBranchProportionExponent = 8;
	4355	MDOUBLE bestBranchProportionExponent = 0;
	4356	MDOUBLE currBranchProportionExponent = 0;
	4357	MDOUBLE currBestL = VERYSMALL;
	4358	MDOUBLE logLimprovement = 0;
	4359	bool isStopAfterNoImprovment = false;
	4360
	4361	//while(maxBranchProportionExponent>=0){ // allow up to 8 orders of magnitude change
	4362	LOGnOUT(4,<<"Allow proportion: "<<pow(10,minBranchProportionExponent) <<" to "<<pow(10,maxBranchProportionExponent)<<endl);
	4363
	4364	if(gainLossOptions::_gainLossDist)
	4365	currBestL = -brent(minBranchProportionExponent,bestBranchProportionExponent,maxBranchProportionExponent,evalBranchProportionExponentSPvv(&_tr, _sc, _spVVec,_gainDist,_lossDist,NULL,_unObservableData_p),epsilonOptimization,&currBranchProportionExponent);
	4366	else
	4367	currBestL = -brent(minBranchProportionExponent,bestBranchProportionExponent,maxBranchProportionExponent,evalBranchProportionExponent(&_tr, _sc, _sp,NULL,_unObservableData_p),epsilonOptimization,&currBranchProportionExponent);
	4368	factorBL = pow(10,currBranchProportionExponent);
	4369	logLimprovement = currBestL-_logL;
	4370	if(logLimprovement > 0){
	4371	_tr.multipleAllBranchesByFactor(factorBL);
	4372	if(_unObservableData_p){
	4373	if(gainLossOptions::_gainLossDist)
	4374	_unObservableData_p->setLforMissingData(_tr,_spVVec,_gainDist,_lossDist);
	4375	else
	4376	_unObservableData_p->setLforMissingData(_tr,_sp);
	4377	}
	4378	printTreeLikelihoodAllPosAlphTheSame(); // updates _logL
	4379	LOGnOUT(4,<<"Tree multiplied by "<<factorBL<< " Total branch lengths:"<<_tr.getAllBranchesLengthSum() <<endl);
	4380	printTree(_tr);
	4381	if(! currBestL > _logL+epsilonOptimization && isStopAfterNoImprovment){
	4382	LOGnOUT(4,<<"Last iteration with maxBranchProportionExponent "<<maxBranchProportionExponent<<" and Likelihood improvement of "<<logLimprovement<< " Total branch lengths:"<<_tr.getAllBranchesLengthSum() <<endl);
	4383	//break;
	4384	}
	4385	}
	4386	else{
	4387	LOGnOUT(4,<<" Branch length LengthSum was not changed. factor="<<factorBL <<endl);
	4388	LOGnOUT(4,<<" Total branch lengths:"<<_tr.getAllBranchesLengthSum() <<endl);
	4389	return;
	4390	}
	4391	// minBranchProportionExponent += 2;
	4392	// maxBranchProportionExponent -= 2;
	4393	//}
	4394	}
	4395
	4396
	4397
	4398	/********************************************************************************************
	4399	numOfSequenceSets = original seq. length (~4,800)
	4400	numOfRepeats = replications of simulation (~10)
	4401	The simulation is done by simulateOnePos (not efficient)
	4402	- for each position we randomly sample gain and loss rates and create a new sp
	4403
	4404	There are few options for data simulation:
	4405	gain,loss rates are sampled from (for each position):
	4406	1. Gamma distributions [with empirical parameters] //_initParamsFromTrueEstimation
	4407	!!! Need to run with previously found user parameters: Tree, _userTheta,_userAlphaGain,_userBetaGain,_userAlphaLoss,_userBetaLoss
	4408	2. MP estimated rates (the empirical _MPPerPos) +minimalRate added //initParamsFromMPEstimation
	4409	3. uniform distributions //_initParamsAtRandPointsInSimPostExp, Default
	4410
	4411	Theta ("1" freq):
	4412	1. taken from the empirical theta for all positions //_initParamsFromTrueEstimation
	4413	2. The observed frequencies for all positions+perturbation //isTheataFromObservedFreq
	4414	3. sampled from uniform distribution
	4415	a. once for all positions //_initParamsAtRandPointsInSimPostExp, Default
	4416	b. for each position //_initRootFreqAtRandPointsInSimPostExpEachPos
	4417	*********************************************************************************************/
	4418	void gainLoss::startSimultePosteriorExpectationOfChange(int numOfSequenceSets, const int numOfRepeats)
	4419	{
	4420	bool isNormalizeQAfterRatesSample = true; // After Norm, multi by gain+loss
	4421	bool isNormalizeQwithEmpricialQ = true; // applicable for MPemt and SMest
	4422	bool isMultBy2_normQ = false; // False, old and wrong correction, no need.
	4423	bool isComputeEmpiricalCorrection = false; // Failed trial. MP and SM, sampling rates
	4424	bool isGammaRatioAdjusted = false; // thus, the user data is override
	4425	bool isThetaSampledForGamma = false;
	4426	bool isThetaFromObservedForEmpiricalSimulations = true; // for MPest and SMest
	4427	bool isMPcostEmpirical = false; //if false, the loss2gainRatioToSim
	4428	bool isRateEQnumOfEvents = true; // gain=#gainEvents
	4429	bool isUsePeudoCountForEmpirical = true; // keep it true, it's actually minRate as before, but with correct sampling
	4430	MDOUBLE minPeudoCountForEmpirical = 0.01;
	4431
	4432	MDOUBLE glRatioTieBreakerInCostMatrix = 0.0; //is positive, losses are favored (gain cost is higher)
	4433	MDOUBLE epsilonForgainLossRatio = 0.01; // was 0.1
	4434	MDOUBLE loss2gainRatioToSim = gainLossOptions::_loss2gainRatioToSim;
	4435	MDOUBLE MaxGLratio = 2.1; //was 37, here, only 1,2 is accounted for in multiple MP costs
	4436
	4437	// Evolutionary model based, with separate variables (possibly "flat" each replication)
	4438	stochasticProcess* spSim=NULL;
	4439	stochasticProcess* spSimpleSim=NULL;
	4440	vector<vector<stochasticProcess*> > spVVecSim;
	4441	unObservableData* unObservableDataSim=NULL;
	4442	distribution* gainDistSim=NULL;
	4443	distribution* lossDistSim=NULL;
	4444	tree trSim;
	4445	VVdouble LpostPerCatSim; // the posterior probability for each position for each category
	4446	VVVdouble LpostPerSpPerCatSim;
	4447
	4448	MDOUBLE minThetaRandSample = 0.1; // was 0.01. change all from 0.01 to 0.05, and later to 0.1
	4449	MDOUBLE maxThetaRandSample = 0.9; // was 0.09
	4450	MDOUBLE minGainRandSample = 0.1; // was 0.01
	4451	MDOUBLE maxGainRandSample = 2.0; // was 2.5, now E(val) = 1
	4452	MDOUBLE minLossRandSample = 0.1; // was 0.01
	4453	MDOUBLE maxLossRandSample = loss2gainRatioToSim*2;
	4454	MDOUBLE meanGaussianGain = 1.0;
	4455	MDOUBLE varianceGaussianGain = 1.0;
	4456	MDOUBLE minAllowedRate = 0.01; // 0.01, An important parameter. used to avoid too low or high rates in Gamma and MP
	4457	MDOUBLE maxAllowedRate = 100;
	4458
	4459	MDOUBLE meanGainFromEMP=1;
	4460	MDOUBLE meanLossFromEMP=1;
	4461	MDOUBLE meanQrateFromEMP=1;
	4462	// these parameter need to be part of gainLossOptions
	4463	bool printTreeForEachReplication = true;
	4464	//bool isUseMeanEventFromEMP = true; // sum of gain and loss, if T: meanGainFromMP=meanLossFromMP=Events (for computation)
	4465	MDOUBLE meanEventsFromEMP=1;
	4466	MDOUBLE expectedQvalEmpirical=1;
	4467
	4468	MDOUBLE meanGaussianLoss = loss2gainRatioToSim;
	4469	MDOUBLE varianceGaussianLoss = loss2gainRatioToSim;
	4470	MDOUBLE Theta = gainLossOptions::_userTheta; //
	4471	MDOUBLE AlphaGain = gainLossOptions::_userAlphaGain; //
	4472	MDOUBLE BetaGain = gainLossOptions::_userBetaGain; //
	4473	MDOUBLE AlphaLoss = gainLossOptions::_userAlphaLoss; //
	4474	MDOUBLE BetaLoss = gainLossOptions::_userBetaLoss; //
	4475	MDOUBLE AlphaRate = gainLossOptions::_userAlphaRate; //
	4476	if(gainLossOptions::_performParametricBootstapCorrelation){
	4477	isNormalizeQAfterRatesSample = false;
	4478	if(gainLossOptions::_gainLossDist){
	4479	Theta =static_cast<gainLossModel>((_spVVec[0][0]).getPijAccelerator()->getReplacementModel())->getTheta(); // gainLossOptions::_userTheta
	4480	AlphaGain = getRateAlpha(_gainDist); // gainLossOptions::_userAlphaGain
	4481	BetaGain = getRateBeta(_gainDist); // gainLossOptions::_userBetaGain
	4482	AlphaLoss = getRateAlpha(_lossDist); // gainLossOptions::_userAlphaLoss
	4483	BetaLoss = getRateBeta(_lossDist); // gainLossOptions::_userBetaLoss
	4484	}else{
	4485	Theta =static_cast<gainLossModel*>(_sp->getPijAccelerator()->getReplacementModel())->getTheta();
	4486	AlphaRate = getRateAlpha(_sp->distr());
	4487	}
	4488	minGainRandSample = 0.01;
	4489	maxGainRandSample = VERYBIG;
	4490	minLossRandSample = 0.01;
	4491	maxLossRandSample = VERYBIG;
	4492	minAllowedRate = 0.01;
	4493	maxAllowedRate = VERYBIG;
	4494	}
	4495
	4496	MDOUBLE gainPlusLossExpectancyGamma = (AlphaGain/BetaGain)+(AlphaLoss/BetaLoss);
	4497
	4498	MDOUBLE costMatrixGainLossRatio = gainLossOptions::_costMatrixGainLossRatio; // to be updated according to simulation
	4499	MDOUBLE costMatrixGainLossRatioCorrectionFactor =1;
	4500	MDOUBLE minAllowedMeanEMP = 0.01;
	4501	bool normalizationFactorForLoss1AsInTree = false;
	4502	MDOUBLE randomNoise =0;
	4503
	4504	Vdouble freq(2,0.0);
	4505	MDOUBLE init_gain = 0.5; //gainLossOptions::_userGain taken from original runs of COG data, get it from params file
	4506	MDOUBLE init_loss = 0.5; //gainLossOptions::_userLoss
	4507	MDOUBLE rateSample = 1;
	4508	MDOUBLE lossGainRatioSample = 1;
	4509	bool _isHGT_normal_Pij = gainLossOptions::_isHGT_normal_Pij;
	4510	bool _isHGT_with_Q = gainLossOptions::_isHGT_with_Q;
	4511
	4512	// DEBUG Test for gain events in Eq sequences (change isTestForGainEventsInEqSeq=true)
	4513	bool isTestForGainEventsInEqSeq =false;
	4514
	4515	LOGnOUT(4,<<endl<<"****************************************************\n startSimultePosteriorExpectationOfChange... "<<endl);
	4516	LOGnOUT(4,<<"Replicates="<<numOfRepeats<<" Positions="<<numOfSequenceSets<<endl);
	4517	LOGnOUT(4,<<" simulationType {Uniform, Normal, Gamma, MPestEmp,SMestEmp, GammaNoise;..."<<endl);
	4518	LOGnOUT(4,<<" EQ_gEql,EQ_gVrl,Gam_gEql,GamgVrl}="<<gainLossOptions::_simulationType<<endl);
	4519
	4520	LOGnOUT(4,<<" loss/gain = ");
	4521	if(!(gainLossOptions::_simulationType == gainLossOptions::Gamma))
	4522	LOGnOUT(4,<<loss2gainRatioToSim<<endl)
	4523	else
	4524	LOGnOUT(4,<<(gainLossOptions::_userAlphaLoss/gainLossOptions::_userBetaLoss) / (gainLossOptions::_userAlphaGain/gainLossOptions::_userBetaGain)<<endl);
	4525	if(!gainLossOptions::_isRootFreqEQstationaryInSimulations){
	4526	if(gainLossOptions::_initRootFreqAtRandPointsInSimPostExpEachPos)
	4527	LOGnOUT(4,<<"Root(1) freq is sampled seperatly for each pos"<<endl)
	4528	else
	4529	LOGnOUT(4,<<"Root(1) freq is sampled once for the entire replication (sim all positions)"<<endl)
	4530	if(isComputeEmpiricalCorrection
	4531	&& (gainLossOptions::_simulationType == gainLossOptions::MPestEmp \|\| gainLossOptions::_simulationType == gainLossOptions::SMestEmp ))
	4532	LOGnOUT(3,<<"!!! WARN !!! the ComputeEmpiricalCorrection for SM and MP assumes RootFreq=Stationary"<<endl);
	4533	}
	4534	else
	4535	LOGnOUT(4,<<"Root(1) freq = gain/(gain+loss)"<<endl);
	4536
	4537	if(isGammaRatioAdjusted){
	4538	LOGnOUT(4,<<" LossBeta is assigned to maintain loss2gainRatioToSim="<<loss2gainRatioToSim<<endl);
	4539	BetaLoss = BetaGainAlphaLoss/(AlphaGainloss2gainRatioToSim);
	4540	}
	4541	LOGnOUT(4,<<" All rate simulations are done with minAllowedRate "<<minAllowedRate<<" and maxAllowedRate "<<maxAllowedRate<<endl);
	4542
	4543	if(gainLossOptions::_isRootFreqEQstationaryInSimulations & !gainLossOptions::_isRootFreqEQstationary)
	4544	LOGnOUT(3,<<"\n\n WARNING!!! _isRootFreqEQstationaryInSimulations " <<gainLossOptions::_isRootFreqEQstationaryInSimulations<<" and model "<<gainLossOptions::_isRootFreqEQstationary<<endl<<endl<<endl);
	4545
	4546	if(gainLossOptions::_simulationType == gainLossOptions::MPestEmp){
	4547	if(isNormalizeQwithEmpricialQ)
	4548	isNormalizeQAfterRatesSample = false; // in this case, no other normalization
	4549	if(_MPPerPos.size()==0){
	4550	LOGnOUT(4,<<" _MPPerPos size="<<_MPPerPos.size()<<endl);
	4551	startMaxParsimonyChange();
	4552	}
	4553	meanGainFromEMP = max(getVMatrixJK(_MPPerPos,0,1)/_MPPerPos.size(), minAllowedMeanEMP); // used for initParamsFromMPEstimation
	4554	meanLossFromEMP = max(getVMatrixJK(_MPPerPos,1,0)/_MPPerPos.size(), minAllowedMeanEMP); // used for initParamsFromMPEstimation
	4555	MDOUBLE sumQrateFromEMP = 0;
	4556	for(int i=0; i<_MPPerPos.size(); ++i){
	4557	MDOUBLE gain = _MPPerPos[i][0][1];
	4558	MDOUBLE loss = _MPPerPos[i][1][0];
	4559	if(gain+loss>0)
	4560	sumQrateFromEMP += 2gainloss/(gain+loss);
	4561	}
	4562	meanQrateFromEMP = sumQrateFromEMP/_MPPerPos.size();
	4563	if(isUsePeudoCountForEmpirical){
	4564	LOGnOUT(4,<<"To avoid zero rates, Use pseudo counts= "<<minPeudoCountForEmpirical<<endl);
	4565	meanGainFromEMP += minPeudoCountForEmpirical;
	4566	meanLossFromEMP += minPeudoCountForEmpirical;
	4567	}
	4568	meanEventsFromEMP = meanGainFromEMP+meanLossFromEMP;
	4569	LOGnOUT(4,<<" The mean number by MP of gain= "<<meanGainFromEMP<<", loss= "<<meanLossFromEMP<<endl);
	4570	if(normalizationFactorForLoss1AsInTree){ // not is use
	4571	meanGainFromEMP = meanGainFromEMP/meanLossFromEMP;
	4572	meanLossFromEMP = meanLossFromEMP/meanLossFromEMP; // results in 1
	4573	LOGnOUT(4,<<" The mean number by MP after normalization of Loss=1 (compatible with tree). gain= "<<meanGainFromEMP<<", loss= "<<meanLossFromEMP<<endl);
	4574	}
	4575	if(isComputeEmpiricalCorrection){
	4576	expectedQvalEmpirical=ComputeEmpiricalExpectedQforStationaryProcess(_MPPerPos,minPeudoCountForEmpirical);
	4577	cout<<"expectedQvalEmpirical="<<expectedQvalEmpirical<<endl;
	4578	}
	4579	}
	4580	if(gainLossOptions::_simulationType == gainLossOptions::SMestEmp){
	4581	if(isNormalizeQwithEmpricialQ)
	4582	isNormalizeQAfterRatesSample = false; // in this case, no other normalization
	4583
	4584	if(_SMPerPos.size()==0){
	4585	LOGnOUT(4,<<" _SMPerPos size="<<_SMPerPos.size()<<endl);
	4586	startComputePosteriorExpectationOfChange();
	4587	}
	4588	meanGainFromEMP = max(getVMatrixJK(_SMPerPos,0,1)/_SMPerPos.size(), minAllowedMeanEMP); // used for initParamsFromMPEstimation
	4589	meanLossFromEMP = max(getVMatrixJK(_SMPerPos,1,0)/_SMPerPos.size(), minAllowedMeanEMP); // used for initParamsFromMPEstimation
	4590	MDOUBLE sumQrateFromEMP = 0;
	4591	for(int i=0; i<_SMPerPos.size(); ++i){
	4592	MDOUBLE gain = _SMPerPos[i][0][1];
	4593	MDOUBLE loss = _SMPerPos[i][1][0];
	4594	if(gain+loss>0)
	4595	sumQrateFromEMP += 2gainloss/(gain+loss);
	4596	}
	4597	meanQrateFromEMP = sumQrateFromEMP/_SMPerPos.size();
	4598	if(isUsePeudoCountForEmpirical){
	4599	LOGnOUT(4,<<"To avoid zero rates, Use pseudo counts= "<<minPeudoCountForEmpirical<<endl);
	4600	meanGainFromEMP += minPeudoCountForEmpirical;
	4601	meanLossFromEMP += minPeudoCountForEmpirical;
	4602	}
	4603	meanEventsFromEMP = meanGainFromEMP+meanLossFromEMP;
	4604	LOGnOUT(4,<<" The mean number by SM of gain= "<<meanGainFromEMP<<", loss= "<<meanLossFromEMP<<endl);
	4605	if(normalizationFactorForLoss1AsInTree){ // not is use
	4606	meanGainFromEMP = meanGainFromEMP/meanLossFromEMP;
	4607	meanLossFromEMP = meanLossFromEMP/meanLossFromEMP; // results in 1
	4608	LOGnOUT(4,<<" The mean number by SM after normalization of Loss=1 (compatible with tree). gain= "<<meanGainFromEMP<<", loss= "<<meanLossFromEMP<<endl);
	4609	}
	4610	if(isComputeEmpiricalCorrection){
	4611	expectedQvalEmpirical=ComputeEmpiricalExpectedQforStationaryProcess(_SMPerPos,minPeudoCountForEmpirical);
	4612	cout<<"expectedQvalEmpirical="<<expectedQvalEmpirical<<endl;
	4613	}
	4614	}
	4615	if(gainLossOptions::_isRootFreqEQstationaryInSimulations){
	4616	LOGnOUT(4,<<" Theta (Root freq of '1's) is equal to the stationary one"<<endl);
	4617	}
	4618	else if(gainLossOptions::_isTheataFromObservedFreq /&& !(gainLossOptions::_simulationType==gainLossOptions::Gamma)/){
	4619	Vdouble observedFreq = evaluateCharacterFreq(_scWithFullLength);
	4620	Theta = observedFreq[1];
	4621	MDOUBLE maxDiffFromObservedFreq = 0.0; // 0.2
	4622	maxThetaRandSample = min(maxThetaRandSample,Theta+maxDiffFromObservedFreq) ;
	4623	minThetaRandSample = max(minThetaRandSample, Theta-maxDiffFromObservedFreq);
	4624	LOGnOUT(4,<<" Theta taken from 'counting' freq= "<<observedFreq[1]<<" with random perturbation of "<<maxDiffFromObservedFreq<<endl);
	4625	}// else it's from _userTheta
	4626
	4627	if(isTestForGainEventsInEqSeq){
	4628	numOfSequenceSets = 1000;
	4629	LOGnOUT(4,<<endl<<"Using TestForGainEventsInEqSeq with numOfSequenceSets= "<<numOfSequenceSets<<endl);
	4630	}
	4631
	4632	//string treeFile = gainLossOptions::_treeFile; // input tree - same for all iterations
	4633	if(gainLossOptions::_isRootFreqEQstationaryInSimulations)
	4634	LOGnOUT(4,<<"\tIn statrionary model - Theta=Root(1) is driven from the gain/gain+loss"<<endl);
	4635	if(gainLossOptions::_isMPratio)
	4636	LOGnOUT(4,<<"\tMPratio simulations: gain is sampled, and loss if multiplied by the MPcost= "<<costMatrixGainLossRatio<<endl);
	4637	switch (gainLossOptions::_simulationType) //{Uniform, Normal, Gamma, MPestEmp, GammaNoise}
	4638	{
	4639	case gainLossOptions::Uniform:
	4640	LOGnOUT(4,<<"\n\n(*) UNIFORM simulations: Using RandSample parameters:"<<endl);
	4641	LOGnOUT(4,<<"\tGain Min="<<minGainRandSample<<" Max="<<maxGainRandSample<<" "<<endl);
	4642	if(!gainLossOptions::_isMPratio)
	4643	LOGnOUT(4,<<"\tLoss Min="<<minLossRandSample<<" Max="<<maxLossRandSample<<" "<<endl);
	4644	if(!gainLossOptions::_isRootFreqEQstationaryInSimulations)
	4645	LOGnOUT(4,<<"\tTheta Min="<<minThetaRandSample<<" Max="<<maxThetaRandSample<<endl);
	4646	break;
	4647	case gainLossOptions::Normal:
	4648	LOGnOUT(4,<<"\n\n(*) Gaussian simulations: Using RandSample parameters:"<<endl);
	4649	LOGnOUT(4,<<"\tGain mean="<<meanGaussianGain<<" var="<<varianceGaussianGain<<" "<<endl);
	4650	if(!gainLossOptions::_isMPratio)
	4651	LOGnOUT(4,<<"\tLoss mean="<<meanGaussianLoss<<" var="<<varianceGaussianLoss<<" "<<endl);
	4652	break;
	4653	case gainLossOptions::Gamma:
	4654	LOGnOUT(4,<<"\n\n(*) GAMMA simulations: Sample from Gamma with parameters:\n");
	4655	if(isThetaSampledForGamma){
	4656	LOGnOUT(4,<<"Note: Theta is sampled for Gamma, not taken from userTheta\n"<<endl);
	4657	LOGnOUT(4,<<"Theta Min="<<minThetaRandSample<<" Max="<<maxThetaRandSample); }
	4658	else if (!gainLossOptions::_isRootFreqEQstationaryInSimulations)
	4659	LOGnOUT(4,<<"\n_userTheta="<<gainLossOptions::_userTheta);
	4660	LOGnOUT(4,<<"\n_userAlphaGain=\t"<<AlphaGain
	4661	<<"\n_userBetaGain=\t"<<BetaGain<<endl);
	4662	if(!gainLossOptions::_isMPratio)
	4663	LOGnOUT(4,<<"_userAlphaLoss=\t"<<AlphaLoss
	4664	<<"\n_BetaLoss=\t"<<BetaLoss<<endl);
	4665	if(isGammaRatioAdjusted)
	4666	LOGnOUT(4,<<"Note: Gamma is RatioAdjusted, the BetaLoss is determined by the required gain:loss ratio "<<loss2gainRatioToSim<<endl);
	4667	loss2gainRatioToSim = (AlphaLoss/BetaLoss)/(AlphaGain/BetaGain);
	4668	break;
	4669	case gainLossOptions::MPestEmp:
	4670	LOGnOUT(4,<<"\n\n(*) MP simulations: Using Maximum parsimony empirical estimated gain and loss rates \n sampled from "<<_MPPerPos.size()<<" positions "<<endl);
	4671	loss2gainRatioToSim = meanLossFromEMP/meanGainFromEMP;
	4672	//if(isUseMeanEventFromEMP)
	4673	// LOGnOUT(4,<<" Note: meanGainFromMP = meanLossFromMP = Events (Thus, sampleGain/all and sampleLoss/all) "<<endl);
	4674	if(isThetaFromObservedForEmpiricalSimulations && !gainLossOptions::_isRootFreqEQstationaryInSimulations)
	4675	LOGnOUT(4,<<"Note: for the Empirical simulation - theta is taken from Observed freq"<<endl);
	4676	break;
	4677	case gainLossOptions::SMestEmp:
	4678	LOGnOUT(4,<<"\n\n(*) SM simulations: Using stochstic mapping empirical estimated gain and loss rates \n sampled from "<<_SMPerPos.size()<<" positions "<<endl);
	4679	loss2gainRatioToSim = meanLossFromEMP/meanGainFromEMP;
	4680	//if(isUseMeanEventFromEMP)
	4681	// LOGnOUT(4,<<" Note: meanGainFromEMP = meanLossFromEMP = Events (Thus, sampleGain/all and sampleLoss/all) "<<endl);
	4682	if(isThetaFromObservedForEmpiricalSimulations && !gainLossOptions::_isRootFreqEQstationaryInSimulations)
	4683	LOGnOUT(4,<<"Note: for the Empirical simulation - theta is taken from Observed freq"<<endl);
	4684	break;
	4685	case gainLossOptions::GammaNoise:
	4686	LOGnOUT(4,<<"\n\n(*) GAMMA simulations with noise level "<< gainLossOptions::_noiseLevelInGammaSimulation<<" parameter before noise:");
	4687	LOGnOUT(4,<<"\n_userTheta="<<gainLossOptions::_userTheta);
	4688	LOGnOUT(4,<<"\n_userAlphaGain="<<AlphaGain
	4689	<<"\n_userBetaGain="<<BetaGain<<endl);
	4690	break;
	4691	case gainLossOptions::EQ_gEql:
	4692	LOGnOUT(4,<<" Simulation - EQ rate, gain=loss (via stationary freq)"<<endl);
	4693	loss2gainRatioToSim = 1.0;
	4694	break;
	4695	case gainLossOptions::EQ_gVrl:
	4696	LOGnOUT(4,<<" Simulation - EQ rate, gain/loss ratio unif variable. Mean loss/gain="<<loss2gainRatioToSim<<" epsilon="<< epsilonForgainLossRatio<<endl);
	4697	break;
	4698	case gainLossOptions::Gam_gEql:
	4699	LOGnOUT(4,<<" Simulation - Gamma rate, alpha="<<AlphaRate<<", gain=loss (via stationary freq)"<<endl);
	4700	loss2gainRatioToSim = 1.0;
	4701	break;
	4702	case gainLossOptions::Gam_gVrl:
	4703	LOGnOUT(4,<<" Simulation - Gamma rate, alpha="<<AlphaRate<<", gain/loss ratio unif variable. Mean loss/gain="<<loss2gainRatioToSim<<" epsilon="<< epsilonForgainLossRatio<<endl);
	4704	break;
	4705	default:
	4706	errorMsg::reportError("unknown type in optimizationLevel - {Uniform, Normal, Gamma, MPestEmp GammaNoise, MPratio}");
	4707	}
	4708	if(isNormalizeQwithEmpricialQ
	4709	&& (gainLossOptions::_simulationType == gainLossOptions::MPestEmp \|\| gainLossOptions::_simulationType == gainLossOptions::SMestEmp) )
	4710	LOGnOUT(4,<<" Q matrix is normalized given empirical expected Q values."<<endl);
	4711	if(isNormalizeQAfterRatesSample)
	4712	LOGnOUT(4,<<" Q matrix is normalized after sampling."<<endl);
	4713
	4714	if(isMultBy2_normQ
	4715	&& !(gainLossOptions::_simulationType == gainLossOptions::Gamma)
	4716	&& !(gainLossOptions::_simulationType == gainLossOptions::SMestEmp)
	4717	&& !(gainLossOptions::_simulationType == gainLossOptions::MPestEmp) ) // with mult=2, Q matrix is normalized with respect to the tree (after multiplied by freq=0.5)
	4718	LOGnOUT(4,<<" Gain and loss rates multiplied by 2. Mainitaining normalized Q matrix."<<endl);
	4719
	4720
	4721	////////////////////////////////////////////////////////////////////////// Replicates
	4722	for(int replicat=1; replicat<=numOfRepeats; ++replicat){
	4723	LOGnOUT(4,<<endl<<".......................................Replicate= "<<replicat<<endl);
	4724	time_t t1,t2;
	4725	time(&t1);
	4726
	4727	createDir(gainLossOptions::_outDir, "SimulatedPostExp"+ int2string(replicat));
	4728	string outDirSeq = gainLossOptions::_outDir + "//" + "SimulatedPostExp"+ int2string(replicat) + "//" + "seqAll" ;
	4729	createDir(gainLossOptions::_outDir + "//" + "SimulatedPostExp"+ int2string(replicat), "seqAll" );
	4730	string simulatedEventsSimString = outDirSeq + "//" + "simulatedEvents.txt";
	4731	ofstream* simulatedEventsFile = new ofstream(simulatedEventsSimString.c_str());
	4732	string posSim = outDirSeq + "//" + "nodesContentSim" + ".txt";
	4733	ofstream* posSim_out = new ofstream(posSim.c_str());
	4734
	4735
	4736
	4737	string perPosStat = outDirSeq + "//" + "statPos.txt";
	4738	ofstream perPosStatStream(perPosStat.c_str());
	4739	perPosStatStream<<"pos"<<"\t"<<"rate"<<"\t"<<"theta"<<"\t"<<"occur"<<"\n";
	4740
	4741	string perBranchStat = outDirSeq + "//" + "statBranch.txt";
	4742	ofstream perBranchStatStream(perBranchStat.c_str());
	4743	perBranchStatStream<<"branch"<<"\t"<<"branchLength"<<"\t"<<"distance2root"<<"\t"<<"distance2NearestOTU"<<"\t"<<"numOfNodes2NearestOTU"<<endl;
	4744	treeIterTopDownConst tit(_tr);
	4745	for (tree::nodeP myN = tit.first();myN!=tit.end(); myN = tit.next()) {
	4746	if(myN->isRoot())
	4747	continue;
	4748	perBranchStatStream<<myN->name()<<"\t"<<myN->dis2father()<<"\t"<<myN->getDistance2ROOT()<<"\t"<<myN->getMinimalDistance2OTU()<<"\t"<<myN->getMinimalNumOfNodes2OTU()<<endl;
	4749	}
	4750	perBranchStatStream.close();
	4751	MDOUBLE init_gainsForCostMatrix = 0.0; // sum all position
	4752	MDOUBLE init_lossesForCostMatrix = 0.0; // sum all position
	4753	MDOUBLE init_losses2gainRatioForCostMatrixSum = 0.0;
	4754	//MDOUBLE QnormTest = 0.0;
	4755
	4756	// produce random noise
	4757	if(gainLossOptions::_simulationType == gainLossOptions::GammaNoise){
	4758	randomNoise = talRandom::giveRandomNumberBetweenTwoPoints(-gainLossOptions::_noiseLevelInGammaSimulation, gainLossOptions::_noiseLevelInGammaSimulation);
	4759	// if noiseLevel=200% than param may be up to x3 or down to x0.33 its value
	4760	if(randomNoise>=0)
	4761	randomNoise = 1+randomNoise;
	4762	else
	4763	randomNoise = 1/(1-randomNoise);
	4764	LOGnOUT(4,<<"Noise over all parameters="<< randomNoise<<endl);
	4765	}
	4766	// Theta for all positions, (not relevant to stationary models)
	4767	if(!gainLossOptions::_isRootFreqEQstationaryInSimulations){ // else Theta is driven from gain/gain+loss
	4768	switch (gainLossOptions::_simulationType) //{Uniform, Normal, Gamma, MPestEmp, GammaNoise}
	4769	{
	4770	case gainLossOptions::Uniform:
	4771	case gainLossOptions::Normal:
	4772	freq[1]= talRandom::giveRandomNumberBetweenTwoPoints(minThetaRandSample, maxThetaRandSample);
	4773	break;
	4774	case gainLossOptions::MPestEmp:
	4775	case gainLossOptions::SMestEmp:
	4776	if(isThetaFromObservedForEmpiricalSimulations)
	4777	freq[1]=Theta;
	4778	else
	4779	freq[1]= talRandom::giveRandomNumberBetweenTwoPoints(minThetaRandSample, maxThetaRandSample);
	4780	break;
	4781	case gainLossOptions::Gamma:
	4782	if(isThetaSampledForGamma)
	4783	freq[1]= talRandom::giveRandomNumberBetweenTwoPoints(minThetaRandSample, maxThetaRandSample);
	4784	else
	4785	freq[1]= gainLossOptions::_userTheta;
	4786	break;
	4787	case gainLossOptions::GammaNoise:
	4788	freq[1] = gainLossOptions::_userTheta*randomNoise;
	4789	freq[1] = max(freq[1],minThetaRandSample); // added to avoid too small or too big theta
	4790	freq[1] = min(freq[1],maxThetaRandSample);
	4791	break;
	4792	case gainLossOptions::EQ_gEql:
	4793	case gainLossOptions::EQ_gVrl:
	4794	case gainLossOptions::Gam_gEql:
	4795	case gainLossOptions::Gam_gVrl:
	4796	freq[1]= talRandom::giveRandomNumberBetweenTwoPoints(minThetaRandSample, maxThetaRandSample);
	4797	break;
	4798	default:
	4799	errorMsg::reportError("unknown type in optimizationLevel - {Uniform, Normal, Gamma, MPestEmp,SMestEmp, GammaNoise}");
	4800	}
	4801	if(!gainLossOptions::_initRootFreqAtRandPointsInSimPostExpEachPos)
	4802	LOGnOUT(4,<<" For all positions, Root(1)= "<<freq[1]<<endl);
	4803	}
	4804	else{
	4805	LOGnOUT(4,<<" Stationary model - Theta=Root(1) is driven from the gain/gain+loss"<<endl);
	4806	}
	4807
	4808	vector<bool> isGainEventInAnode; // DEBUG
	4809	if(isTestForGainEventsInEqSeq)
	4810	isGainEventInAnode.resize(numOfSequenceSets+1);
	4811
	4812
	4813	////////////////////////////////////////////////////////////////////////// Positions
	4814	MDOUBLE ratePerPosSum = 0;
	4815	int randomPosition; //used in MPestEmp or SMestEmp
	4816	sequenceContainer seqSimulated;
	4817	gainLossAlphabet alph;
	4818
	4819
	4820	for(int i=0; i<numOfSequenceSets; ++i)
	4821	{
	4822	rateSample = 1;
	4823	lossGainRatioSample = 1;
	4824	switch (gainLossOptions::_simulationType) //{Uniform, Normal, Gamma, MPestEmp, GammaNoise}
	4825	{
	4826	case gainLossOptions::Uniform:
	4827	init_gain = talRandom::giveRandomNumberBetweenTwoPoints(minGainRandSample, maxGainRandSample);
	4828	if(gainLossOptions::_isMPratio)
	4829	init_loss = init_gain * costMatrixGainLossRatio;
	4830	else
	4831	init_loss = talRandom::giveRandomNumberBetweenTwoPoints(minLossRandSample, maxLossRandSample);
	4832	break;
	4833	case gainLossOptions::Normal:
	4834	init_gain = talRandom::rand_gaussian(meanGaussianGain, varianceGaussianGain);
	4835	if(gainLossOptions::_isMPratio)
	4836	init_loss = init_gain * costMatrixGainLossRatio;
	4837	else
	4838	init_loss = talRandom::rand_gaussian(meanGaussianLoss, varianceGaussianLoss);
	4839	break;
	4840	case gainLossOptions::Gamma:
	4841	init_gain = talRandom::SampleGamma(AlphaGain,BetaGain);
	4842	if(gainLossOptions::_isMPratio)
	4843	init_loss = init_gain * costMatrixGainLossRatio;
	4844	else
	4845	init_loss = talRandom::SampleGamma(AlphaLoss,BetaLoss);
	4846	break;
	4847	case gainLossOptions::MPestEmp:
	4848	randomPosition = (int)talRandom::giveRandomNumberBetweenTwoPoints(0, _MPPerPos.size());
	4849	if(isRateEQnumOfEvents)
	4850	init_gain = _MPPerPos[randomPosition][0][1];
	4851	else{
	4852	if(isUsePeudoCountForEmpirical)
	4853	init_gain = (_MPPerPos[randomPosition][0][1]+minPeudoCountForEmpirical)/(meanGainFromEMP2) sqrt(meanGainFromEMP/meanLossFromEMP) ; // was /meanEventsFromEMP
	4854	else
	4855	init_gain = _MPPerPos[randomPosition][0][1]/meanEventsFromEMP; // was /meanEventsFromEMP
	4856	}
	4857	if(gainLossOptions::_isMPratio)
	4858	init_loss = init_gain * costMatrixGainLossRatio;
	4859	else{
	4860	if(isRateEQnumOfEvents)
	4861	init_loss = _MPPerPos[randomPosition][1][0];
	4862	else{
	4863	if(isUsePeudoCountForEmpirical)
	4864	init_loss = (_MPPerPos[randomPosition][1][0]+minPeudoCountForEmpirical)/(meanLossFromEMP2)sqrt(meanLossFromEMP/meanGainFromEMP) ; // was /meanEventsFromEMP
	4865	else
	4866	init_loss = _MPPerPos[randomPosition][1][0]/meanEventsFromEMP ; // was /meanEventsFromEMP
	4867	}
	4868	}
	4869	break;
	4870	case gainLossOptions::SMestEmp:
	4871	randomPosition = (int)talRandom::giveRandomNumberBetweenTwoPoints(0, _SMPerPos.size());
	4872	if(isRateEQnumOfEvents)
	4873	init_gain = _SMPerPos[randomPosition][0][1];
	4874	else{
	4875	if(isUsePeudoCountForEmpirical)
	4876	init_gain = (_SMPerPos[randomPosition][0][1]+minPeudoCountForEmpirical)/(meanGainFromEMP2) sqrt(meanGainFromEMP/meanLossFromEMP) ; // was /meanEventsFromEMP
	4877	else
	4878	init_gain = _SMPerPos[randomPosition][0][1]/meanEventsFromEMP; // was /meanEventsFromEMP
	4879	}
	4880	if(gainLossOptions::_isMPratio)
	4881	init_loss = init_gain * costMatrixGainLossRatio;
	4882	else{
	4883	if(isRateEQnumOfEvents)
	4884	init_loss = _SMPerPos[randomPosition][1][0];
	4885	else{
	4886	if(isUsePeudoCountForEmpirical)
	4887	init_loss = (_SMPerPos[randomPosition][1][0]+minPeudoCountForEmpirical)/(meanLossFromEMP2)sqrt(meanLossFromEMP/meanGainFromEMP) ; // was /meanEventsFromEMP
	4888	else
	4889	init_loss = _SMPerPos[randomPosition][1][0]/meanEventsFromEMP ; // was /meanEventsFromEMP
	4890	}
	4891	}
	4892	break;
	4893	case gainLossOptions::GammaNoise:
	4894	init_gain = talRandom::SampleGamma( (AlphaGain*randomNoise)
	4895	,(BetaGain*randomNoise));
	4896	if(gainLossOptions::_isMPratio)
	4897	init_loss = init_gain * costMatrixGainLossRatio;
	4898	else{
	4899	init_loss = talRandom::SampleGamma((AlphaLoss*randomNoise)
	4900	,(BetaLoss*randomNoise));
	4901	}
	4902	break;
	4903	case gainLossOptions::EQ_gEql:
	4904	init_gain = -(rateSample/(-1-lossGainRatioSample)); //init_gain = init_loss =0.5;
	4905	init_loss = rateSample+(rateSample/(-1-lossGainRatioSample));
	4906	break;
	4907	case gainLossOptions::Gam_gEql:
	4908	rateSample = talRandom::SampleGamma(AlphaRate); //init_gain = init_loss = 0.5*rateSample;
	4909	init_gain = -(rateSample/(-1-lossGainRatioSample));
	4910	init_loss = rateSample+(rateSample/(-1-lossGainRatioSample));
	4911	break;
	4912	case gainLossOptions::EQ_gVrl:
	4913	lossGainRatioSample = talRandom::giveRandomNumberBetweenTwoPoints(epsilonForgainLossRatio, loss2gainRatioToSim*2-epsilonForgainLossRatio);
	4914	init_gain = -(rateSample/(-1-lossGainRatioSample));
	4915	init_loss = rateSample+(rateSample/(-1-lossGainRatioSample));
	4916	break;
	4917	case gainLossOptions::Gam_gVrl:
	4918	rateSample = talRandom::SampleGamma(AlphaRate);
	4919	lossGainRatioSample = talRandom::giveRandomNumberBetweenTwoPoints(epsilonForgainLossRatio, loss2gainRatioToSim*2-epsilonForgainLossRatio);
	4920	init_gain = -(rateSample/(-1-lossGainRatioSample));
	4921	init_loss = rateSample+(rateSample/(-1-lossGainRatioSample));
	4922	//init_gain = -(1/(-1-lossGainRatioSample))*rateSample;
	4923	//init_loss = 1+(1/(-1-lossGainRatioSample))*rateSample;
	4924	break;
	4925	default:
	4926	errorMsg::reportError("unknown type in optimizationLevel - {Uniform, Normal, Gamma, MPestEmp GammaNoise}");
	4927	}
	4928	init_gain = min(maxAllowedRate, max(init_gain,minAllowedRate)); // added to avoid too small gain rate
	4929	init_loss = min(maxAllowedRate,max(init_loss,minAllowedRate));
	4930	if(isMultBy2_normQ
	4931	&& !(gainLossOptions::_simulationType == gainLossOptions::Gamma)
	4932	&& !(gainLossOptions::_simulationType == gainLossOptions::SMestEmp)
	4933	&& !(gainLossOptions::_simulationType == gainLossOptions::MPestEmp)){ // with mult=2, Q matrix is normalized with respect to the tree (after multiplied by freq=0.5)
	4934	init_gain *=2;
	4935	init_loss *=2;
	4936	}
	4937	///////////// Theta random per pos
	4938	if(gainLossOptions::_initRootFreqAtRandPointsInSimPostExpEachPos && !gainLossOptions::_isRootFreqEQstationaryInSimulations){
	4939	freq[1]= talRandom::giveRandomNumberBetweenTwoPoints(minThetaRandSample, maxThetaRandSample);
	4940	}
	4941	if(gainLossOptions::_isRootFreqEQstationaryInSimulations){ //Theta=Root(1) is driven from the gain/gain+loss
	4942	freq[1]= init_gain/(init_gain+init_loss);
	4943	}
	4944	freq[0]= 1 - freq[1];
	4945	gainLossModelNonReversible glm(init_gain,init_loss,freq,gainLossOptions::_isRootFreqEQstationary,_isHGT_normal_Pij,_isHGT_with_Q);
	4946	trivialAccelerator pijAcc(&glm);
	4947	uniDistribution uniDistr;
	4948	stochasticProcess *spSimSingle = NULL;
	4949	spSimSingle = new stochasticProcess(&uniDistr,&pijAcc,false);
	4950	MDOUBLE sumQii = 1.0;
	4951
	4952	if(isNormalizeQAfterRatesSample ){ // if normalizeQ for each position, there is no rate variability in practice
	4953	sumQii = normalizeQ(spSimSingle); // (1) Normalize
	4954	MDOUBLE scalingParameterExpectancyOfOne = init_gain+init_loss; //init_gain+init_loss
	4955	if(gainLossOptions::_simulationType == gainLossOptions::Gamma)
	4956	scalingParameterExpectancyOfOne /=gainPlusLossExpectancyGamma;
	4957	static_cast<gainLossModel*>(spSimSingle->getPijAccelerator()->getReplacementModel())->norm((scalingParameterExpectancyOfOne)); // (2) multiply by g+l
	4958	}
	4959	if(isNormalizeQwithEmpricialQ
	4960	&& (gainLossOptions::_simulationType == gainLossOptions::MPestEmp \|\| gainLossOptions::_simulationType == gainLossOptions::SMestEmp) )
	4961	static_cast<gainLossModel*>(spSimSingle->getPijAccelerator()->getReplacementModel())->norm((1/meanQrateFromEMP));
	4962	if(isComputeEmpiricalCorrection
	4963	&& (gainLossOptions::_simulationType == gainLossOptions::MPestEmp \|\| gainLossOptions::_simulationType == gainLossOptions::SMestEmp) ){
	4964	static_cast<gainLossModel*>(spSimSingle->getPijAccelerator()->getReplacementModel())->norm(1/expectedQvalEmpirical);
	4965	}
	4966	//////////////////////////////////////////////////////////////////////////
	4967	//MDOUBLE gGLM = static_cast<gainLossModel*>(spSimSingle->getPijAccelerator()->getReplacementModel())->getMu1();
	4968	//MDOUBLE lGLM = static_cast<gainLossModel*>(spSimSingle->getPijAccelerator()->getReplacementModel())->getMu2();
	4969	//MDOUBLE freq1 = static_cast<gainLossModel*>(spSimSingle->getPijAccelerator()->getReplacementModel())->getTheta();
	4970	//MDOUBLE sumPijQijGLM=(static_cast<gainLossModel*>(spSimSingle->getPijAccelerator()->getReplacementModel()))->sumPijQij();
	4971	//MDOUBLE rateGLM = gGLM(1-freq1)+lGLM(freq1);
	4972
	4973	//MDOUBLE gFormula = (1+lossGainRatioSample)/(2*lossGainRatioSample);
	4974	//MDOUBLE lFormula = gFormula*lossGainRatioSample;
	4975	//MDOUBLE rateFormula = (2gFormulalFormula)/(gFormula+lFormula);
	4976	//cout<<gGLM<<"\t"<<gFormula<<"\t"<<gGLM-gFormula<<endl;
	4977	//cout<<rateGLM<<"\t"<<rateFormula<<"\t"<<rateGLM-rateFormula<<endl;
	4978	//////////////////////////////////////////////////////////////////////////
	4979	//QnormTest += init_gainfreq[0]+init_lossfreq[1];
	4980	init_losses2gainRatioForCostMatrixSum += init_loss/init_gain;
	4981	init_gainsForCostMatrix += init_gain;
	4982	init_lossesForCostMatrix += init_loss;
	4983
	4984	string strSeqNum = gainLossOptions::_outDir + "//" + "SimulatedPostExp"+ int2string(replicat) + "//" + "seq" + int2string(i+1) + ".fa";
	4985	//string resFile = gainLossOptions::_outDir + "//" + "SimulatedPostExp"+ int2string(replicat) + "//" + "resSim" + int2string(i+1) + ".sim";
	4986
	4987	simulateOnePos *simulateOnePosObj = NULL;
	4988	MDOUBLE ratePerPos=0;
	4989	if(gainLossOptions::_is3states){
	4990	Vdouble init_cpN_vals(4);
	4991	init_cpN_vals[0]=gainLossOptions::_3statesGain; //gain (0->1)
	4992	init_cpN_vals[1]=gainLossOptions::_3statesMore; //more (1->more)
	4993	init_cpN_vals[2]=gainLossOptions::_3statesLess; // less (more->1)
	4994	init_cpN_vals[3]=gainLossOptions::_3statesLoss; // loss (1->0)
	4995	Vdouble freq_cpN(3);
	4996	freq_cpN[0]=gainLossOptions::_3states0;
	4997	freq_cpN[1]=gainLossOptions::_3states1;
	4998	freq_cpN[2]=1 - (freq_cpN[0] + freq_cpN[1]);
	4999	simulateOnePosObj = new simulateOnePos(strSeqNum, posSim_out, simulatedEventsFile, i,gainLossOptions::_treeFile,init_cpN_vals[0]+init_cpN_vals[3],freq[1],gainLossOptions::_is3states,NULL,&_tr,&init_cpN_vals,&freq_cpN);
	5000	}
	5001	else{
	5002	ratePerPos=(static_cast<gainLossModel*>(spSimSingle->getPijAccelerator()->getReplacementModel()))->sumPijQij();
	5003	simulateOnePosObj = new simulateOnePos(strSeqNum, posSim_out, simulatedEventsFile, i,gainLossOptions::_treeFile,ratePerPos,freq[1],gainLossOptions::_is3states,spSimSingle,&_tr);
	5004	}
	5005	ratePerPosSum+=ratePerPos;
	5006	perPosStatStream<<i+1<<"\t"<<ratePerPos<<"\t"<<freq[1]<<"\t"<<simulateOnePosObj->getOccurFraction()<<"\n";
	5007
	5008	if(spSimSingle) delete spSimSingle;
	5009	if(simulateOnePosObj) delete simulateOnePosObj;
	5010	if(isTestForGainEventsInEqSeq){ // DEBUG
	5011	if(simulateOnePosObj->getChangesForBranch(2)[0][1]>0) // "A" == 2
	5012	isGainEventInAnode[i+1] = true;
	5013	}
	5014	//if(i==0){
	5015	// seqSimulated = sequenceContainer(simulateOnePosObj->getSequenceContainer(),&alph);
	5016	//}
	5017	//else{
	5018	// sequenceContainer tempSeq = sequenceContainer(simulateOnePosObj->getSequenceContainer(),&alph);
	5019	// seqSimulated.concatenate(tempSeq);
	5020	// fastaFormat::write(cout,seqSimulated);
	5021	//}
	5022
	5023	}
	5024	if(gainLossOptions::_isMatrixGainLossFromRatioInSimulations) // e.g., val=2, loss rate is double that of loss
	5025	costMatrixGainLossRatio = init_lossesForCostMatrix/init_gainsForCostMatrix;
	5026
	5027	//LOGnOUT(5,<<"QnormTest=\t"<<QnormTest/numOfSequenceSets<<"\n");
	5028	LOGnOUT(5,<<"AveLoss/AveGain=\t"<<costMatrixGainLossRatio<<"\n");
	5029	LOGnOUT(5,<<"Ave (loss/gain)=\t"<<init_losses2gainRatioForCostMatrixSum/numOfSequenceSets<<"\n");
	5030	LOGnOUT(4,<<"All positions Q Ave="<<ratePerPosSum/(double)numOfSequenceSets<<"\n");
	5031	time(&t2);
	5032	LOGnOUT(4,<<"End simulations.\nTIME = "<<(t2-t1)/60.0<<" minutes"<<endl<<endl);
	5033	////////////////////////////////////////////////////////////////////////// end of per-position simulations
	5034
	5035	//fastaFormat::write(cout,seqSimulated);
	5036	//vector<int> posToRemove(seqSimulated.seqLen(),false);
	5037	//posToRemove[0] = true;
	5038	//seqSimulated.removePositions(posToRemove);
	5039	//fastaFormat::write(cout,seqSimulated);
	5040
	5041	//re-open seq
	5042	string strSeqFirst = gainLossOptions::_outDir + "//" + "SimulatedPostExp"+ int2string(replicat) + "//" + "seq" + int2string(1) + ".fa";
	5043	ifstream in(strSeqFirst.c_str());
	5044	sequenceContainer seqReOpened = recognizeFormat::read(in,&alph);
	5045	in.close();
	5046	remove( strSeqFirst.c_str() ); // remove seq
	5047
	5048	// Test for gain events in Eq sequences
	5049	int totalNumberOfEqSeqs = 0;
	5050	int totalNumberOfGainsInEqSeqs = 0;
	5051
	5052	for(int i=1; i<numOfSequenceSets; i++){
	5053	string strSeqNum = gainLossOptions::_outDir + "//" + "SimulatedPostExp"+ int2string(replicat) + "//" + "seq" + int2string(i+1) + ".fa";
	5054	ifstream in(strSeqNum.c_str());
	5055	sequenceContainer seqSeqNum = recognizeFormat::read(in,&alph);
	5056	in.close();
	5057	seqReOpened.concatenate(seqSeqNum);
	5058	if(isTestForGainEventsInEqSeq){
	5059	if(_sc==seqSeqNum){
	5060	++totalNumberOfEqSeqs;
	5061	if(isGainEventInAnode[i+1]){ // to be consistent with previous calculations
	5062	++totalNumberOfGainsInEqSeqs;
	5063	//LOGnOUT(4,<<i+1<<" gain event\n");
	5064	}
	5065	//LOGnOUT(4,<<i+1<<" same seq\n");
	5066	}
	5067	else{
	5068	//LOGnOUT(4,<<i<<" Diff seq\n");
	5069	}
	5070	}
	5071	remove( strSeqNum.c_str() ); // remove seq
	5072	}
	5073
	5074	if(isTestForGainEventsInEqSeq){
	5075	LOGnOUT(3,<<totalNumberOfEqSeqs<<" total same seqs\n");
	5076	LOGnOUT(3,<<totalNumberOfGainsInEqSeqs<<" with gain event\n");
	5077	LOGnOUT(3,<<(float)totalNumberOfGainsInEqSeqs/totalNumberOfEqSeqs<<" posteriorProb empirical\n");
	5078	}
	5079	LOGnOUT(5,<<"seqReOpened length "<<seqReOpened.seqLen()<<endl);
	5080	string treeSimString = outDirSeq + "//" + "TreeSim.ph";
	5081	string seqSim = outDirSeq + "//" + "seq" + ".fa";
	5082	ofstream seq_out(seqSim.c_str());
	5083	fastaFormat:: write(seq_out,seqReOpened);
	5084
	5085	if(gainLossOptions::_isOnlySimulateSeq)
	5086	continue;
	5087
	5088	// Parsimony
	5089	if(gainLossOptions::_calculeMaxParsimonyChangeSeveralGainLossRatios){
	5090	MDOUBLE GLratioMulti = 1;
	5091	for(MDOUBLE glRatio = 1+glRatioTieBreakerInCostMatrix; glRatio <=MaxGLratio; glRatio+=GLratioMulti){
	5092	startMaxParsimonyChange(seqReOpened,_tr,outDirSeq
	5093	,glRatio,_distanceFromNearestOTUForRecent,false);
	5094	if(glRatio>2)
	5095	GLratioMulti*=2;
	5096	}
	5097	if(!gainLossOptions::_isMPratio && isMPcostEmpirical)
	5098	startMaxParsimonyChange(seqReOpened,_tr,outDirSeq
	5099	,costMatrixGainLossRatio*costMatrixGainLossRatioCorrectionFactor,_distanceFromNearestOTUForRecent,false);
	5100	startMaxParsimonyChange(seqReOpened,_tr,outDirSeq
	5101	,loss2gainRatioToSim+glRatioTieBreakerInCostMatrix,_distanceFromNearestOTUForRecent,false);
	5102	}
	5103	else{
	5104	if(isMPcostEmpirical)
	5105	startMaxParsimonyChange(seqReOpened,_tr,outDirSeq
	5106	,costMatrixGainLossRatio*costMatrixGainLossRatioCorrectionFactor,_distanceFromNearestOTUForRecent,false);
	5107	startMaxParsimonyChange(seqReOpened,_tr,outDirSeq
	5108	,loss2gainRatioToSim+glRatioTieBreakerInCostMatrix,_distanceFromNearestOTUForRecent,false);
	5109	}
	5110
	5111	// Estimation of model paramers + Stochastic mapping
	5112	tree trSim = _tr;
	5113	if(gainLossOptions::_gainLossDist){
	5114	cloneSpVVec(_spVVec,spVVecSim);
	5115	gainDistSim = _gainDist->clone();
	5116	lossDistSim = _lossDist->clone();
	5117	}
	5118	else{
	5119	spSim = _sp->clone();
	5120	}
	5121	if(_unObservableData_p){
	5122	unObservableDataSim = _unObservableData_p->clone();
	5123	}
	5124	if(gainLossOptions::_isFlatTreeBeforOpt){
	5125	FlatTree(trSim);
	5126	}
	5127
	5128	if(!gainLossOptions::_gainLossDist){// a single Stochastic processes (M)
	5129	if(Parameters::getInt("_isFlatSpBeforeOpt")){
	5130	FlatSpBeforeOpt(*spSim,unObservableDataSim);
	5131	}
	5132	if(Parameters::getInt("_isInitGainLossByEmpiricalFreqSimulatePostExp")){
	5133	Vdouble freqSim = evaluateCharacterFreq(seqReOpened);
	5134	LOGnOUT(4,<<"\nBefore optimization - init sp with simulated freq(1)= "<<freqSim[1]<<endl);
	5135	MDOUBLE init_gain = freqSim[1];
	5136	MDOUBLE init_loss = freqSim[0];
	5137	static_cast<gainLossModel*>(spSim->getPijAccelerator()->getReplacementModel())->setMu1(init_gain, gainLossOptions::_isReversible);
	5138	static_cast<gainLossModelNonReversible*>(spSim->getPijAccelerator()->getReplacementModel())->setMu2(init_loss);
	5139	if(isThetaOptimization())
	5140	static_cast<gainLossModel*>(spSim->getPijAccelerator()->getReplacementModel())->setTheta(freqSim[1]);
	5141	printModellValuesOfParams(spSim,trSim);
	5142	_logL = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(trSim,seqReOpened,*spSim,_weightsUniqPatterns,unObservableDataSim);
	5143
	5144	spSimpleSim = startStochasticProcessSimpleGamma(freqSim[1],freqSim[0],freqSim); // simple initialization, based on empiricalCounting of '1' and '0'
	5145	if(gainLossOptions::_isFlatTreeBeforOpt \|\| gainLossOptions::_isbBLEMwithSimpleSpSimulatePostExp){
	5146	bBLEMwithSimpleSpBeforeFullOptimization(trSim,seqReOpened,spSimpleSim,spSim,spVVecSim,gainDistSim,lossDistSim,unObservableDataSim);
	5147	}
	5148	}
	5149	if(gainLossOptions::_modelOptimizationSimPostExp){
	5150	gainLossOptimizer glOpt(trSim,spSim,seqReOpened,
	5151	gainLossOptions::_epsilonOptimizationIterationCycle*gainLossOptions::_epsilonOptForPostExpSimFactor,
	5152	(int)ceil(gainLossOptions::_maxNumOfIterations*gainLossOptions::_numOfIterationsOptForPostExpSimFactor),
	5153	gainLossOptions::_epsilonOptimizationModel*gainLossOptions::_epsilonOptForPostExpSimFactor,
	5154	(int)ceil(gainLossOptions::_maxNumOfIterationsModel*gainLossOptions::_numOfIterationsOptForPostExpSimFactor),
	5155	gainLossOptions::_epsilonOptimizationBBL*gainLossOptions::_epsilonOptForPostExpSimFactor,
	5156	(int)ceil(gainLossOptions::_maxNumOfIterationsBBL*gainLossOptions::_numOfIterationsOptForPostExpSimFactor),
	5157	NULL,unObservableDataSim, gainLossOptions::_BBLOptimizationSimPostExp, gainLossOptions::_isbblLSWhenbblEMdontImprove);
	5158	if(gainLossOptions::_BBLOptimizationSimPostExp && printTreeForEachReplication){
	5159	trSim = glOpt.getOptTree();
	5160	printTree(trSim, treeSimString);
	5161	}
	5162	}
	5163	}
	5164
	5165	else{// Mixture of Stochastic processes (GLM)
	5166	if(Parameters::getInt("_isFlatSpBeforeOpt")){
	5167	FlatSpBeforeOpt(spVVecSim,gainDistSim,lossDistSim,unObservableDataSim);
	5168	}
	5169	if(Parameters::getInt("_isInitGainLossByEmpiricalFreqSimulatePostExp")){
	5170	Vdouble freqSim = evaluateCharacterFreq(seqReOpened);
	5171	LOGnOUT(4,<<"\nBefore optimization - init sp with simulated freq(1)= "<<freqSim[1]<<endl);
	5172	MDOUBLE init_gain = freqSim[1];
	5173	MDOUBLE init_loss = freqSim[0];
	5174	MDOUBLE AlphasGainLossRatio = getRateAlpha(gainDistSim)/getRateAlpha(lossDistSim);
	5175
	5176	updateGainBeta((1/init_gain)*(1/AlphasGainLossRatio), spVVecSim,gainDistSim,lossDistSim,false);
	5177	updateLossBeta((1/init_loss)*AlphasGainLossRatio, spVVecSim,gainDistSim,lossDistSim,false);
	5178	if(isThetaOptimization())
	5179	updateTheta(freqSim[1], spVVecSim,gainDistSim, lossDistSim);
	5180	normalizeQ(spVVecSim,gainDistSim,lossDistSim);
	5181	printModellValuesOfParams(trSim,spVVecSim,gainDistSim,lossDistSim);
	5182	_logL = likelihoodComputationGL::getTreeLikelihoodAllPosAlphTheSame(trSim,seqReOpened,spVVecSim,gainDistSim,lossDistSim,_weightsUniqPatterns,unObservableDataSim);
	5183
	5184	spSimpleSim = startStochasticProcessSimpleGamma(freqSim[1],freqSim[0],freqSim); // simple initialization, based on empiricalCounting of '1' and '0'
	5185	if(gainLossOptions::_isFlatTreeBeforOpt \|\| gainLossOptions::_isbBLEMwithSimpleSpSimulatePostExp){
	5186	bBLEMwithSimpleSpBeforeFullOptimization(trSim,seqReOpened,spSimpleSim,spSim,spVVecSim,gainDistSim,lossDistSim,unObservableDataSim);
	5187	}
	5188	}
	5189	if(gainLossOptions::_modelOptimizationSimPostExp){
	5190	gainLossOptimizer glOpt(trSim,spVVecSim,gainDistSim,lossDistSim,seqReOpened,
	5191	gainLossOptions::_epsilonOptimizationIterationCycle*gainLossOptions::_epsilonOptForPostExpSimFactor,
	5192	(int)ceil(gainLossOptions::_maxNumOfIterations*gainLossOptions::_numOfIterationsOptForPostExpSimFactor),
	5193	gainLossOptions::_epsilonOptimizationModel*gainLossOptions::_epsilonOptForPostExpSimFactor,
	5194	(int)ceil(gainLossOptions::_maxNumOfIterationsModel*gainLossOptions::_numOfIterationsOptForPostExpSimFactor),
	5195	gainLossOptions::_epsilonOptimizationBBL*gainLossOptions::_epsilonOptForPostExpSimFactor,
	5196	(int)ceil(gainLossOptions::_maxNumOfIterationsBBL*gainLossOptions::_numOfIterationsOptForPostExpSimFactor),
	5197	NULL, _unObservableData_p ,gainLossOptions::_BBLOptimizationSimPostExp, gainLossOptions::_isbblLSWhenbblEMdontImprove);
	5198	if(gainLossOptions::_BBLOptimizationSimPostExp && printTreeForEachReplication){
	5199	trSim = glOpt.getOptTree();
	5200	printTree(trSim, treeSimString);
	5201	}
	5202	}
	5203	}
	5204	//////////////////////////////////////// compute Stochastic Mapping
	5205	MDOUBLE distanceFromNearestOTUForRecent = computeDistanceNearestOTUforRecent(trSim);
	5206	if(!gainLossOptions::_gainLossDist){
	5207	startComputePosteriorExpectationOfChange(seqReOpened,trSim,spSim,LpostPerCatSim,unObservableDataSim,outDirSeq,distanceFromNearestOTUForRecent,false);
	5208	LpostPerCatSim.clear(); // when cleared - each replicate will recompute the _LpostPerCat
	5209	}
	5210	else{
	5211	startComputePosteriorExpectationOfChange(seqReOpened,trSim,spVVecSim,gainDistSim,lossDistSim,LpostPerSpPerCatSim,unObservableDataSim,outDirSeq,distanceFromNearestOTUForRecent,false);
	5212	LpostPerSpPerCatSim.clear(); // when cleared - each replicate will recompute the _LpostPerSpPerCat
	5213	}
	5214	time(&t2);
	5215	LOGnOUT(4,<<"Replicate SimultePosteriorExpectationOfChange RUNNING TIME = "<<(t2-t1)/60.0<<" minutes"<<endl);
	5216	}
	5217	}
	5218
	5219	/********************************************************************************************
	5220	*********************************************************************************************/
	5221	MDOUBLE gainLoss::ComputeEmpiricalExpectedQforStationaryProcess(VVVdouble& EmpPerPos, MDOUBLE minRate){
	5222	MDOUBLE expectedQvalEmpirical = 0;
	5223	for(int pos=0; pos<EmpPerPos.size();++pos){
	5224	MDOUBLE rateForPos=0;
	5225	MDOUBLE gain =EmpPerPos[pos][0][1];
	5226	MDOUBLE loss =EmpPerPos[pos][1][0];
	5227	gain += minRate;
	5228	loss += minRate;
	5229	MDOUBLE Freq_0 = loss/(gain+loss);
	5230	MDOUBLE Freq_1 = gain/(gain+loss);
	5231	rateForPos = gainFreq_0 + lossFreq_1;
	5232	expectedQvalEmpirical += rateForPos;
	5233	}
	5234	expectedQvalEmpirical /= EmpPerPos.size();
	5235	return expectedQvalEmpirical;
	5236	}
	5237
	5238
	5239
	5240	/********************************************************************************************
	5241	*********************************************************************************************/
	5242	void gainLoss::RemoveSeqWithUnknownForSelectedSiteForCorrelation(sequenceContainer& sc, tree& tr){
	5243
	5244	gainLossAlphabet alph;
	5245	int pos_2_remove = _sc.seqLen()-1;
	5246	char char_2_match = alph.unknown();
	5247	vector<int> seqIDs2remove;
	5248	vector<string> SeqNamesThatMatchPos = _sc.getSeqNamesThatMatchPos(pos_2_remove,char_2_match);
	5249
	5250	sequenceContainer::constTaxaIterator myseq=sc.constTaxaBegin();
	5251	for (;myseq != sc.constTaxaEnd(); ++myseq){
	5252	bool bFound = false;
	5253	for (int i=0; i<SeqNamesThatMatchPos.size(); ++i) {
	5254
	5255	if (myseq->name() == SeqNamesThatMatchPos[i])
	5256	{
	5257	bFound = true;
	5258	break;
	5259	}
	5260	}
	5261	if (bFound == true)
	5262	{
	5263	string errMsg = "The taxID name:\t";
	5264	errMsg += myseq->name();
	5265	errMsg += "\twas found in with missing data. Removed.";
	5266	LOGnOUT(4,<<errMsg<<endl);
	5267	seqIDs2remove.push_back(myseq->id());
	5268	}
	5269	}
	5270	for(int i=0; i<seqIDs2remove.size(); ++i){
	5271	sc.remove(seqIDs2remove[i]);
	5272	}
	5273	intersectNamesInTreeAndSequenceContainer(tr,sc);
	5274	// Write seq and tree (required for re-labeling IDs
	5275	string strSeqNum = gainLossOptions::_outDir + "//" + "seq.noUnknown.fa";
	5276	ofstream seq_out(strSeqNum.c_str());
	5277	fastaFormat:: write(seq_out,sc);
	5278	string treeSampled = gainLossOptions::_outDir + "//" + "TheTree.noUnknonwn.ph";
	5279	ofstream treeStream(treeSampled.c_str());
	5280	tr.output(treeStream);
	5281
	5282	// re-Read
	5283	ifstream in(strSeqNum.c_str());
	5284	sc = recognizeFormat::read(in,&alph);
	5285	tr= tree(treeSampled);
	5286
	5287	}
	5288
	5289
	5290
	5291
	5292
	5293	/********************************************************************************************
	5294	*********************************************************************************************/
	5295	//void gainLoss::simultePhyleticData(const int numOfSequenceSets, string strSeqFirst,MDOUBLE loss2gainRatioToSim, gainLossOptions::simulationType simulationType
	5296	// , MDOUBLE AlphaGain, MDOUBLE BetaGain, MDOUBLE AlphaLoss, MDOUBLE BetaLoss, MDOUBLE AlphaRate)
	5297	//{
	5298	// MDOUBLE minThetaRandSample = 0.1; // was 0.01. change all from 0.01 to 0.05, and later to 0.1
	5299	// MDOUBLE maxThetaRandSample = 0.9; // was 0.09
	5300	// MDOUBLE observedTheta = 0.5;
	5301	// MDOUBLE minGainRandSample = 0.1; // was 0.01
	5302	// MDOUBLE maxGainRandSample = 2.0; // was 2.5, now E(val) = 1
	5303	// MDOUBLE minLossRandSample = 0.1; // was 0.01
	5304	// MDOUBLE maxLossRandSample = loss2gainRatioToSim*2;
	5305	// MDOUBLE meanGaussianGain = 1.0;
	5306	// MDOUBLE varianceGaussianGain = 1.0;
	5307	// MDOUBLE meanGaussianLoss = loss2gainRatioToSim;
	5308	// MDOUBLE varianceGaussianLoss = loss2gainRatioToSim;
	5309	// //MDOUBLE AlphaGain = gainLossOptions::_userAlphaGain;
	5310	// //MDOUBLE BetaGain = gainLossOptions::_userBetaGain;
	5311	// //MDOUBLE AlphaLoss = gainLossOptions::_userAlphaLoss;
	5312	// //MDOUBLE BetaLoss = gainLossOptions::_userBetaLoss;
	5313	// //MDOUBLE AlphaRate = gainLossOptions::_userAlphaRate;
	5314	//
	5315	// Vdouble freq(2,0.0);
	5316	// MDOUBLE init_gain = 1.0; //gainLossOptions::_userGain taken from original runs of COG data, get it from params file
	5317	// MDOUBLE init_loss = 1.0; //gainLossOptions::_userLoss
	5318	//
	5319	//
	5320	// MDOUBLE init_gainsForCostMatrix = 0.0; // sum all position
	5321	// MDOUBLE init_lossesForCostMatrix = 0.0; // sum all position
	5322	// MDOUBLE init_losses2gainRatioForCostMatrixSum = 0.0;
	5323	// MDOUBLE randomNoise = 0.0;
	5324	// MDOUBLE costMatrixGainLossRatio = gainLossOptions::_costMatrixGainLossRatio; // to be updated according to simulation
	5325	//
	5326	// // produce random noise
	5327	// if(gainLossOptions::_simulationType == gainLossOptions::GammaNoise){
	5328	// randomNoise = talRandom::giveRandomNumberBetweenTwoPoints(-gainLossOptions::_noiseLevelInGammaSimulation, gainLossOptions::_noiseLevelInGammaSimulation);
	5329	// // if noiseLevel=200% then a param may be scaled up to x3 or down to x0.33 of its value
	5330	// if(randomNoise>=0)
	5331	// randomNoise = 1+randomNoise;
	5332	// else
	5333	// randomNoise = 1/(1-randomNoise);
	5334	// LOGnOUT(4,<<"Noise over all parameters="<< randomNoise<<endl);
	5335	// }
	5336	// // Theta for all positions, (not relevant to stationary models)
	5337	// if(!gainLossOptions::_isStationaryModelForSim){ // else Theta is driven from gain/gain+loss
	5338	// switch (gainLossOptions::_simulationType) //{Uniform, Normal, Gamma, MPestEmp, GammaNoise}
	5339	// {
	5340	// case gainLossOptions::Uniform:
	5341	// case gainLossOptions::Normal:
	5342	// freq[1]= talRandom::giveRandomNumberBetweenTwoPoints(minThetaRandSample, maxThetaRandSample);
	5343	// break;
	5344	// case gainLossOptions::MPestEmp:
	5345	// case gainLossOptions::SMestEmp:
	5346	// if(isThetaFromObservedForEmpiricalSimulations)
	5347	// freq[1]=observedTheta;
	5348	// else
	5349	// freq[1]= talRandom::giveRandomNumberBetweenTwoPoints(minThetaRandSample, maxThetaRandSample);
	5350	// break;
	5351	// case gainLossOptions::Gamma:
	5352	// if(isThetaSampledForGamma)
	5353	// freq[1]= talRandom::giveRandomNumberBetweenTwoPoints(minThetaRandSample, maxThetaRandSample);
	5354	// else
	5355	// freq[1]= gainLossOptions::_userTheta;
	5356	// break;
	5357	// case gainLossOptions::GammaNoise:
	5358	// freq[1] = gainLossOptions::_userTheta*randomNoise;
	5359	// freq[1] = max(freq[1],minThetaRandSample); // added to avoid too small or too big theta
	5360	// freq[1] = min(freq[1],maxThetaRandSample);
	5361	// break;
	5362	// case gainLossOptions::EQ_gEql:
	5363	// case gainLossOptions::EQ_gVrl:
	5364	// case gainLossOptions::Gam_gEql:
	5365	// case gainLossOptions::Gam_gVrl:
	5366	// freq[1]= talRandom::giveRandomNumberBetweenTwoPoints(minThetaRandSample, maxThetaRandSample);
	5367	// break;
	5368	// default:
	5369	// errorMsg::reportError("unknown type in optimizationLevel - {Uniform, Normal, Gamma, MPestEmp,SMestEmp, GammaNoise}");
	5370	// }
	5371	// freq[0]= 1 - freq[1];
	5372	// if(!gainLossOptions::_initRootFreqAtRandPointsInSimPostExpEachPos)
	5373	// LOGnOUT(4,<<" For all positions, Root(1)= "<<freq[1]<<endl);
	5374	// }
	5375	// else{
	5376	// LOGnOUT(4,<<" Stationary model - Theta=Root(1) is driven from the gain/gain+loss"<<endl);
	5377	// }
	5378	//
	5379	// vector<bool> isGainEventInAnode; // DEBUG
	5380	// if(isTestForGainEventsInEqSeq)
	5381	// isGainEventInAnode.resize(numOfSequenceSets+1);
	5382	//
	5383	// ////////////////////////////////////////////////////////////////////////// Positions
	5384	// int randomPosition; //used in MPestEmp or SMestEmp
	5385	// for(int i=0; i<numOfSequenceSets; ++i)
	5386	// {
	5387	// rateSample = 1;
	5388	// lossGainRatioSample = 1;
	5389	// switch (gainLossOptions::_simulationType) //{Uniform, Normal, Gamma, MPestEmp, GammaNoise}
	5390	// {
	5391	// case gainLossOptions::Uniform:
	5392	// init_gain = talRandom::giveRandomNumberBetweenTwoPoints(minGainRandSample, maxGainRandSample);
	5393	// if(gainLossOptions::_isMPratio)
	5394	// init_loss = init_gain * costMatrixGainLossRatio;
	5395	// else
	5396	// init_loss = talRandom::giveRandomNumberBetweenTwoPoints(minLossRandSample, maxLossRandSample);
	5397	// break;
	5398	// case gainLossOptions::Normal:
	5399	// init_gain = talRandom::rand_gaussian(meanGaussianGain, varianceGaussianGain);
	5400	// if(gainLossOptions::_isMPratio)
	5401	// init_loss = init_gain * costMatrixGainLossRatio;
	5402	// else
	5403	// init_loss = talRandom::rand_gaussian(meanGaussianLoss, varianceGaussianLoss);
	5404	// break;
	5405	// case gainLossOptions::Gamma:
	5406	// init_gain = talRandom::SampleGamma(AlphaGain,BetaGain);
	5407	// if(gainLossOptions::_isMPratio)
	5408	// init_loss = init_gain * costMatrixGainLossRatio;
	5409	// else
	5410	// init_loss = talRandom::SampleGamma(AlphaLoss,BetaLoss);
	5411	// break;
	5412	// case gainLossOptions::MPestEmp:
	5413	// randomPosition = (int)talRandom::giveRandomNumberBetweenTwoPoints(0, _MPPerPos.size());
	5414	// init_gain = _MPPerPos[randomPosition][0][1]/meanEventsFromEMP;
	5415	// if(gainLossOptions::_isMPratio)
	5416	// init_loss = init_gain * costMatrixGainLossRatio;
	5417	// else
	5418	// init_loss = _MPPerPos[randomPosition][1][0]/meanEventsFromEMP;
	5419	// break;
	5420	// case gainLossOptions::SMestEmp:
	5421	// randomPosition = (int)talRandom::giveRandomNumberBetweenTwoPoints(0, _SMPerPos.size());
	5422	// init_gain = _SMPerPos[randomPosition][0][1]/meanEventsFromEMP;
	5423	// if(gainLossOptions::_isMPratio)
	5424	// init_loss = init_gain * costMatrixGainLossRatio;
	5425	// else
	5426	// init_loss = _SMPerPos[randomPosition][1][0]/meanEventsFromEMP;
	5427	// break;
	5428	// case gainLossOptions::GammaNoise:
	5429	// init_gain = talRandom::SampleGamma( (AlphaGain*randomNoise)
	5430	// ,(BetaGain*randomNoise));
	5431	// if(gainLossOptions::_isMPratio)
	5432	// init_loss = init_gain * costMatrixGainLossRatio;
	5433	// else{
	5434	// init_loss = talRandom::SampleGamma((AlphaLoss*randomNoise)
	5435	// ,(BetaLoss*randomNoise));
	5436	// }
	5437	// break;
	5438	// case gainLossOptions::EQ_gEql:
	5439	// init_gain = -(rateSample/(-1-lossGainRatioSample)); //init_gain = init_loss =0.5;
	5440	// init_loss = rateSample+(rateSample/(-1-lossGainRatioSample));
	5441	// break;
	5442	// case gainLossOptions::Gam_gEql:
	5443	// rateSample = talRandom::SampleGamma(AlphaRate); //init_gain = init_loss = 0.5*rateSample;
	5444	// init_gain = -(rateSample/(-1-lossGainRatioSample));
	5445	// init_loss = rateSample+(rateSample/(-1-lossGainRatioSample));
	5446	// break;
	5447	// case gainLossOptions::EQ_gVrl:
	5448	// lossGainRatioSample = talRandom::giveRandomNumberBetweenTwoPoints(epsilonForgainLossRatio, loss2gainRatioToSim*2-epsilonForgainLossRatio);
	5449	// init_gain = -(rateSample/(-1-lossGainRatioSample));
	5450	// init_loss = rateSample+(rateSample/(-1-lossGainRatioSample));
	5451	// break;
	5452	// case gainLossOptions::Gam_gVrl:
	5453	// rateSample = talRandom::SampleGamma(AlphaRate);
	5454	// lossGainRatioSample = talRandom::giveRandomNumberBetweenTwoPoints(epsilonForgainLossRatio, loss2gainRatioToSim*2-epsilonForgainLossRatio);
	5455	// init_gain = -(rateSample/(-1-lossGainRatioSample));
	5456	// init_loss = rateSample+(rateSample/(-1-lossGainRatioSample));
	5457	// //init_gain = -(1/(-1-lossGainRatioSample))*rateSample;
	5458	// //init_loss = 1+(1/(-1-lossGainRatioSample))*rateSample;
	5459	// break;
	5460	// default:
	5461	// errorMsg::reportError("unknown type in optimizationLevel - {Uniform, Normal, Gamma, MPestEmp GammaNoise}");
	5462	// }
	5463	// //cout<<init_loss/init_gain<<"\n";
	5464	//
	5465	// init_gain = max(init_gain,minAllowedRate); // added to avoid too small gain rate
	5466	// init_gain = min(init_gain,maxAllowedRate);
	5467	// init_loss = max(init_loss,minAllowedRate);
	5468	// init_loss = min(init_loss,maxAllowedRate);
	5469	//
	5470	// init_losses2gainRatioForCostMatrixSum += init_loss/init_gain;
	5471	// init_gainsForCostMatrix += init_gain;
	5472	// init_lossesForCostMatrix += init_loss;
	5473	//
	5474	// ///////////// Theta random per pos
	5475	// if(gainLossOptions::_initRootFreqAtRandPointsInSimPostExpEachPos){
	5476	// freq[1]= talRandom::giveRandomNumberBetweenTwoPoints(minThetaRandSample, maxThetaRandSample);
	5477	// freq[0]= 1 - freq[1];
	5478	// }
	5479	// if(gainLossOptions::_isStationaryModelForSim){ //Theta=Root(1) is driven from the gain/gain+loss
	5480	// freq[1]= init_gain/(init_gain+init_loss);
	5481	// freq[0]= 1 - freq[1];
	5482	// }
	5483	// gainLossModelNonReversible glm(init_gain,init_loss,freq,gainLossOptions::_isRootFreqEQstationary,_isHGT_normal_Pij,_isHGT_with_Q);
	5484	// trivialAccelerator pijAcc(&glm);
	5485	// uniDistribution uniDistr;
	5486	// stochasticProcess *spSimSingle;
	5487	// spSimSingle = new stochasticProcess(&uniDistr,&pijAcc,false);
	5488	// if(isnormalizeQ){
	5489	// MDOUBLE sumQii = normalizeQ(spSimSingle); // added.
	5490	// LOG(6,<<" Pos= "<<i+1<<
	5491	// "\tfreq1="<<freq[1]<<"\tgain="<<init_gain/sumQii<<"\tloss="<<init_loss/sumQii<<endl);
	5492	// }
	5493	// string strSeqNum = gainLossOptions::_outDir + "//" + "SimulatedPostExp"+ int2string(replicat) + "//" + "seq" + int2string(i+1) + ".fa";
	5494	// string resFile = gainLossOptions::_outDir + "//" + "SimulatedPostExp"+ int2string(replicat) + "//" + "resSim" + int2string(i+1) + ".sim";
	5495	// simulateOnePos simulateOnePosObj(strSeqNum, resFile, i,gainLossOptions::_treeFile,spSimSingle);
	5496	// if(spSimSingle) delete spSimSingle;
	5497	// if(isTestForGainEventsInEqSeq){ // DEBUG
	5498	// if(simulateOnePosObj.getChangesForBranch(2)[0][1]>0) // "A" == 2
	5499	// isGainEventInAnode[i+1] = true;
	5500	// }
	5501	// }
	5502	// if(gainLossOptions::_isMatrixGainLossFromRatioInSimulations) // e.g., val=2, loss rate is double that of gain
	5503	// costMatrixGainLossRatio = init_lossesForCostMatrix/init_gainsForCostMatrix;
	5504	//
	5505	// cout<<"AveLoss/AveGain"<<costMatrixGainLossRatio<<"\n";
	5506	// cout<<"Ave(loss/gain)"<<init_losses2gainRatioForCostMatrixSum/numOfSequenceSets<<"\n";
	5507	// ////////////////////////////////////////////////////////////////////////// end of per-position simulations
	5508	//
	5509	// //re-open seq
	5510	// gainLossAlphabet alph;
	5511	// //string strSeqFirst = gainLossOptions::_outDir + "//" + "SimulatedPostExp"+ int2string(replicat) + "//" + "seq" + int2string(1) + ".fa";
	5512	// ifstream in(strSeqFirst.c_str());
	5513	// sequenceContainer seqReOpened = recognizeFormat::read(in,&alph);
	5514	// in.close();
	5515	// remove( strSeqFirst.c_str() ); // remove seq
	5516	//
	5517	// // Test for gain events in Eq sequences
	5518	// int totalNumberOfEqSeqs = 0;
	5519	// int totalNumberOfGainsInEqSeqs = 0;
	5520	//
	5521	// for(int i=1; i<numOfSequenceSets; i++){
	5522	// string strSeqNum = gainLossOptions::_outDir + "//" + "SimulatedPostExp"+ int2string(replicat) + "//" + "seq" + int2string(i+1) + ".fa";
	5523	// ifstream in(strSeqNum.c_str());
	5524	// sequenceContainer seqSeqNum = recognizeFormat::read(in,&alph);
	5525	// in.close();
	5526	// seqReOpened.concatenate(seqSeqNum);
	5527	// remove( strSeqNum.c_str() ); // remove seq
	5528	// }
	5529	//}
	5530
	5531
	5532	//void gainLoss::startSimultePosteriorExpectationOfChange(int numOfSequenceSets, const int numOfRepeats)
	5533	//{
	5534	// LOGnOUT(4,<<endl<<"****************************************************\n startSimultePosteriorExpectationOfChange... "<<endl);
	5535	// LOGnOUT(4,<<"Replicates="<<numOfRepeats<<" Positions="<<numOfSequenceSets<<endl);
	5536	// LOGnOUT(4,<<" simulationType {Uniform, Normal, Gamma, MPestEmp,SMestEmp, GammaNoise;..."<<endl);
	5537	// LOGnOUT(4,<<" EQ_gEql,EQ_gVrl,Gam_gEql,GamgVrl}="<<gainLossOptions::_simulationType<<endl);
	5538	//
	5539	// if(gainLossOptions::_simulationType == gainLossOptions::SMestEmp && _SMPerPos.size()==0){
	5540	// LOGnOUT(4,<<" WARN!!! _SMPerPos size="<<_SMPerPos.size()<<endl);
	5541	// startComputePosteriorExpectationOfChange();
	5542	// }
	5543	//
	5544	// if(gainLossOptions::_simulationType == gainLossOptions::MPestEmp && _MPPerPos.size()==0){
	5545	// LOGnOUT(4,<<" WARN!!! _MPPerPos size="<<_MPPerPos.size()<<endl);
	5546	// startMaxParsimonyChange();
	5547	// }
	5548	// gainLossAlphabet alph;
	5549	// simulatePhyleticPatternsAndPredictEvents simulateObj(_tr,_sp, alph);
	5550	//
	5551	//
	5552	//
	5553	// ////////////////////////////////////////////////////////////////////////// Replicates
	5554	// for(int replicat=1; replicat<=numOfRepeats; ++replicat){
	5555	// LOGnOUT(4,<<endl<<".......................................Replicate= "<<replicat<<endl);
	5556	// time_t t1,t2;
	5557	// time(&t1);
	5558	//
	5559	// createDir(gainLossOptions::_outDir, "SimulatedPostExp"+ int2string(replicat));
	5560	// string outDirSeq = gainLossOptions::_outDir + "//" + "SimulatedPostExp"+ int2string(replicat) + "//" + "seqAll" ;
	5561	// createDir(gainLossOptions::_outDir + "//" + "SimulatedPostExp"+ int2string(replicat), "seqAll" );
	5562	// string simulatedEventsSimString = outDirSeq + "//" + "simulatedEvents.txt";
	5563	// ofstream* simulatedEventsFile = new ofstream(simulatedEventsSimString.c_str());
	5564	//
	5565	// string perPosStat = outDirSeq + "//" + "statPos.txt";
	5566	// ofstream perPosStatStream(perPosStat.c_str());
	5567	// perPosStatStream<<"pos"<<"\t"<<"rate"<<"\t"<<"theta"<<"\t"<<"occur"<<"\n";
	5568	//
	5569	// string perBranchStat = outDirSeq + "//" + "statBranch.txt";
	5570	// ofstream perBranchStatStream(perBranchStat.c_str());
	5571	// perBranchStatStream<<"branch"<<"\t"<<"branchLength"<<"\t"<<"distance2root"<<"\t"<<"distance2NearestOTU"<<"\t"<<"numOfNodes2NearestOTU"<<endl;
	5572	// treeIterTopDownConst tit(_tr);
	5573	// for (tree::nodeP myN = tit.first();myN!=tit.end(); myN = tit.next()) {
	5574	// if(myN->isRoot())
	5575	// continue;
	5576	// perBranchStatStream<<myN->name()<<"\t"<<myN->dis2father()<<"\t"<<myN->getDistance2ROOT()<<"\t"<<myN->getMinimalDistance2OTU()<<"\t"<<myN->getMinimalNumOfNodes2OTU()<<endl;
	5577	// }
	5578	// perBranchStatStream.close();
	5579	// MDOUBLE init_gainsForCostMatrix = 0.0; // sum all position
	5580	// MDOUBLE init_lossesForCostMatrix = 0.0; // sum all position
	5581	// MDOUBLE init_losses2gainRatioForCostMatrixSum = 0.0;
	5582	// MDOUBLE QnormTest = 0.0;
	5583	//
	5584	// // produce random noise
	5585	// if(gainLossOptions::_simulationType == gainLossOptions::GammaNoise){
	5586	// randomNoise = talRandom::giveRandomNumberBetweenTwoPoints(-gainLossOptions::_noiseLevelInGammaSimulation, gainLossOptions::_noiseLevelInGammaSimulation);
	5587	// // if noiseLevel=200% than param may be up to x3 or down to x0.33 its value
	5588	// if(randomNoise>=0)
	5589	// randomNoise = 1+randomNoise;
	5590	// else
	5591	// randomNoise = 1/(1-randomNoise);
	5592	// LOGnOUT(4,<<"Noise over all parameters="<< randomNoise<<endl);
	5593	// }
	5594	// // Theta for all positions, (not relevant to stationary models)
	5595	// if(!gainLossOptions::_isStationaryModelForSim){ // else Theta is driven from gain/gain+loss
	5596	// switch (gainLossOptions::_simulationType) //{Uniform, Normal, Gamma, MPestEmp, GammaNoise}
	5597	// {
	5598	// case gainLossOptions::Uniform:
	5599	// case gainLossOptions::Normal:
	5600	// freq[1]= talRandom::giveRandomNumberBetweenTwoPoints(minThetaRandSample, maxThetaRandSample);
	5601	// break;
	5602	// case gainLossOptions::MPestEmp:
	5603	// case gainLossOptions::SMestEmp:
	5604	// if(isThetaFromObservedForEmpiricalSimulations)
	5605	// freq[1]=observedTheta;
	5606	// else
	5607	// freq[1]= talRandom::giveRandomNumberBetweenTwoPoints(minThetaRandSample, maxThetaRandSample);
	5608	// break;
	5609	// case gainLossOptions::Gamma:
	5610	// if(isThetaSampledForGamma)
	5611	// freq[1]= talRandom::giveRandomNumberBetweenTwoPoints(minThetaRandSample, maxThetaRandSample);
	5612	// else
	5613	// freq[1]= gainLossOptions::_userTheta;
	5614	// break;
	5615	// case gainLossOptions::GammaNoise:
	5616	// freq[1] = gainLossOptions::_userTheta*randomNoise;
	5617	// freq[1] = max(freq[1],minThetaRandSample); // added to avoid too small or too big theta
	5618	// freq[1] = min(freq[1],maxThetaRandSample);
	5619	// break;
	5620	// case gainLossOptions::EQ_gEql:
	5621	// case gainLossOptions::EQ_gVrl:
	5622	// case gainLossOptions::Gam_gEql:
	5623	// case gainLossOptions::Gam_gVrl:
	5624	// freq[1]= talRandom::giveRandomNumberBetweenTwoPoints(minThetaRandSample, maxThetaRandSample);
	5625	// break;
	5626	// default:
	5627	// errorMsg::reportError("unknown type in optimizationLevel - {Uniform, Normal, Gamma, MPestEmp,SMestEmp, GammaNoise}");
	5628	// }
	5629	// if(!gainLossOptions::_initRootFreqAtRandPointsInSimPostExpEachPos)
	5630	// LOGnOUT(4,<<" For all positions, Root(1)= "<<freq[1]<<endl);
	5631	// }
	5632	// else{
	5633	// LOGnOUT(4,<<" Stationary model - Theta=Root(1) is driven from the gain/gain+loss"<<endl);
	5634	// }
	5635	//
	5636	// vector<bool> isGainEventInAnode; // DEBUG
	5637	// if(isTestForGainEventsInEqSeq)
	5638	// isGainEventInAnode.resize(numOfSequenceSets+1);
	5639	//
	5640	// ////////////////////////////////////////////////////////////////////////// Positions
	5641	// int randomPosition; //used in MPestEmp or SMestEmp
	5642	// sequenceContainer seqSimulated;
	5643	// gainLossAlphabet alph;
	5644	//
	5645	//
	5646	// for(int i=0; i<numOfSequenceSets; ++i)
	5647	// {
	5648	// rateSample = 1;
	5649	// lossGainRatioSample = 1;
	5650	// switch (gainLossOptions::_simulationType) //{Uniform, Normal, Gamma, MPestEmp, GammaNoise}
	5651	// {
	5652	// case gainLossOptions::Uniform:
	5653	// init_gain = talRandom::giveRandomNumberBetweenTwoPoints(minGainRandSample, maxGainRandSample);
	5654	// if(gainLossOptions::_isMPratio)
	5655	// init_loss = init_gain * costMatrixGainLossRatio;
	5656	// else
	5657	// init_loss = talRandom::giveRandomNumberBetweenTwoPoints(minLossRandSample, maxLossRandSample);
	5658	// break;
	5659	// case gainLossOptions::Normal:
	5660	// init_gain = talRandom::rand_gaussian(meanGaussianGain, varianceGaussianGain);
	5661	// if(gainLossOptions::_isMPratio)
	5662	// init_loss = init_gain * costMatrixGainLossRatio;
	5663	// else
	5664	// init_loss = talRandom::rand_gaussian(meanGaussianLoss, varianceGaussianLoss);
	5665	// break;
	5666	// case gainLossOptions::Gamma:
	5667	// init_gain = talRandom::SampleGamma(AlphaGain,BetaGain);
	5668	// if(gainLossOptions::_isMPratio)
	5669	// init_loss = init_gain * costMatrixGainLossRatio;
	5670	// else
	5671	// init_loss = talRandom::SampleGamma(AlphaLoss,BetaLoss);
	5672	// break;
	5673	// case gainLossOptions::MPestEmp:
	5674	// randomPosition = (int)talRandom::giveRandomNumberBetweenTwoPoints(0, _MPPerPos.size());
	5675	// init_gain = _MPPerPos[randomPosition][0][1]/meanEventsFromEMP;
	5676	// if(gainLossOptions::_isMPratio)
	5677	// init_loss = init_gain * costMatrixGainLossRatio;
	5678	// else
	5679	// init_loss = _MPPerPos[randomPosition][1][0]/meanEventsFromEMP;
	5680	// break;
	5681	// case gainLossOptions::SMestEmp:
	5682	// randomPosition = (int)talRandom::giveRandomNumberBetweenTwoPoints(0, _SMPerPos.size());
	5683	// init_gain = _SMPerPos[randomPosition][0][1]/meanEventsFromEMP;
	5684	// if(gainLossOptions::_isMPratio)
	5685	// init_loss = init_gain * costMatrixGainLossRatio;
	5686	// else
	5687	// init_loss = _SMPerPos[randomPosition][1][0]/meanEventsFromEMP;
	5688	// break;
	5689	// case gainLossOptions::GammaNoise:
	5690	// init_gain = talRandom::SampleGamma( (AlphaGain*randomNoise)
	5691	// ,(BetaGain*randomNoise));
	5692	// if(gainLossOptions::_isMPratio)
	5693	// init_loss = init_gain * costMatrixGainLossRatio;
	5694	// else{
	5695	// init_loss = talRandom::SampleGamma((AlphaLoss*randomNoise)
	5696	// ,(BetaLoss*randomNoise));
	5697	// }
	5698	// break;
	5699	// case gainLossOptions::EQ_gEql:
	5700	// init_gain = -(rateSample/(-1-lossGainRatioSample)); //init_gain = init_loss =0.5;
	5701	// init_loss = rateSample+(rateSample/(-1-lossGainRatioSample));
	5702	// break;
	5703	// case gainLossOptions::Gam_gEql:
	5704	// rateSample = talRandom::SampleGamma(AlphaRate); //init_gain = init_loss = 0.5*rateSample;
	5705	// init_gain = -(rateSample/(-1-lossGainRatioSample));
	5706	// init_loss = rateSample+(rateSample/(-1-lossGainRatioSample));
	5707	// break;
	5708	// case gainLossOptions::EQ_gVrl:
	5709	// lossGainRatioSample = talRandom::giveRandomNumberBetweenTwoPoints(epsilonForgainLossRatio, loss2gainRatioToSim*2-epsilonForgainLossRatio);
	5710	// init_gain = -(rateSample/(-1-lossGainRatioSample));
	5711	// init_loss = rateSample+(rateSample/(-1-lossGainRatioSample));
	5712	// break;
	5713	// case gainLossOptions::Gam_gVrl:
	5714	// rateSample = talRandom::SampleGamma(AlphaRate);
	5715	// lossGainRatioSample = talRandom::giveRandomNumberBetweenTwoPoints(epsilonForgainLossRatio, loss2gainRatioToSim*2-epsilonForgainLossRatio);
	5716	// init_gain = -(rateSample/(-1-lossGainRatioSample));
	5717	// init_loss = rateSample+(rateSample/(-1-lossGainRatioSample));
	5718	// //init_gain = -(1/(-1-lossGainRatioSample))*rateSample;
	5719	// //init_loss = 1+(1/(-1-lossGainRatioSample))*rateSample;
	5720	// break;
	5721	// default:
	5722	// errorMsg::reportError("unknown type in optimizationLevel - {Uniform, Normal, Gamma, MPestEmp GammaNoise}");
	5723	// }
	5724	// if(isMultBy2_normQ){ // with mult=2, Q matrix is normalized with respect to the tree (after multiplied by freq=0.5)
	5725	// init_gain *=2;
	5726	// init_loss *=2;
	5727	// }
	5728	// init_gain = max(init_gain,minAllowedRate); // added to avoid too small gain rate
	5729	// init_gain = min(init_gain,maxAllowedRate);
	5730	// init_loss = max(init_loss,minAllowedRate);
	5731	// init_loss = min(init_loss,maxAllowedRate);
	5732	//
	5733	// ///////////// Theta random per pos
	5734	// if(gainLossOptions::_initRootFreqAtRandPointsInSimPostExpEachPos){
	5735	// freq[1]= talRandom::giveRandomNumberBetweenTwoPoints(minThetaRandSample, maxThetaRandSample);
	5736	// }
	5737	// if(gainLossOptions::_isStationaryModelForSim){ //Theta=Root(1) is driven from the gain/gain+loss
	5738	// freq[1]= init_gain/(init_gain+init_loss);
	5739	// }
	5740	// freq[0]= 1 - freq[1];
	5741	// gainLossModelNonReversible glm(init_gain,init_loss,freq,gainLossOptions::_isRootFreqEQstationary,_isHGT_normal_Pij,_isHGT_with_Q);
	5742	// trivialAccelerator pijAcc(&glm);
	5743	// uniDistribution uniDistr;
	5744	// stochasticProcess *spSimSingle;
	5745	// spSimSingle = new stochasticProcess(&uniDistr,&pijAcc,false);
	5746	// MDOUBLE sumQii = 1.0;
	5747	// if(isNormalizeQ){ // if normalizeQ for each position, there is no rate variability in practice
	5748	// sumQii = normalizeQ(spSimSingle);
	5749	// LOG(6,<<" Pos= "<<i+1<<
	5750	// "\tfreq1="<<freq[1]<<"\tgain="<<init_gain/sumQii<<"\tloss="<<init_loss/sumQii<<endl);
	5751	// }
	5752	// //QnormTest += init_gain*freq[0]+init_loss*freq[1];
	5753	// init_losses2gainRatioForCostMatrixSum += init_loss/init_gain;
	5754	// init_gainsForCostMatrix += init_gain;
	5755	// init_lossesForCostMatrix += init_loss;
	5756	//
	5757	// string strSeqNum = gainLossOptions::_outDir + "//" + "SimulatedPostExp"+ int2string(replicat) + "//" + "seq" + int2string(i+1) + ".fa";
	5758	// string resFile = gainLossOptions::_outDir + "//" + "SimulatedPostExp"+ int2string(replicat) + "//" + "resSim" + int2string(i+1) + ".sim";
	5759	//
	5760	// simulateOnePos *simulateOnePosObj = NULL;
	5761	// MDOUBLE ratePerPos=0;
	5762	// if(gainLossOptions::_is3states){
	5763	// Vdouble init_cpN_vals(4);
	5764	// init_cpN_vals[0]=gainLossOptions::_3statesGain; //gain (0->1)
	5765	// init_cpN_vals[1]=gainLossOptions::_3statesMore; //more (1->more)
	5766	// init_cpN_vals[2]=gainLossOptions::_3statesLess; // less (more->1)
	5767	// init_cpN_vals[3]=gainLossOptions::_3statesLoss; // loss (1->0)
	5768	// Vdouble freq_cpN(3);
	5769	// freq_cpN[0]=gainLossOptions::_3states0;
	5770	// freq_cpN[1]=gainLossOptions::_3states1;
	5771	// freq_cpN[2]=1 - (freq_cpN[0] + freq_cpN[1]);
	5772	// simulateOnePosObj = new simulateOnePos(strSeqNum, resFile, simulatedEventsFile, i,gainLossOptions::_treeFile,init_cpN_vals[0]+init_cpN_vals[3],freq[1],gainLossOptions::_is3states,NULL,&_tr,&init_cpN_vals,&freq_cpN);
	5773	// }
	5774	// else{
	5775	// ratePerPos=(static_cast<gainLossModel*>(spSimSingle->getPijAccelerator()->getReplacementModel()))->sumPijQij();
	5776	// simulateOnePosObj = new simulateOnePos(strSeqNum, resFile, simulatedEventsFile, i,gainLossOptions::_treeFile,ratePerPos,freq[1],gainLossOptions::_is3states,spSimSingle,&_tr);
	5777	// }
	5778	// perPosStatStream<<i+1<<"\t"<<ratePerPos<<"\t"<<freq[1]<<"\t"<<simulateOnePosObj->getOccurFraction()<<"\n";
	5779	//
	5780	//
	5781	// if(spSimSingle) delete spSimSingle;
	5782	// if(isTestForGainEventsInEqSeq){ // DEBUG
	5783	// if(simulateOnePosObj->getChangesForBranch(2)[0][1]>0) // "A" == 2
	5784	// isGainEventInAnode[i+1] = true;
	5785	// }
	5786	// //if(i==0){
	5787	// // seqSimulated = sequenceContainer(simulateOnePosObj->getSequenceContainer(),&alph);
	5788	// //}
	5789	// //else{
	5790	// // sequenceContainer tempSeq = sequenceContainer(simulateOnePosObj->getSequenceContainer(),&alph);
	5791	// // seqSimulated.concatenate(tempSeq);
	5792	// // fastaFormat::write(cout,seqSimulated);
	5793	// //}
	5794	//
	5795	// }
	5796	// if(gainLossOptions::_isMatrixGainLossFromRatioInSimulations) // e.g., val=2, loss rate is double that of gain
	5797	// costMatrixGainLossRatio = init_lossesForCostMatrix/init_gainsForCostMatrix;
	5798	//
	5799	// //LOGnOUT(5,<<"QnormTest=\t"<<QnormTest/numOfSequenceSets<<"\n");
	5800	// LOGnOUT(5,<<"AveLoss/AveGain=\t"<<costMatrixGainLossRatio<<"\n");
	5801	// LOGnOUT(5,<<"Ave (loss/gain)=\t"<<init_losses2gainRatioForCostMatrixSum/numOfSequenceSets<<"\n");
	5802	//
	5803	// time(&t2);
	5804	// LOGnOUT(4,<<"End simulations.\nTIME = "<<(t2-t1)/60.0<<" minutes"<<endl<<endl);
	5805	// ////////////////////////////////////////////////////////////////////////// end of per-position simulations
	5806	//
	5807	// //fastaFormat::write(cout,seqSimulated);
	5808	// //vector<int> posToRemove(seqSimulated.seqLen(),false);
	5809	// //posToRemove[0] = true;
	5810	// //seqSimulated.removePositions(posToRemove);
	5811	// //fastaFormat::write(cout,seqSimulated);
	5812	//
	5813	// //re-open seq
	5814	// string strSeqFirst = gainLossOptions::_outDir + "//" + "SimulatedPostExp"+ int2string(replicat) + "//" + "seq" + int2string(1) + ".fa";
	5815	// ifstream in(strSeqFirst.c_str());
	5816	// sequenceContainer seqReOpened = recognizeFormat::read(in,&alph);
	5817	// in.close();
	5818	// remove( strSeqFirst.c_str() ); // remove seq
	5819	//
	5820	// // Test for gain events in Eq sequences
	5821	// int totalNumberOfEqSeqs = 0;
	5822	// int totalNumberOfGainsInEqSeqs = 0;
	5823	//
	5824	// for(int i=1; i<numOfSequenceSets; i++){
	5825	// string strSeqNum = gainLossOptions::_outDir + "//" + "SimulatedPostExp"+ int2string(replicat) + "//" + "seq" + int2string(i+1) + ".fa";
	5826	// ifstream in(strSeqNum.c_str());
	5827	// sequenceContainer seqSeqNum = recognizeFormat::read(in,&alph);
	5828	// in.close();
	5829	// seqReOpened.concatenate(seqSeqNum);
	5830	// if(isTestForGainEventsInEqSeq){
	5831	// if(_sc==seqSeqNum){
	5832	// ++totalNumberOfEqSeqs;
	5833	// if(isGainEventInAnode[i+1]){ // to be consistent with previous calculations
	5834	// ++totalNumberOfGainsInEqSeqs;
	5835	// //LOGnOUT(4,<<i+1<<" gain event\n");
	5836	// }
	5837	// //LOGnOUT(4,<<i+1<<" same seq\n");
	5838	// }
	5839	// else{
	5840	// //LOGnOUT(4,<<i<<" Diff seq\n");
	5841	// }
	5842	// }
	5843	// remove( strSeqNum.c_str() ); // remove seq
	5844	// }
	5845	//
	5846	// if(isTestForGainEventsInEqSeq){
	5847	// LOGnOUT(3,<<totalNumberOfEqSeqs<<" total same seqs\n");
	5848	// LOGnOUT(3,<<totalNumberOfGainsInEqSeqs<<" with gain event\n");
	5849	// LOGnOUT(3,<<(float)totalNumberOfGainsInEqSeqs/totalNumberOfEqSeqs<<" posteriorProb empirical\n");
	5850	// }
	5851	// LOGnOUT(5,<<"seqReOpened length "<<seqReOpened.seqLen()<<endl);
	5852	// string treeSimString = outDirSeq + "//" + "TreeSim.ph";
	5853	// string seqSim = outDirSeq + "//" + "seq" + ".fa";
	5854	// ofstream seq_out(seqSim.c_str());
	5855	// fastaFormat:: write(seq_out,seqReOpened);
	5856	//
	5857	// // Parsimony
	5858	// if(gainLossOptions::_calculeMaxParsimonyChangeSeveralGainLossRatios){
	5859	// MDOUBLE GLratioMulti = 1;
	5860	// for(MDOUBLE glRatio = 1+glRatioTieBreakerInCostMatrix; glRatio <=MaxGLratio; glRatio+=GLratioMulti){
	5861	// startMaxParsimonyChange(seqReOpened,_tr,outDirSeq
	5862	// ,glRatio,_distanceFromNearestOTUForRecent);
	5863	// if(glRatio>2)
	5864	// GLratioMulti*=2;
	5865	// }
	5866	// if(!gainLossOptions::_isMPratio && isMPcostEmpirical)
	5867	// startMaxParsimonyChange(seqReOpened,_tr,outDirSeq
	5868	// ,costMatrixGainLossRatio*costMatrixGainLossRatioCorrectionFactor,_distanceFromNearestOTUForRecent);
	5869	// startMaxParsimonyChange(seqReOpened,_tr,outDirSeq
	5870	// ,loss2gainRatioToSim+glRatioTieBreakerInCostMatrix,_distanceFromNearestOTUForRecent);
	5871	// }
	5872	// else{
	5873	// if(isMPcostEmpirical)
	5874	// startMaxParsimonyChange(seqReOpened,_tr,outDirSeq
	5875	// ,costMatrixGainLossRatio*costMatrixGainLossRatioCorrectionFactor,_distanceFromNearestOTUForRecent);
	5876	// startMaxParsimonyChange(seqReOpened,_tr,outDirSeq
	5877	// ,loss2gainRatioToSim+glRatioTieBreakerInCostMatrix,_distanceFromNearestOTUForRecent);
	5878	// }
	5879	//
	5880	// // Estimation of model parameters + Stochastic mapping
	5881	// tree trSim = _tr;
	5882	// if(gainLossOptions::_gainLossDist){
	5883	// cloneSpVVec(_spVVec,spVVecSim);
	5884	// gainDistSim = _gainDist->clone();
	5885	// lossDistSim = _lossDist->clone();
	5886	// }
	5887	// else{
	5888	// spSim = _sp->clone();
	5889	// }
	5890	// if(_unObservableData_p){
	5891	// unObservableDataSim = _unObservableData_p->clone();
	5892	// }
	5893	// if(gainLossOptions::_isFlatTreeBeforOpt){
	5894	// FlatTree(trSim);
	5895	// }
	5896	//
	5897	// if(!gainLossOptions::_gainLossDist){// a single Stochastic processes (M)
	5898	// if(Parameters::getInt("_isFlatSpBeforeOpt")){
	5899	// FlatSpBeforeOpt(*spSim,unObservableDataSim);
	5900	// }
	5901	// if(Parameters::getInt("_isInitGainLossByEmpiricalFreqSimulatePostExp")){
	5902	// Vdouble freqSim = evaluateCharacterFreq(seqReOpened);
	5903	// LOGnOUT(4,<<"\nBefore optimization - init sp with simulated freq(1)= "<<freqSim[1]<<endl);
	5904	// MDOUBLE init_gain = freqSim[1];
	5905	// MDOUBLE init_loss = freqSim[0];
	5906	// static_cast<gainLossModel*>(spSim->getPijAccelerator()->getReplacementModel())->setMu1(init_gain, gainLossOptions::_isReversible);
	5907	// static_cast<gainLossModelNonReversible*>(spSim->getPijAccelerator()->getReplacementModel())->setMu2(init_loss);
	5908	// static_cast<gainLossModel*>(spSim->getPijAccelerator()->getReplacementModel())->setTheta(freqSim[1]);
	5909	//
	5910	// spSimpleSim = startStochasticProcessSimpleGamma(freqSim[1],freqSim[0],freqSim); // simple initialization, based on empiricalCounting of '1' and '0'
	5911	// _logL = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(trSim,seqReOpened,*spSim,_weightsUniqPatterns,unObservableDataSim);
	5912	//
	5913	// if(gainLossOptions::_isFlatTreeBeforOpt || gainLossOptions::_isbBLEMwithSimpleSpSimulatePostExp){
	5914	// bBLEMwithSimpleSpBeforeFullOptimization(trSim,seqReOpened,spSimpleSim,spSim,spVVecSim,gainDistSim,lossDistSim,unObservableDataSim);
	5915	// }
	5916	// }
	5917	// if(gainLossOptions::_modelOptimizationSimPostExp){
	5918	// gainLossOptimizer glOpt(trSim,spSim,seqReOpened,
	5919	// gainLossOptions::_epsilonOptimizationIterationCycle*gainLossOptions::_epsilonOptForPostExpSimFactor,
	5920	// (int)ceil(gainLossOptions::_maxNumOfIterations*gainLossOptions::_numOfIterationsOptForPostExpSimFactor),
	5921	// gainLossOptions::_epsilonOptimizationModel*gainLossOptions::_epsilonOptForPostExpSimFactor,
	5922	// (int)ceil(gainLossOptions::_maxNumOfIterationsModel*gainLossOptions::_numOfIterationsOptForPostExpSimFactor),
	5923	// gainLossOptions::_epsilonOptimizationBBL*gainLossOptions::_epsilonOptForPostExpSimFactor,
	5924	// (int)ceil(gainLossOptions::_maxNumOfIterationsBBL*gainLossOptions::_numOfIterationsOptForPostExpSimFactor),
	5925	// NULL,unObservableDataSim, gainLossOptions::_BBLOptimizationSimPostExp, gainLossOptions::_isbblLSWhenbblEMdontImprove);
	5926	// if(gainLossOptions::_BBLOptimizationSimPostExp && printTreeForEachReplication){
	5927	// trSim = glOpt.getOptTree();
	5928	// printTree(trSim, treeSimString);
	5929	// }
	5930	// }
	5931	// }
	5932	//
	5933	// else{// Mixture of Stochastic processes (GLM)
	5934	// if(Parameters::getInt("_isFlatSpBeforeOpt")){
	5935	// FlatSpBeforeOpt(spVVecSim,gainDistSim,lossDistSim,unObservableDataSim);
	5936	// }
	5937	// if(Parameters::getInt("_isInitGainLossByEmpiricalFreqSimulatePostExp")){
	5938	// Vdouble freqSim = evaluateCharacterFreq(seqReOpened);
	5939	// LOGnOUT(4,<<"\nBefore optimization - init ssp with simulated freq(1)= "<<freqSim[1]<<endl);
	5940	// MDOUBLE init_gain = freqSim[1];
	5941	// MDOUBLE init_loss = freqSim[0];
	5942	// MDOUBLE gainLossRatioToCompleteByBeta = (init_gain/init_loss)*(gainLossOptions::_userAlphaLoss/gainLossOptions::_userAlphaGain);
	5943	// MDOUBLE initBetaGain =sqrt(1/gainLossRatioToCompleteByBeta); // AlphaGain = 0.35
	5944	// MDOUBLE initBetaLoss =sqrt(gainLossRatioToCompleteByBeta); // AlphaLoss = 0.9
	5945	// updateGainBeta(initBetaGain, spVVecSim,gainDistSim,lossDistSim);
	5946	// updateLossBeta(initBetaLoss, spVVecSim,gainDistSim,lossDistSim);
	5947	// updateTheta(freqSim[1], spVVecSim,gainDistSim, lossDistSim);
	5948	//
	5949	// spSimpleSim = startStochasticProcessSimpleGamma(freqSim[1],freqSim[0],freqSim); // simple initialization, based on empiricalCounting of '1' and '0'
	5950	// _logL = likelihoodComputationGL::getTreeLikelihoodAllPosAlphTheSame(trSim,seqReOpened,spVVecSim,gainDistSim,lossDistSim,_weightsUniqPatterns,unObservableDataSim);
	5951	//
	5952	// if(gainLossOptions::_isFlatTreeBeforOpt || gainLossOptions::_isbBLEMwithSimpleSpSimulatePostExp){
	5953	// bBLEMwithSimpleSpBeforeFullOptimization(trSim,seqReOpened,spSimpleSim,spSim,spVVecSim,gainDistSim,lossDistSim,unObservableDataSim);
	5954	// }
	5955	// }
	5956	// if(gainLossOptions::_modelOptimizationSimPostExp){
	5957	// gainLossOptimizer glOpt(trSim,spVVecSim,gainDistSim,lossDistSim,seqReOpened,_spSimple,
	5958	// gainLossOptions::_epsilonOptimizationIterationCycle*gainLossOptions::_epsilonOptForPostExpSimFactor,
	5959	// (int)ceil(gainLossOptions::_maxNumOfIterations*gainLossOptions::_numOfIterationsOptForPostExpSimFactor),
	5960	// gainLossOptions::_epsilonOptimizationModel*gainLossOptions::_epsilonOptForPostExpSimFactor,
	5961	// (int)ceil(gainLossOptions::_maxNumOfIterationsModel*gainLossOptions::_numOfIterationsOptForPostExpSimFactor),
	5962	// gainLossOptions::_epsilonOptimizationBBL*gainLossOptions::_epsilonOptForPostExpSimFactor,
	5963	// (int)ceil(gainLossOptions::_maxNumOfIterationsBBL*gainLossOptions::_numOfIterationsOptForPostExpSimFactor),
	5964	// NULL, _unObservableData_p ,gainLossOptions::_BBLOptimizationSimPostExp, gainLossOptions::_isbblLSWhenbblEMdontImprove);
	5965	// if(gainLossOptions::_BBLOptimizationSimPostExp && printTreeForEachReplication){
	5966	// trSim = glOpt.getOptTree();
	5967	// printTree(trSim, treeSimString);
	5968	// }
	5969	// }
	5970	// }
	5971	// //////////////////////////////////////// compute Stochastic Mapping
	5972	// if(!gainLossOptions::_gainLossDist){
	5973	// startComputePosteriorExpectationOfChange(seqReOpened,trSim,spSim,LpostPerCatSim,unObservableDataSim,outDirSeq);
	5974	// LpostPerCatSim.clear(); // when cleared - each replicate will recompute the _LpostPerCat
	5975	// }
	5976	// else{
	5977	// startComputePosteriorExpectationOfChange(seqReOpened,trSim,spVVecSim,gainDistSim,lossDistSim,LpostPerSpPerCat,unObservableDataSim,outDirSeq);
	5978	// LpostPerSpPerCat.clear(); // when cleared - each replicate will recompute the _LpostPerSpPerCat
	5979	// }
	5980	// time(&t2);
	5981	// LOGnOUT(4,<<"Replicate SimultePosteriorExpectationOfChange RUNNING TIME = "<<(t2-t1)/60.0<<" minutes"<<endl);
	5982	// }
	5983	//}
	5984
	5985

+318

-0

programs/gainLoss/gainLoss.h less more

	0	/*
	1	Copyright (C) 2011 Tal Pupko TalP@tauex.tau.ac.il.
	2
	3	This program is free software: you can redistribute it and/or modify
	4	it under the terms of the GNU General Public License as published by
	5	the Free Software Foundation, either version 3 of the License, or
	6	(at your option) any later version.
	7
	8	This program is distributed in the hope that it will be useful,
	9	but WITHOUT ANY WARRANTY; without even the implied warranty of
	10	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	11	GNU General Public License for more details.
	12
	13	You should have received a copy of the GNU General Public License
	14	along with this program. If not, see <http://www.gnu.org/licenses/>.
	15	*/
	16
	17
	18	#ifndef ___GAIN_LOSS_
	19	#define ___GAIN_LOSS_
	20
	21	#include "aaJC.h"
	22	#include "bblEM.h"
	23	#include "bestAlpha.h"
	24	#include "chebyshevAccelerator.h"
	25	#include "checkcovFanctors.h"
	26	#include "checkcovFanctorsWithFactors.h"
	27	#include "definitions.h"
	28	#include "distanceTable.h"
	29	#include "distributionPlusInvariant.h"
	30	#include "errorMsg.h"
	31	#include "evaluateCharacterFreq.h"
	32	#include "fastStartTree.h"
	33	#include "gainLossAlphabet.h"
	34	#include "gainLossModel.h"
	35	#include "gainLossUtils.h"
	36	#include "ancestralReconstructStates.h"
	37	#include "gammaDistribution.h"
	38	#include "generalGammaDistribution.h"
	39	#include "generalGammaDistributionPlusInvariant.h"
	40	#include "jcDistance.h"
	41	#include "likeDist.h"
	42	#include "likelihoodComputation.h"
	43	#include "likelihoodComputationGL.h"
	44	#include "logFile.h"
	45	#include "matrixUtils.h"
	46	#include "nj.h"
	47	#include "nucJC.h"
	48	#include "numRec.h"
	49	#include "optimizeGainLossModel.h"
	50	#include "optimizeGainLossModelVV.h"
	51	#include "readDatMatrix.h"
	52	#include "recognizeFormat.h"
	53	#include "seqContainerTreeMap.h"
	54	#include "sequence.h"
	55	#include "sequenceContainer.h"
	56	#include "siteSpecificRate.h"
	57	#include "someUtil.h"
	58	#include "stochasticProcess.h"
	59	#include "tree.h"
	60	#include "treeIt.h"
	61	#include "trivialAccelerator.h"
	62	#include "uniDistribution.h"
	63	#include "unObservableData.h"
	64
	65	#include <cassert>
	66	#include <cmath>
	67	#include <ctime>
	68	#include <fstream>
	69	#include <iomanip>
	70	#include <iostream>
	71	#include <map>
	72	#include <string>
	73	#include <time.h>
	74	#include <algorithm>
	75
	76	#ifdef WIN32
	77	#include <process.h>
	78	#else
	79	#include <unistd.h>
	80	#endif
	81
	82	class gainLoss {
	83
	84	public:
	85	explicit gainLoss();
	86	virtual ~gainLoss();
	87	void run();
	88
	89
	90	private:
	91	void initialize(bool isComputeLikelihood=true);
	92	void initializeBranchLengthDiff();
	93	void initializeUnObservableData();
	94
	95	void fillOptionsParameters(int argc, char* argv[]);
	96	void printOptionParameters(ostream& out= cout);
	97
	98	void startSequenceContainer();
	99	void checkMinNumOfOnesOrZeros(sequenceContainer& sc, int minNumOfOnes, int minNumOfZeros, bool isRemovePosNotWithinMinMax=false, bool isReportRemovedPos=false);
	100	void produceUnionPAP_against_pos(sequenceContainer& sc, int pos_for_union, bool is_ignore_last_pos=true);
	101
	102	void startSequenceContainerUniqPatterns();
	103	void countOccurPerPos();
	104	void removePositionsWithHighPercentOfMissingData(MDOUBLE PercentOfMissingDataToRemove);
	105
	106	void startStochasticProcess(bool gainLossDist);
	107	void setRootFreq();
	108	void startStochasticProcess();
	109	stochasticProcess* startStochasticProcessGeneric(gainLossOptions::distributionType rateDistributionType, const bool isReversible);
	110	void startStochasticProcessVec();
	111
	112	void startEvolTreeTopology(ostream& out=cout);
	113	void startOptimizations();
	114	void startRate4Site(sequenceContainer& sc, tree& tr, stochasticProcess* sp, string& outDir, unObservableData* unObservableData_p);
	115	void startGainLoss4Site(sequenceContainer& sc, tree& tr, vector<vector<stochasticProcess> > spVVec,distribution gainDist,distribution* lossDist,
	116	string& outDir, unObservableData* unObservableData_p);
	117
	118	void computePosteriorExpectationOfChangeRunOnly();
	119	void startComputePosteriorExpectationOfChange();
	120	void startComputePosteriorExpectationOfChange(sequenceContainer& sc, tree& tr, stochasticProcess* sp, VVdouble LpostPerCat, unObservableData* unObservableData_p, string& outDir,MDOUBLE distanceFromNearestOTUForRecent,bool isUpdateMPPerPos=true);
	121	void startComputePosteriorExpectationOfChange(sequenceContainer& sc, tree& tr, vector<vector<stochasticProcess> >& spVVec, distribution gainDist, distribution* lossDist, VVVdouble& LpostPerSpPerCat,unObservableData* unObservableData_p, string& outDir,MDOUBLE distanceFromNearestOTUForRecent,bool isUpdateMPPerPos=true);
	122
	123	void startComputeAmongSitesCorrelations();
	124	void computeCoEvolutionScoresBasedOnSimulatedData(sequenceContainer& scSimulated);
	125	void startParametricBootstapCorrelation();
	126	int computeCoEvolutionScoresBasedOnSimulatedDataCoMap(sequenceContainer& scSimulated,tree& trSampled ,MDOUBLE qNminOfRealData, bool& isLastIteration, int& numOfpairsWithRateAboveMinRequiredExp, MDOUBLE& T_BH_prev, ofstream* simCorrelStream);
	127
	128
	129	void startMaxParsimonyChange(bool isUpdateMPPerPos=true);
	130	void startMaxParsimonyChange(sequenceContainer& sc, tree& tr, string& outDir,MDOUBLE costMatrixGainLossRatio, MDOUBLE distanceFromRootForRecent,bool isUpdateMPPerPos=true);
	131
	132	void startSimulateSequences(int numOfSequenceSets, int seqLengthInSet); // if default=0, take length for input sequence
	133
	134	void startSimultePosteriorExpectationOfChange(int numOfSequenceSets=5, const int numOfRepeats=1);
	135	MDOUBLE ComputeEmpiricalExpectedQforStationaryProcess(VVVdouble& EmpPerPos, MDOUBLE minRate=0.01);
	136
	137	//void simultePhyleticData(const int numOfSequenceSets, string strSeqFirst,MDOUBLE loss2gainRatioToSim, gainLossOptions::simulationType simulationType
	138	// , MDOUBLE AlphaGain, MDOUBLE BetaGain, MDOUBLE AlphaLoss, MDOUBLE BetaLoss, MDOUBLE AlphaRate);
	139
	140	void FlatSpBeforeOpt(stochasticProcess& sp , unObservableData* unObservableData_p);
	141	void FlatSpBeforeOpt(vector<vector<stochasticProcess> >& spVVec,distribution gainDist, distribution * lossDist, unObservableData* unObservableData_p);
	142
	143	void getStartingTreeFromTreeFile();
	144	void getStartingTreeNJ_fromDistances(const VVdouble& disTab,const vector<string>& vNames);
	145
	146	void fillReferenceSequence();
	147	Vdouble computeFreq();
	148
	149	void optimizationsManyStarts(const MDOUBLE epsilonOptimization, const int numIterations);
	150	void optimizationsManyStartsNoVec(const MDOUBLE epsilonOptimization, const int numIterations);
	151
	152	void optimizationsVVManyStarts(const MDOUBLE epsilonOptimization, const int numIterations);
	153
	154	void optimizations(ostream& out =cout);
	155	void printModellValuesOfParams();
	156	void printModellValuesOfParams(stochasticProcess* sp, tree& tr);
	157	void printModellValuesOfParams(tree& tr, vector<vector<stochasticProcess> >& spVVec, distribution gainDist, distribution* lossDist);
	158
	159
	160	void optimizationsSPvv(ostream& out =cout);
	161	MDOUBLE optimizeParameters(ostream& out =cout);
	162	MDOUBLE optimizeParametersSPvv(ostream& out =cout);
	163	MDOUBLE optimizeBranchLengths();
	164	void normalizeQandTree(bool isComputeLikelihood=true, bool isMultipleAllBranchesByNormFactor= true); // normalizeQ or normalizeMatrices and the corresponding tree
	165	void convertGainLossRatesToFreq();
	166	void AlphaEqBetaManipulation();
	167
	168	void printPij_t(MDOUBLE dist=0.1,ostream& out= cout);
	169	void printQ(ostream& out= cout);
	170	void printTreeLikelihoodAllPosAlphTheSame(bool isLOGnOUT = true,ostream& out =cout);
	171	void printLofPos();
	172	MDOUBLE printLofPos(ostream& out);
	173	void printLofPosBothModels();
	174	MDOUBLE printLofPosBothModels(ostream& out);
	175
	176	void printLikelihoodLandscape(stochasticProcess* sp);
	177	void printLikelihoodLandscapeStatFreqRatioAndRootFreqRatio();
	178
	179	void computeAveAndStd();
	180	void normalizeRates();
	181	void printRatesML(ostream& out, const Vdouble & rate2print);
	182	void printRatesBayes(ostream& out, const Vdouble & rate2print);
	183	void printAveAndStd(ostream& out= cout);
	184
	185	Vdouble computeRate4site(); // needed also for computePosteriorExpectationOfChangePerSite (if not run before)
	186	void printRates(ostream & out, const Vdouble & rate2print); // needed also for gammaMix
	187
	188
	189	void printGainLossBayes(ostream& out, const Vdouble& rate2printV, const Vdouble& lowerBoundV, const Vdouble& upperBoundV,const VVdouble& posteriorV, const distribution* dist);
	190
	191	void initParamsAtRandPoints(int numOfRandPoints, stochasticProcess* sp, unObservableData* currUnObs, ostream& out=cout);
	192	void initParamsAtRandPointsSPvv(int numOfRandPoints, vector<vector<stochasticProcess> >& spVVec, distribution gainDist, distribution * lossDist, unObservableData* currUnObs,ostream& out =cout);
	193	//void initParamsAtIntervalPoints(int pointIndex,int numOfRandPoints, stochasticProcess* sp, unObservableData* currUnObs, ostream& out);
	194
	195
	196	void computePosteriorExpectationOfChangePerSite(Vdouble& expV01, Vdouble& expV10);
	197	void initMixtureParams(Vdouble& initAlphaRates, Vdouble& initBetaRates, Vdouble& initCompProbRates, int numOfGammaComp,
	198	MDOUBLE initAlphaRate=1, MDOUBLE initBetaRate=1, MDOUBLE initCompProbRate=1);
	199	void printGainLossProbabilityPerPosPerBranch(int pos, MDOUBLE probCutOff, VVVdouble& probChanges, ostream& out=cout, ostream& outCount=cout);
	200	void printGainLossExpectationPerBranch(VVVdouble& probChanges, ostream& out=cout);
	201	void computeBranchLegthDiffFactor(ostream& out=cout);
	202	//void initMissingDataInfo();
	203	vector<sequenceContainer> simulateSequences(int numOfSequenceSets, int seqLengthInSet, bool writeSeq,
	204	bool useTheSame, bool isReversible, bool isGeqL, gainLossOptions::distributionType rateDistributionTypeSim);
	205	sequenceContainer simulateSequencesForParametricBootstrap(int seqLengthInSet, sequenceContainer& scSimulated, tree& trSampled, bool writeSeq=true, bool useTheSame=true);
	206	void ancestralReconstructor();
	207	void ancestralReconstructorBasedOnJoint();
	208	Vdouble getRatesVector() const { return _rates; } // read-only accessor: returns _rates by value (a copy), so callers cannot modify the stored vector through it
	209
	210	// co evol functions
	211	void findCoEvolvingSites(const int numberOfSequences2simulateForCoEvol);
	212	MDOUBLE computeCorrelationBetweenVis(const VVVdouble & VIpos_i, const VVVdouble & VIpos_j);
	213
	214	MDOUBLE computeDistanceFromRootForRecent(tree& tr); //
	215	MDOUBLE computeDistanceNearestOTUforRecent(tree& tr); //
	216	//void bBLEMwithSimpleSpBeforeFullOptimization(tree& tr);
	217	void bBLEMwithSimpleSpBeforeFullOptimization(tree& tr, const sequenceContainer& sc, stochasticProcess* spSimple,
	218	stochasticProcess* sp,
	219	const vector<vector<stochasticProcess> >& spVVec,const distribution gainDist, const distribution * lossDist,
	220	unObservableData *unObservableData_p);
	221
	222	void updateSetLofMissingData();
	223	void multipleAllBranchesByFactorAtStart(MDOUBLE epsilonOptimization);
	224	void multipleAllBranchesByFactorAtStartByMaxParsimonyCost(int costOfTreeMP);
	225	void RemoveSeqWithUnknownForSelectedSiteForCorrelation(sequenceContainer& sc, tree& tr);
	226
	227
	228
	229
	230	private:
	231	stochasticProcess *_sp;
	232	vector<vector<stochasticProcess*> > _spVVec; //save stochasticProcess for each category
	233	stochasticProcess *_spSimple;
	234	Vdouble _freq;
	235
	236	VVVdouble _postProbPerSpPerCatPerPos; // the posterior probability for each stochastic process for each rate Cat for each site
	237
	238	distribution* _gainDist;
	239	distribution* _lossDist;
	240	tree _tr;
	241	tree _trOrig; // used for diff(Branch length comparisons)
	242	tree _trGain;
	243	tree _trLoss;
	244
	245	MDOUBLE _gainExp;
	246	MDOUBLE _lossExp;
	247
	248	MDOUBLE _meanGain;
	249	MDOUBLE _meanLoss;
	250	MDOUBLE _medianGain;
	251	MDOUBLE _medianLoss;
	252
	253	sequenceContainer _sc;
	254	sequenceContainer _scUniqPatterns; // to contain a non-redundant set of patterns with _weights
	255	sequenceContainer _scWithFullLength; //
	256	sequenceContainer _scFilterMissingData; //
	257
	258	vector<int> _alphVecDist; // number of each letter
	259
	260	//sequenceContainer _scZero;
	261	//MDOUBLE _logLforMissingData;
	262	//MDOUBLE* _plogLforMissingData;
	263	//Vdouble _LforMissingDataPerCat; // used foreach rate category
	264	//Vdouble* _pLforMissingDataPerCat;
	265	unObservableData* _unObservableData_p;
	266	Vdouble* _weightsUniqPatterns;
	267
	268	MDOUBLE _logL;
	269	MDOUBLE _distanceFromRootForRecent;
	270	MDOUBLE _distanceFromNearestOTUForRecent;
	271
	272	sequence* _refSeq; // the reference sequence
	273	VVVVdouble _jointProb_PosNodeXY; // store the information from computePosteriorOfChangeGivenTerminals
	274	VVVdouble _MPPerPos; // The MP estimation of gain and loss events _MPPerPos[i][0][1] - gain events in i position
	275	int _CostOfTreeMP;
	276	VVVdouble _SMPerPos; // The Stochastic mapping estimation of gain and loss events _SMPerPos[i][0][1] - gain events in i position
	277	VVVVdouble _MP_PosNodeXY; // _MP_PosNodeXY[pos][nodeID][fatherState][sonState] - after simulations and postProb
	278
	279	Vint _occurPerPos; // # 1
	280	Vint _unknownPerPos; // # ?
	281
	282	Vdouble _gainPerPos; // The Stochastic mapping estimation of gain and loss events _SMPerPos[i] - gain events in i position
	283	Vdouble _lossPerPos; // The Stochastic mapping estimation of gain and loss events _SMPerPos[i] - loss events in i position
	284	Vdouble _lossMPPerPos; // Maximum Parsimony
	285	Vdouble _gainMPPerPos;
	286
	287	Vdouble _gainPerPosCorr; // either_SMPerPos[i], or _MPPerPos[i]
	288	Vdouble _lossPerPosCorr;
	289
	290	Vdouble _rates;// the rates themselves
	291	Vdouble _Lrate;// the log likelihood of each position
	292	VVdouble _postProbPerCatPerPos; // the posterior probability for each category and each site
	293	Vdouble _normalizedRates; // the rates when their ave = 0 and std = 1.
	294	MDOUBLE _ave; // the average over all rates.
	295	MDOUBLE _std; // the std over all rates.
	296	Vdouble _BayesianSTD;// the std of the Bayesian rates
	297	Vdouble _BayesianLowerBound;// lower bound of rate in Bayesian inference
	298	Vdouble _BayesianUpperBound;// upper bound of rate in Bayesian inference
	299	MDOUBLE _alphaConf; // the alpha confidence interval of Bayesian rates (set to 0.5). interval - rates that are in the 95% area under the curve.
	300
	301	VVVVdouble _expChanges_PosNodeXY; // expChanges_PosNodeXY[pos][nodeID][fatherState][sonState] - after simulations and postProb
	302	VVVVdouble _expChanges_PosNodeXYSampledData; // expChanges_PosNodeXY[pos][nodeID][fatherState][sonState] - after simulations and postProb
	303
	304	// correlation vectors
	305	VVVdouble _correlationsPerSitePerPosVec;
	306	VVVdouble _correlationsPerSitePerPosVecSampledData;
	307	vector<vector<bool> > _isComputePairWithRateAboveNim; // not dependent on correlation type
	308	Vint _selectedSites; // either all or selected sited (e.g., test correlation with specific traits)
	309	Vint _evolvingSites; // sub-set of all sites in the sequence (e.g., with >=2 Event By MP) e.g., from seqLen = 5 _evolvingSites=[0,1,4]
	310	Vint _numOfGapsTillSite; // sub-set of all sites in the sequence (e.g., with >=2 Event By MP), e.g., _numOfGapsTillSite=[0,0,2]
	311
	312	sequenceContainer _scEvolvingSites;
	313
	314	map<int, map<int, map<string, map<string, MDOUBLE > > > > _correlationsData; // _correlationsData["i"]["j"]["type"]["R" / "pVal" / "qVal" / "Nmin"]
	315	};
	316
	317	#endif // ___GAIN_LOSS_

+411

-0

programs/gainLoss/gainLoss.oldFunc_moved2rate4siteGL.txt less more

	0
	1	// Rate4site - these functions are now in rate4siteGL.cpp
	2	/********************************************************************************************
	3	*********************************************************************************************/
	4	void gainLoss::startRate4Site(){
	5
	6	LOGnOUT(4,<<"Starting rate4site..."<<endl);
	7	computeRate4site();
	8	computeAveAndStd(); // put them in _ave, and _std
	9
	10	ofstream nonNormalizedOutStream(gainLossOptions::_outFileNotNormalize.c_str());
	11	printRates(nonNormalizedOutStream,_rates);
	12	nonNormalizedOutStream.close();
	13
	14	normalizeRates(); // change also the _ave, the _std the quantiles, etc.
	15
	16	ofstream normalizedOutStream(gainLossOptions::_outFile.c_str());
	17	printRates(normalizedOutStream,_normalizedRates);
	18	normalizedOutStream.close();
	19	}
	20	/********************************************************************************************
	21	computeRate4site
	22	*********************************************************************************************/
	23	Vdouble gainLoss::computeRate4site()
	24	{
	25	time_t t1;
	26	time(&t1);
	27	time_t t2;
	28
	29	if (gainLossOptions::_rateEstimationMethod == gainLossOptions::ebExp) {
	30	LOGnOUT (4,<<"perform computeEB_EXP_siteSpecificRate... while computing posteriorProb PerCategory PerPosition"<<endl);
	31	_LpostPerCat.resize(_sp->categories());
	32	for (int rateIndex=0 ; rateIndex<_sp->categories(); ++rateIndex){
	33	_LpostPerCat[rateIndex].resize(_sc.seqLen());
	34	}
	35	computeEB_EXP_siteSpecificRate(_rates,_BayesianSTD,_BayesianLowerBound,_BayesianUpperBound,_sc,*_sp,_tr,_alphaConf,&_LpostPerCat);
	36	}
	37	else if (gainLossOptions::_rateEstimationMethod == gainLossOptions::mlRate) {
	38	LOGnOUT (4,<<"perform computeML_siteSpecificRate with maxRate= "<<gainLossOptions::_maxRateForML<<endl);
	39	computeML_siteSpecificRate(_rates,_Lrate,_sc, *_sp,_tr, gainLossOptions::_maxRateForML);
	40	}
	41	else
	42	errorMsg::reportError("non such method for rate inference, in function void rate4site::computeRate4site()");
	43
	44	time(&t2);
	45	LOGnOUT(4,<<endl<<"computeRate4site RUNNING TIME = "<<(t2-t1)/60.0<<" minutes"<<endl);
	46	return _rates;
	47	}
	48	/********************************************************************************************
	49	printRates
	50	*********************************************************************************************/
	51	void gainLoss::printRates(ostream & out, const Vdouble & rate2print) {
	52
	53	if (gainLossOptions::_rateDistributionType == gainLossOptions::GAMMA_MIXTURE){
	54	mixtureDistribution* pMixture = static_cast<mixtureDistribution*>(_sp->distr());
	55	pMixture->printParams(out);
	56	}
	57
	58	switch (gainLossOptions::_rateEstimationMethod){
	59	case (gainLossOptions::ebExp):
	60	printRatesBayes(out,rate2print);
	61	break;
	62	case (gainLossOptions::mlRate):
	63	printRatesML(out,rate2print);
	64	break;
	65	}
	66	printAveAndStd(out);
	67	}
	68	/********************************************************************************************
	69	*********************************************************************************************/
	70	void gainLoss::printRatesML(ostream& out, const Vdouble & rate2print) {
	71	out<<"#Rates were calculated using Maximum Likelihood"<<endl;
	72	out<<"#SEQ: The presence(1) or Absence(0) in the reference sequence."<<"Displayed on sequence "<<_refSeq->name()<<endl;
	73	out<<"#SCORE: The conservation scores. lower value = higher conservation."<<endl;
	74	out<<"#MSA DATA: The number of aligned sequences having an amino acid (non-gapped) from the overall number of sequences at each position."<<endl;
	75	out<<endl;
	76	out<<"========================================================================================================================================================="<<endl;
	77	out<<"#POS"<<"\t"<<"SEQ"<<"\t"<<"SCORE"<<"\t"<<"MSA DATA"<<endl; // note position start from 1.
	78	out<<"========================================================================================================================================================="<<endl;
	79
	80	#ifdef unix
	81	for (int pos=0; pos < _sc.seqLen(); ++pos) {
	82	out<<pos+1<<"\t"<<_refSeq->getAlphabet()->fromInt((*_refSeq)[pos])<<"\t"<<setprecision(7)<<rate2print[pos]<<"\t";
	83	out<<_sc.numberOfSequencesWithoutGaps(pos)<<"/"<<_sc.numberOfSeqs()<<endl; // note position start from 1.
	84	}
	85	#else
	86	for (int pos=0; pos < _sc.seqLen(); ++pos) {
	87	out<<left<<pos+1<<left<<"\t"<<_refSeq->getAlphabet()->fromInt((*_refSeq)[pos])<<"\t";
	88	out<<left<<setprecision(7)<<fixed<<rate2print[pos]<<"\t";
	89	out<<right<<_sc.numberOfSequencesWithoutGaps(pos)<<"/"<<_sc.numberOfSeqs()<<endl; // note position start from 1.
	90	}
	91	#endif
	92	}
	93	/********************************************************************************************
	94	*********************************************************************************************/
	95	void gainLoss::printRatesBayes(ostream& out, const Vdouble & rate2print) {
	96	out<<"# Rates were calculated using the expectation of the posterior rate distribution"<<endl;
	97	out<<"# Prior distribution is Gamma with "<<gainLossOptions::_numberOfRateCategories<<" discrete categories"<<endl;
	98	out<<"# SEQ: The presence(1) or Absence(0) in the reference sequence."<<"Displayed on sequence "<<_refSeq->name()<<endl;
	99	out<<"# SCORE: The conservation scores. lower value = higher conservation."<<endl;
	100	out<<"# QQ-INTERVAL: the confidence interval for the rate estimates. The default interval is 25-75 percentiles"<<endl;
	101	out<<"# STD: the standard deviation of the posterior rate distribution."<<endl;
	102	out<<"# MSA DATA: The number of aligned sequences having an amino acid (non-gapped) from the overall number of sequences at each position."<<endl;
	103	MDOUBLE AlphaRate;
	104	if(dynamic_cast<gammaDistribution*>(_sp->distr()) ) {
	105	AlphaRate = static_cast<gammaDistribution*>(_sp->distr())->getAlpha();
	106	}
	107	if(dynamic_cast<generalGammaDistributionPlusInvariant*>(_sp->distr())){
	108	AlphaRate = static_cast<generalGammaDistributionPlusInvariant*>(_sp->distr())->getAlpha();
	109	}
	110	if(dynamic_cast<gammaDistributionFixedCategories*>(_sp->distr())){
	111	AlphaRate = static_cast<gammaDistributionFixedCategories*>(_sp->distr())->getAlpha();
	112	}
	113	out<<"# The alpha parameter "<<AlphaRate<<endl;
	114	int k=0;
	115	while (k < _sp->categories()){
	116	out<<"# sp.rates(j) j= " <<k<<"\t"<<_sp->rates(k)<<"\t"<<_sp->ratesProb(k)<<endl;
	117	k++;
	118	}
	119
	120
	121	out<<endl;
	122	out<<"========================================================================================================================================================="<<endl;
	123	out<<"#POS"<<"\t"<<"SEQ"<<"\t"<<"SCORE"<<"\t"<<"QQ-INTERVAL"<<"\t"<<"STD"<<"\t"<<"MSA DATA"<<endl; // note position start from 1.
	124	out<<"========================================================================================================================================================="<<endl;
	125
	126	#ifdef unix
	127	for (int pos=0; pos < _sc.seqLen(); ++pos) {
	128	out<<pos+1<<"\t"<<_refSeq->getAlphabet()->fromInt((*_refSeq)[pos])<<"\t"<<setprecision(7)<<rate2print[pos]<<"\t";
	129	out<<"["<<setprecision(4)<<_BayesianLowerBound[pos]<<","<<setprecision(4)<<_BayesianUpperBound[pos]<<"]"<<"\t";
	130	out<<setprecision(4)<<_BayesianSTD[pos]<<"\t";
	131	out<<_sc.numberOfSequencesWithoutGaps(pos)<<"/"<<_sc.numberOfSeqs()<<endl; // note position start from 1.
	132	}
	133	#else
	134	for (int pos=0; pos < _sc.seqLen(); ++pos) {
	135	out<<left<<pos+1;
	136	out<<left<<"\t"<<_refSeq->getAlphabet()->fromInt((*_refSeq)[pos])<<"\t";
	137	out<<left<<setprecision(7)<<fixed<<rate2print[pos]<<"\t";
	138	out<<right<<"["<<setprecision(4)<<left<<_BayesianLowerBound[pos]<<","<<setprecision(4)<<right<<_BayesianUpperBound[pos]<<"]"<<"\t";
	139	out<<right<<setprecision(4)<<_BayesianSTD[pos];
	140	out<<right<<"\t"<<_sc.numberOfSequencesWithoutGaps(pos)<<"/"<<_sc.numberOfSeqs()<<endl; // note position start from 1.
	141	}
	142	#endif
	143	}
	144	/********************************************************************************************
	145	*********************************************************************************************/
	146	void gainLoss::printAveAndStd(ostream& out) {
	147	out<<"#Average = "<<_ave<<endl;
	148	out<<"#Standard Deviation = "<<_std<<endl;
	149	}
	150	/********************************************************************************************
	151	computeAveAndStd
	152	*********************************************************************************************/
	153	void gainLoss::computeAveAndStd(){
	154	MDOUBLE sum = 0;
	155	MDOUBLE sumSqr=0.0;
	156	for (int i=0; i < _sc.seqLen(); ++i) {
	157	sum+=_rates[i];
	158	sumSqr+=(_rates[i]*_rates[i]);
	159	}
	160	_ave = sum/_sc.seqLen();
	161	_std= sumSqr-(sum*sum/_sc.seqLen());
	162	_std /= (_sc.seqLen()-1.0);
	163	_std = sqrt(_std);
	164	if (((_ave<1e-9)) && (_ave>(-(1e-9)))) _ave=0;
	165	if ((_std>(1-(1e-9))) && (_std< (1.0+(1e-9)))) _std=1.0;
	166	}
	167	/********************************************************************************************
	168	normalizeRates
	169	*********************************************************************************************/
	170	void gainLoss::normalizeRates() {
	171	int i=0;
	172	if (_std==0) errorMsg::reportError(" std = 0 in function normalizeRates",1);
	173	_normalizedRates.resize(_sc.seqLen(),0.0);
	174	for (i=0;i<_normalizedRates.size();++i) {
	175	_normalizedRates[i]=(_rates[i]-_ave)/_std;
	176	}
	177
	178	if (gainLossOptions::_rateEstimationMethod == gainLossOptions::ebExp) {
	179	for (int k=0; k < _sc.seqLen(); ++k) {
	180	_BayesianUpperBound[k] = (_BayesianUpperBound[k] - _ave)/_std;
	181	_BayesianLowerBound[k] = (_BayesianLowerBound[k] - _ave)/_std;
	182	_BayesianSTD[k] = (_BayesianSTD[k])/_std;
	183	}
	184	}
	185	_ave = 0.0;
	186	_std = 1.0;
	187	}
	188
	189
	190
	191
	192
	193
	194	// gainLoss4site - these functions are now in gainLoss4siteGL.cpp
	195	/********************************************************************************************
	196	Computes the Empirical Bayesian expectation of the posterior
	197	estimators for GL values
	198	*********************************************************************************************/
	199	void gainLoss::computeEB_EXP_GL4Site()
	200	{
	201	time_t t1;
	202	time(&t1);
	203	time_t t2;
	204	LOGnOUT(4,<<"Starting gain4site and loss4site..."<<endl);
	205
	206	Vdouble gainV,stdGainV,lowerBoundGainV,upperBoundGainV;
	207	VVdouble posteriorsGainV;
	208	computeEB_EXP_siteSpecificGL(gainV, stdGainV, lowerBoundGainV, upperBoundGainV, posteriorsGainV, _sc, _spVVec, _tr, _gainDist,_lossDist,_gainDist);
	209
	210	ofstream outGain(gainLossOptions::_outFileGain4Site.c_str());
	211	printGainLossBayes(outGain,gainV,lowerBoundGainV,upperBoundGainV, posteriorsGainV, _gainDist);
	212	outGain.close();
	213
	214	Vdouble lossV,stdLossV,lowerBoundLossV,upperBoundLossV;
	215	VVdouble posteriorsLossV;
	216	computeEB_EXP_siteSpecificGL(lossV, stdLossV, lowerBoundLossV, upperBoundLossV, posteriorsLossV, _sc, _spVVec, _tr, _gainDist,_lossDist,_lossDist);
	217
	218	ofstream outLoss(gainLossOptions::_outFileLoss4Site.c_str());
	219	printGainLossBayes(outLoss,lossV,lowerBoundLossV,upperBoundLossV, posteriorsLossV, _lossDist);
	220	outLoss.close();
	221
	222	time(&t2);
	223	LOGnOUT(4,<<endl<<"computeEB_EXP_GL4Site RUNNING TIME = "<<(t2-t1)/60.0<<" minutes"<<endl);
	224	}
	225	/********************************************************************************************
	226	*********************************************************************************************/
	227	void gainLoss::printGainLossBayes(ostream& out, const Vdouble& rate2printV, const Vdouble& lowerBoundV, const Vdouble& upperBoundV,const VVdouble& posteriorV, const distribution* dist)
	228	{
	229	out.precision(7);
	230	out<<"#gainLoss Bayesian Results"<<endl;
	231	out<<"#Displayed on sequence "<<_refSeq->name()<<endl;
	232	out<<"========================================================================================================================================================="<<endl;
	233	out<<"POS\t"<<"1/0\t"<<"Rate\t"<<"[Confidence Interval]\t";
	234	int cat;
	235	for (cat = 0; cat <dist->categories(); ++cat)
	236	out<<dist->rates(cat)<<"\t";
	237	out<<endl;
	238
	239	int numOfCategories = dist->categories();
	240	for (int i=0;i<_sc.seqLen();i++){
	241	string aaStr = _refSeq->getAlphabet()->fromInt((*_refSeq)[i]);
	242	out<<i+1 <<"\t"<<aaStr<<"\t"<< rate2printV[i]<<"\t"<<"["<<lowerBoundV[i]<<","<<upperBoundV[i]<<"]\t";
	243	//if (lowerBoundV[i]>1) out <<"*"; //significance indicator: if entire confidence interval >1
	244	for (cat = 0; cat < numOfCategories; ++cat)
	245	out<<posteriorV[i][cat]<<"\t";
	246	out<<endl;
	247	}
	248	}
	249
	250	// computeCounts - these functions are now in computeCountsGL.cpp
	251	/********************************************************************************************
	252	*********************************************************************************************/
	253	void gainLoss::computePosteriorExpectationOfChangePerSite(){
	254	LOGnOUT(4,<<"Starting calculePosteriorExpectationOfChange..."<<endl);
	255	if(_LpostPerCat.size()==0){ // to fill _LpostPerCat - run computeRate4site()
	256	computeRate4site();
	257	}
	258	Vdouble expV01(_sc.seqLen(),0.0);
	259	Vdouble expV10(_sc.seqLen(),0.0);
	260	computePosteriorExpectationOfChangePerSite(expV01, expV10); //
	261
	262	// printOut the final results
	263	ofstream posteriorExpectationStream(gainLossOptions::_outFilePosteriorExpectationOfChange.c_str());
	264	posteriorExpectationStream<<"POS"<<"\t"<<"exp01"<<"\t"<<"exp10"<<endl;
	265	for (int pos = 0; pos <_sc.seqLen(); ++pos){
	266	posteriorExpectationStream<<pos+1<<"\t"<<expV01[pos]<<"\t"<<expV10[pos]<<endl;
	267	}
	268	}
	269	/********************************************************************************************
	270	computePosteriorExpectationOfChangePerSite
	271	*********************************************************************************************/
	272	void gainLoss::computePosteriorExpectationOfChangePerSite(Vdouble& expV01, Vdouble& expV10){
	273	VVVVdouble posteriorsGivenTerminals; // posteriorsGivenTerminals[pos][nodeID][x][y]
	274	VVVVdouble probChangesForBranch; // probChangesForBranch[pos][nodeID][x][y]
	275	resizeVVVV(_sc.seqLen(),_tr.getNodesNum(),_sp->alphabetSize(),_sp->alphabetSize(),posteriorsGivenTerminals);
	276	resizeVVVV(_sc.seqLen(),_tr.getNodesNum(),_sp->alphabetSize(),_sp->alphabetSize(),probChangesForBranch);
	277
	278
	279	// Per RateCategory -- All the computations is done while looping over rate categories
	280	for (int rateIndex=0 ; rateIndex< _sp->categories(); ++rateIndex)
	281	{
	282	tree copy_et = _tr;
	283	MDOUBLE rateVal = _sp->rates(rateIndex);
	284	MDOUBLE minimumRate = 0.0000001;
	285	MDOUBLE rate2multiply = max(rateVal,minimumRate);
	286	if(rateVal<minimumRate){
	287	LOGnOUT(4, <<" >>> NOTE: the rate category "<<rateVal<<" is too low for computePosteriorExpectationOfChangePerSite"<<endl); }
	288	copy_et.multipleAllBranchesByFactor(rate2multiply);
	289
	290	LOGnOUT(4, <<"running "<<gainLossOptions::_numOfSimulationsForPotExp<<" simulations for rate "<<rate2multiply<<endl);
	291	gainLossAlphabet alph; // needed for Alphabet size
	292	simulateJumps simPerRateCategory(copy_et,*_sp,&alph);
	293	simPerRateCategory.runSimulation(gainLossOptions::_numOfSimulationsForPotExp);
	294	LOGnOUT(4,<<"finished simulations"<<endl);
	295
	296	// Per POS
	297	for (int pos = 0; pos <_sc.seqLen(); ++pos)
	298	{
	299	LOG(6,<<"pos "<<pos+1<<endl);
	300	VVVdouble posteriorsGivenTerminalsPerRateCategoryPerPos;
	301	VVVdouble probChangesForBranchPerRateCategoryPerPos;
	302	computePosteriorExpectationOfChange cpecPerRateCategoryPerPos(copy_et,_sc,_sp); // Per POS,CAT
	303	cpecPerRateCategoryPerPos.computePosteriorOfChangeGivenTerminals(posteriorsGivenTerminalsPerRateCategoryPerPos,pos);
	304	MDOUBLE exp01 = cpecPerRateCategoryPerPos.computeExpectationOfChangeAcrossTree(simPerRateCategory,posteriorsGivenTerminalsPerRateCategoryPerPos,0,1); // Per POS
	305	MDOUBLE exp10 = cpecPerRateCategoryPerPos.computeExpectationOfChangeAcrossTree(simPerRateCategory,posteriorsGivenTerminalsPerRateCategoryPerPos,1,0); // Per POS
	306	expV01[pos]+=exp01*_LpostPerCat[rateIndex][pos];
	307	expV10[pos]+=exp10*_LpostPerCat[rateIndex][pos];
	308
	309	cpecPerRateCategoryPerPos.computePosteriorAcrossTree(simPerRateCategory,posteriorsGivenTerminalsPerRateCategoryPerPos,probChangesForBranchPerRateCategoryPerPos);
	310
	311	// Store all information PerCat,PerPOS
	312	for(int i=0;i<posteriorsGivenTerminals[pos].size();++i){
	313	for(int j=0;j<posteriorsGivenTerminals[pos][i].size();++j){
	314	for(int k=0;k<posteriorsGivenTerminals[pos][i][j].size();++k){
	315	posteriorsGivenTerminals[pos][i][j][k] += posteriorsGivenTerminalsPerRateCategoryPerPos[i][j][k]*_LpostPerCat[rateIndex][pos];
	316	probChangesForBranch[pos][i][j][k] += probChangesForBranchPerRateCategoryPerPos[i][j][k]*_LpostPerCat[rateIndex][pos];
	317	}
	318	}
	319	}
	320	}
	321	}
	322	// end of rateCategories loop
	323	//////////////////////////////////////////////////////////////////////////
	324
	325
	326	// ProbabilityPerPosPerBranch
	327	string gainLossProbabilityPerPosPerBranch = gainLossOptions::_outDir + "//" + "gainLossProbabilityPerPosPerBranch.txt";
	328	ofstream gainLossProbabilityPerPosPerBranchStream(gainLossProbabilityPerPosPerBranch.c_str());
	329	gainLossProbabilityPerPosPerBranchStream<<"G/L"<<"\t"<<"POS"<<"\t"<<"branch"<<"\t"<<"branchLength"<<"\t"<<"distance2root"<<"\t"<<"probability"<<endl;
	330	string gainLossCountProbPerPos = gainLossOptions::_outDir + "//" + "gainLossCountProbPerPos.txt";
	331	ofstream gainLossCountProbPerPosStream(gainLossCountProbPerPos.c_str());
	332	gainLossCountProbPerPosStream<<"POS"<<"\t"<<"count01"<<"\t"<<"count10"<<endl;
	333	for (int pos = 0; pos <_sc.seqLen(); ++pos){
	334	printGainLossProbabilityPerPosPerBranch(pos, gainLossOptions::_probCutOff, probChangesForBranch[pos],gainLossProbabilityPerPosPerBranchStream,gainLossCountProbPerPosStream);
	335	}
	336
	337	// ExpectationPerBranch
	338	VVVdouble posteriorsGivenTerminalsTotal;
	339	resizeVVV(_tr.getNodesNum(),_sp->alphabetSize(),_sp->alphabetSize(),posteriorsGivenTerminalsTotal);
	340	for (int pos = 0; pos <_sc.seqLen(); ++pos){
	341	for(int i=0;i<posteriorsGivenTerminals[pos].size();++i){
	342	for(int j=0;j<posteriorsGivenTerminals[pos][i].size();++j){
	343	for(int k=0;k<posteriorsGivenTerminals[pos][i][j].size();++k){
	344	posteriorsGivenTerminalsTotal[i][j][k] += posteriorsGivenTerminals[pos][i][j][k];
	345	}
	346	}
	347	}
	348	}
	349	string gainLossExpectationPerBranch = gainLossOptions::_outDir + "//" + "gainLossExpectationPerBranch.txt";
	350	ofstream gainLossExpectationPerBranchStream(gainLossExpectationPerBranch.c_str());
	351	printGainLossExpectationPerBranch(posteriorsGivenTerminalsTotal,gainLossExpectationPerBranchStream);
	352
	353
	354	// ProbabilityPerPosPerBranch - Print Trees
	355	Vstring Vnames;
	356	fillVnames(Vnames,_tr);
	357	if(gainLossOptions::_printTreesWithProbabilityValuesAsBP){
	358	createDir(gainLossOptions::_outDir, "TreesWithProbabilityValuesAsBP");
	359	for (int pos = 0; pos <_sc.seqLen(); ++pos){
	360	string strTreeNum = gainLossOptions::_outDir + "//" + "TreesWithProbabilityValuesAsBP"+ "//" + "probTree" + int2string(pos+1) + ".ph";
	361	ofstream tree_out(strTreeNum.c_str());
	362	printTreeWithValuesAsBP(tree_out,_tr,Vnames,&probChangesForBranch[pos]);
	363	}
	364	}
	365	// ExpectationPerPosPerBranch - Print Trees
	366	if(gainLossOptions::_printTreesWithExpectationValuesAsBP){
	367	createDir(gainLossOptions::_outDir, "TreesWithExpectationValuesAsBP");
	368	for (int pos = 0; pos <_sc.seqLen(); ++pos){
	369	string strTreeNum = gainLossOptions::_outDir + "//" + "TreesWithExpectationValuesAsBP" + "//" + "expTree" + int2string(pos+1) + ".ph";
	370	ofstream tree_out(strTreeNum.c_str());
	371	printTreeWithValuesAsBP(tree_out,_tr,Vnames,&posteriorsGivenTerminals[pos]);
	372	}
	373	}
	374	}
	375	/********************************************************************************************
	376	*********************************************************************************************/
	377	void gainLoss::printGainLossProbabilityPerPosPerBranch(int pos, MDOUBLE probCutOff, VVVdouble& probChanges, ostream& out, ostream& outCount)
	378	{
	379	MDOUBLE count01 =0;
	380	MDOUBLE count10 =0;
	381	treeIterTopDownConst tIt(_tr);
	382	for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
	383	if (probChanges[mynode->id()][0][1] > probCutOff){
	384	out<<"gain"<<"\t"<<pos<<"\t"<<mynode->name()<<"\t"<<mynode->dis2father()<<"\t"<<getDistance2ROOT(mynode)<<"\t"<<probChanges[mynode->id()][0][1]<<endl;
	385	count01+= probChanges[mynode->id()][0][1];
	386	}
	387	//}
	388	//for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
	389	if (probChanges[mynode->id()][1][0] > probCutOff){
	390	out<<"loss"<<"\t"<<pos<<"\t"<<mynode->name()<<"\t"<<mynode->dis2father()<<"\t"<<getDistance2ROOT(mynode)<<"\t"<<probChanges[mynode->id()][1][0]<<endl;
	391	count10+= probChanges[mynode->id()][1][0];
	392	}
	393	}
	394	outCount<<pos<<"\t"<<count01<<"\t"<<count10<<endl;
	395	}
	396	//////////////////////////////////////////////////////////////////////////
	397	void gainLoss::printGainLossExpectationPerBranch(VVVdouble& expectChanges, ostream& out)
	398	{
	399	treeIterTopDownConst tIt(_tr);
	400	out<<"# Gain"<<"\n";
	401	out<<"branch"<<"\t"<<"branchLength"<<"\t"<<"distance2root"<<"\t"<<"expectation"<<endl;
	402	for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
	403	out<<mynode->name()<<"\t"<<mynode->dis2father()<<"\t"<<getDistance2ROOT(mynode)<<"\t"<<expectChanges[mynode->id()][0][1]<<endl;
	404	}
	405	out<<"# Loss"<<"\n";
	406	out<<"branch"<<"\t"<<"branchLength"<<"\t"<<"distance2root"<<"\t"<<"expectation"<<endl;
	407	for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
	408	out<<mynode->name()<<"\t"<<mynode->dis2father()<<"\t"<<getDistance2ROOT(mynode)<<"\t"<<expectChanges[mynode->id()][1][0]<<endl;
	409	}
	410	}

+34

-0

programs/gainLoss/gainLoss.sln less more

	0	Microsoft Visual Studio Solution File, Format Version 11.00
	1	# Visual Studio 2010
	2	Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "gainLoss", "gainLoss.vcxproj", "{6D0F7BDF-4CCD-4F13-90A2-0A8D9C8C287C}"
	3	ProjectSection(ProjectDependencies) = postProject
	4	{BA8A8E1A-7D21-4070-B4B7-FFD6FFF651EE} = {BA8A8E1A-7D21-4070-B4B7-FFD6FFF651EE}
	5	EndProjectSection
	6	EndProject
	7	Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "phylogenyLib", "..\..\libs\phylogeny\phylogeny.vcxproj", "{BEB52DB0-2B2A-41F0-BB49-9EC9817ACBEE}"
	8	EndProject
	9	Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "alglib", "..\..\libs\alglib-3.4.0\alglib\alglib.vcxproj", "{BA8A8E1A-7D21-4070-B4B7-FFD6FFF651EE}"
	10	EndProject
	11	Global
	12	GlobalSection(SolutionConfigurationPlatforms) = preSolution
	13	Debug|Win32 = Debug|Win32
	14	Release|Win32 = Release|Win32
	15	EndGlobalSection
	16	GlobalSection(ProjectConfigurationPlatforms) = postSolution
	17	{6D0F7BDF-4CCD-4F13-90A2-0A8D9C8C287C}.Debug|Win32.ActiveCfg = Debug|Win32
	18	{6D0F7BDF-4CCD-4F13-90A2-0A8D9C8C287C}.Debug|Win32.Build.0 = Debug|Win32
	19	{6D0F7BDF-4CCD-4F13-90A2-0A8D9C8C287C}.Release|Win32.ActiveCfg = Release|Win32
	20	{6D0F7BDF-4CCD-4F13-90A2-0A8D9C8C287C}.Release|Win32.Build.0 = Release|Win32
	21	{BEB52DB0-2B2A-41F0-BB49-9EC9817ACBEE}.Debug|Win32.ActiveCfg = Debug|Win32
	22	{BEB52DB0-2B2A-41F0-BB49-9EC9817ACBEE}.Debug|Win32.Build.0 = Debug|Win32
	23	{BEB52DB0-2B2A-41F0-BB49-9EC9817ACBEE}.Release|Win32.ActiveCfg = Release|Win32
	24	{BEB52DB0-2B2A-41F0-BB49-9EC9817ACBEE}.Release|Win32.Build.0 = Release|Win32
	25	{BA8A8E1A-7D21-4070-B4B7-FFD6FFF651EE}.Debug|Win32.ActiveCfg = Debug|Win32
	26	{BA8A8E1A-7D21-4070-B4B7-FFD6FFF651EE}.Debug|Win32.Build.0 = Debug|Win32
	27	{BA8A8E1A-7D21-4070-B4B7-FFD6FFF651EE}.Release|Win32.ActiveCfg = Release|Win32
	28	{BA8A8E1A-7D21-4070-B4B7-FFD6FFF651EE}.Release|Win32.Build.0 = Release|Win32
	29	EndGlobalSection
	30	GlobalSection(SolutionProperties) = preSolution
	31	HideSolutionNode = FALSE
	32	EndGlobalSection
	33	EndGlobal

+270

-0

programs/gainLoss/gainLoss.vcproj less more

	0	<?xml version="1.0" encoding="windows-1255"?>
	1	<VisualStudioProject
	2	ProjectType="Visual C++"
	3	Version="7.10"
	4	Name="gainLoss"
	5	ProjectGUID="{6D0F7BDF-4CCD-4F13-90A2-0A8D9C8C287C}"
	6	Keyword="Win32Proj">
	7	<Platforms>
	8	<Platform
	9	Name="Win32"/>
	10	</Platforms>
	11	<Configurations>
	12	<Configuration
	13	Name="Debug|Win32"
	14	OutputDirectory="Debug"
	15	IntermediateDirectory="Debug"
	16	ConfigurationType="1"
	17	CharacterSet="2">
	18	<Tool
	19	Name="VCCLCompilerTool"
	20	Optimization="0"
	21	AdditionalIncludeDirectories="..\..\libs\phylogeny;C:\Program Files\boost\boost_1_47"
	22	PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
	23	MinimalRebuild="TRUE"
	24	BasicRuntimeChecks="3"
	25	RuntimeLibrary="5"
	26	RuntimeTypeInfo="TRUE"
	27	UsePrecompiledHeader="0"
	28	WarningLevel="3"
	29	Detect64BitPortabilityProblems="TRUE"
	30	DebugInformationFormat="4"/>
	31	<Tool
	32	Name="VCCustomBuildTool"/>
	33	<Tool
	34	Name="VCLinkerTool"
	35	OutputFile="$(OutDir)/gainLoss.exe"
	36	LinkIncremental="2"
	37	GenerateDebugInformation="TRUE"
	38	ProgramDatabaseFile="$(OutDir)/gainLoss.pdb"
	39	SubSystem="1"
	40	TargetMachine="1"/>
	41	<Tool
	42	Name="VCMIDLTool"/>
	43	<Tool
	44	Name="VCPostBuildEventTool"/>
	45	<Tool
	46	Name="VCPreBuildEventTool"/>
	47	<Tool
	48	Name="VCPreLinkEventTool"/>
	49	<Tool
	50	Name="VCResourceCompilerTool"/>
	51	<Tool
	52	Name="VCWebServiceProxyGeneratorTool"/>
	53	<Tool
	54	Name="VCXMLDataGeneratorTool"/>
	55	<Tool
	56	Name="VCWebDeploymentTool"/>
	57	<Tool
	58	Name="VCManagedWrapperGeneratorTool"/>
	59	<Tool
	60	Name="VCAuxiliaryManagedWrapperGeneratorTool"/>
	61	</Configuration>
	62	<Configuration
	63	Name="Release|Win32"
	64	OutputDirectory="Release"
	65	IntermediateDirectory="Release"
	66	ConfigurationType="1"
	67	CharacterSet="2">
	68	<Tool
	69	Name="VCCLCompilerTool"
	70	AdditionalIncludeDirectories="..\..\libs\phylogeny"
	71	PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
	72	RuntimeLibrary="4"
	73	RuntimeTypeInfo="TRUE"
	74	UsePrecompiledHeader="0"
	75	WarningLevel="3"
	76	Detect64BitPortabilityProblems="TRUE"
	77	DebugInformationFormat="3"/>
	78	<Tool
	79	Name="VCCustomBuildTool"/>
	80	<Tool
	81	Name="VCLinkerTool"
	82	OutputFile="$(OutDir)/gainLoss.exe"
	83	LinkIncremental="1"
	84	GenerateDebugInformation="TRUE"
	85	SubSystem="1"
	86	OptimizeReferences="2"
	87	EnableCOMDATFolding="2"
	88	TargetMachine="1"/>
	89	<Tool
	90	Name="VCMIDLTool"/>
	91	<Tool
	92	Name="VCPostBuildEventTool"/>
	93	<Tool
	94	Name="VCPreBuildEventTool"/>
	95	<Tool
	96	Name="VCPreLinkEventTool"/>
	97	<Tool
	98	Name="VCResourceCompilerTool"/>
	99	<Tool
	100	Name="VCWebServiceProxyGeneratorTool"/>
	101	<Tool
	102	Name="VCXMLDataGeneratorTool"/>
	103	<Tool
	104	Name="VCWebDeploymentTool"/>
	105	<Tool
	106	Name="VCManagedWrapperGeneratorTool"/>
	107	<Tool
	108	Name="VCAuxiliaryManagedWrapperGeneratorTool"/>
	109	</Configuration>
	110	</Configurations>
	111	<References>
	112	</References>
	113	<Files>
	114	<Filter
	115	Name="Source Files"
	116	Filter="cpp;c;cxx;def;odl;idl;hpj;bat;asm;asmx"
	117	UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}">
	118	<File
	119	RelativePath=".\ancestralReconstructStates.cpp">
	120	</File>
	121	<File
	122	RelativePath=".\bblLS.cpp">
	123	</File>
	124	<File
	125	RelativePath=".\computeCorrelations.cpp">
	126	</File>
	127	<File
	128	RelativePath=".\computeCountsGL.cpp">
	129	</File>
	130	<File
	131	RelativePath=".\computePosteriorExpectationOfChange.cpp">
	132	</File>
	133	<File
	134	RelativePath=".\gainLoss.cpp">
	135	</File>
	136	<File
	137	RelativePath=".\gainLoss4site.cpp">
	138	</File>
	139	<File
	140	RelativePath=".\gainLossModel.cpp">
	141	</File>
	142	<File
	143	RelativePath=".\gainLossOptimizer.cpp">
	144	</File>
	145	<File
	146	RelativePath=".\gainLossOptions.cpp">
	147	</File>
	148	<File
	149	RelativePath=".\gainLossUtils.cpp">
	150	</File>
	151	<File
	152	RelativePath=".\optimizeGainLossModel.cpp">
	153	</File>
	154	<File
	155	RelativePath=".\optimizeGainLossModelVV.cpp">
	156	</File>
	157	<File
	158	RelativePath=".\rate4siteGL.cpp">
	159	</File>
	160	<File
	161	RelativePath=".\sankoffReconstructGL.cpp">
	162	</File>
	163	<File
	164	RelativePath=".\simulateChangesAlongTree.cpp">
	165	</File>
	166	<File
	167	RelativePath=".\simulateOnePos.cpp">
	168	</File>
	169	<File
	170	RelativePath=".\siteSpecificGL.cpp">
	171	</File>
	172	</Filter>
	173	<Filter
	174	Name="Header Files"
	175	Filter="h;hpp;hxx;hm;inl;inc;xsd"
	176	UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}">
	177	<File
	178	RelativePath=".\ancestralReconstructStates.h">
	179	</File>
	180	<File
	181	RelativePath=".\bblLS.h">
	182	</File>
	183	<File
	184	RelativePath=".\computeCorrelations.h">
	185	</File>
	186	<File
	187	RelativePath=".\computePosteriorExpectationOfChange.h">
	188	</File>
	189	<File
	190	RelativePath=".\gainLoss.h">
	191	</File>
	192	<File
	193	RelativePath=".\gainLoss4site.h">
	194	</File>
	195	<File
	196	RelativePath=".\gainLossModel.h">
	197	</File>
	198	<File
	199	RelativePath=".\gainLossOptimizer.h">
	200	</File>
	201	<File
	202	RelativePath=".\gainLossOptions.h">
	203	</File>
	204	<File
	205	RelativePath=".\gainLossUtils.h">
	206	</File>
	207	<File
	208	RelativePath=".\optimizeGainLossModel.h">
	209	</File>
	210	<File
	211	RelativePath=".\optimizeGainLossModelVV.h">
	212	</File>
	213	<File
	214	RelativePath=".\rate4siteGL.h">
	215	</File>
	216	<File
	217	RelativePath=".\sankoffReconstructGL.h">
	218	</File>
	219	<File
	220	RelativePath=".\simulateChangesAlongTree.h">
	221	</File>
	222	<File
	223	RelativePath=".\simulateOnePos.h">
	224	</File>
	225	<File
	226	RelativePath=".\siteSpecificGL.h">
	227	</File>
	228	</Filter>
	229	<Filter
	230	Name="Manuals"
	231	Filter="">
	232	<File
	233	RelativePath=".\classesInherit.costurs.clone.assignment.txt">
	234	</File>
	235	<File
	236	RelativePath=".\junk.txt">
	237	</File>
	238	<File
	239	RelativePath=".\likelihoodClasses.suffStat.computeUp.computeDown.txt">
	240	</File>
	241	<File
	242	RelativePath=".\LOG chages.txt">
	243	</File>
	244	<File
	245	RelativePath=".\LpostPerCat.PerSp.txt">
	246	</File>
	247	<File
	248	RelativePath=".\RootFreq.and.Reversibility.MixModels.txt">
	249	</File>
	250	<File
	251	RelativePath=".\setUnObs.txt">
	252	</File>
	253	<File
	254	RelativePath=".\stochasticProcessLayers.txt">
	255	</File>
	256	</Filter>
	257	<File
	258	RelativePath=".\test\Dist.Debug.params">
	259	</File>
	260	<File
	261	RelativePath=".\gainLossProject.cpp">
	262	</File>
	263	<File
	264	RelativePath=".\Makefile">
	265	</File>
	266	</Files>
	267	<Globals>
	268	</Globals>
	269	</VisualStudioProject>

+129

-0

programs/gainLoss/gainLoss4site.cpp less more

	0	/*
	1	Copyright (C) 2011 Tal Pupko TalP@tauex.tau.ac.il.
	2
	3	This program is free software: you can redistribute it and/or modify
	4	it under the terms of the GNU General Public License as published by
	5	the Free Software Foundation, either version 3 of the License, or
	6	(at your option) any later version.
	7
	8	This program is distributed in the hope that it will be useful,
	9	but WITHOUT ANY WARRANTY; without even the implied warranty of
	10	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	11	GNU General Public License for more details.
	12
	13	You should have received a copy of the GNU General Public License
	14	along with this program. If not, see <http://www.gnu.org/licenses/>.
	15	*/
	16	#include "gainLoss4site.h"
	17
	18	/********************************************************************************************
	19	gainLoss4site
	20	*********************************************************************************************/
	21	gainLoss4site::gainLoss4site(sequenceContainer& sc, tree& tr,
	22	vector<vector<stochasticProcess> > spVVec,distribution gainDist,distribution* lossDist,
	23	string& outDir, unObservableData* unObservableData_p, MDOUBLE alphaConf):
	24	_tr(tr),_spVVec(spVVec),_gainDist(gainDist),_lossDist(lossDist),_sc(sc),_outDir(outDir),_unObservableData_p(unObservableData_p),_alphaConf(alphaConf)
	25	{
	26	//init:
	27	_refSeq = &(_sc[0]);
	28	}
	29
	30	gainLoss4site& gainLoss4site::operator=(const gainLoss4site &other){
	31	if (this != &other) { // Check for self-assignment
	32	}
	33	return *this;
	34	}
	35
	36	/********************************************************************************************
	37	*********************************************************************************************/
	38	void gainLoss4site::computeGain4Site()
	39	{
	40	LOGnOUT (4,<<"perform computeGain4Site... while computing posteriorProb PerCategory PerPosition"<<endl);
	41
	42	initializeLpostPerSpPerCat();
	43	computeEB_EXP_siteSpecificGL(_gainV, _stdGainV, _lowerBoundGainV, _upperBoundGainV, _posteriorsGainV, _sc, _spVVec, _tr, _gainDist,_lossDist,_gainDist,
	44	_alphaConf,_postProbPerSpPerCatPerPos,_unObservableData_p);
	45	}
	46
	47	/********************************************************************************************
	48	*********************************************************************************************/
	49	void gainLoss4site::computeLoss4Site()
	50	{
	51	LOGnOUT (4,<<"perform computeLoss4Site... while computing posteriorProb PerCategory PerPosition"<<endl);
	52	initializeLpostPerSpPerCat();
	53	computeEB_EXP_siteSpecificGL(_lossV, _stdLossV, _lowerBoundLossV, _upperBoundLossV, _posteriorsLossV, _sc, _spVVec, _tr, _gainDist,_lossDist,_lossDist,
	54	_alphaConf,_postProbPerSpPerCatPerPos,_unObservableData_p);
	55	}
	56
	57	/********************************************************************************************
	58	*********************************************************************************************/
	59	void gainLoss4site::printGain4Site()
	60	{
	61	//ofstream outGain(gainLossOptions::_outFileGain4Site.c_str());
	62	string g4s = _outDir + "//" + "gain4site.txt";
	63	ofstream outGain(g4s.c_str());
	64	outGain.precision(PRECISION);
	65	printGainLossBayes(outGain,_gainV,_lowerBoundGainV,_upperBoundGainV, _posteriorsGainV, _gainDist,_spVVec[0][0]);
	66	outGain.close();
	67
	68	}
	69	/********************************************************************************************
	70	*********************************************************************************************/
	71	void gainLoss4site::printLoss4Site()
	72	{
	73	//ofstream outLoss(gainLossOptions::_outFileLoss4Site.c_str());
	74	string l4s = _outDir + "//" + "loss4site.txt";
	75	ofstream outLoss(l4s.c_str());
	76	outLoss.precision(PRECISION);
	77	printGainLossBayes(outLoss,_lossV,_lowerBoundLossV,_upperBoundLossV, _posteriorsLossV, _lossDist,_spVVec[0][0]);
	78	outLoss.close();
	79	}
	80
	81	/********************************************************************************************
	82	*********************************************************************************************/
	83	void gainLoss4site::printGainLossBayes(ostream& out, const Vdouble& rate2printV, const Vdouble& lowerBoundV, const Vdouble& upperBoundV,const VVdouble& posteriorV, const distribution* dist, const stochasticProcess* sp)
	84	{
	85	out.precision(7);
	86	out<<"# Empirical Bayesian Rates"<<endl;
	87	//out<<"#Displayed on sequence "<<_refSeq->name()<<endl;
	88	if(sp->categories() > 1){
	89	out<<"# each sp with overall rate distribution cat: ";
	90	for (int cat = 0; cat < sp->categories(); ++cat)
	91	out<<sp->rates(cat)<<" ";
	92	out<<endl;
	93	}
	94	out<<"# Rate in each gamma category: ";
	95	for (int cat = 0; cat <dist->categories(); ++cat)
	96	out<<"category "<<cat+1<<", Rate= "<<dist->rates(cat)<<" ";
	97	out<<endl;
	98	out<<"# Posterior probability for each category, and each position is given.\n";
	99
	100	//out<<"========================================================================================================================================================="<<endl;
	101	out<<"POS\t"<<"Rate\t";//<<"[Confidence Interval]\t";
	102	for (int cat = 0; cat <dist->categories(); ++cat)
	103	out<<"Categ "<<cat+1<<"\t";
	104	out<<endl;
	105	int numOfCategories = dist->categories();
	106	for (int i=0;i<_sc.seqLen();i++){
	107	//string aaStr = _refSeq->getAlphabet()->fromInt((*_refSeq)[i]);
	108	out<<i+1 /<<"\t"<<aaStr/<<"\t"<< rate2printV[i]<<"\t";
	109	//<<"["<<lowerBoundV[i]<<","<<upperBoundV[i]<<"]\t";
	110	//if (lowerBoundV[i]>1) out <<"*"; //significance indicator: if entire confidence interval >1
	111	for (int cat = 0; cat < numOfCategories; ++cat)
	112	out<<posteriorV[i][cat]<<"\t";
	113	out<<endl;
	114	}
	115	}
	116
	117	/********************************************************************************************
	118	*********************************************************************************************/
	119	void gainLoss4site::initializeLpostPerSpPerCat()
	120	{
	121	int numOfSPs = _gainDist->categories()*_lossDist->categories();
	122	int rateCategories = _spVVec[0][0]->categories();
	123	if(_postProbPerSpPerCatPerPos.size()==0){
	124	resizeVVV(numOfSPs,rateCategories,_sc.seqLen(),_postProbPerSpPerCatPerPos);
	125	}
	126	}
	127
	128

+84

-0

programs/gainLoss/gainLoss4site.h less more

	0	/*
	1	Copyright (C) 2011 Tal Pupko TalP@tauex.tau.ac.il.
	2
	3	This program is free software: you can redistribute it and/or modify
	4	it under the terms of the GNU General Public License as published by
	5	the Free Software Foundation, either version 3 of the License, or
	6	(at your option) any later version.
	7
	8	This program is distributed in the hope that it will be useful,
	9	but WITHOUT ANY WARRANTY; without even the implied warranty of
	10	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	11	GNU General Public License for more details.
	12
	13	You should have received a copy of the GNU General Public License
	14	along with this program. If not, see <http://www.gnu.org/licenses/>.
	15	*/
	16
	17
	18	#ifndef ___GAIN_LOSS_4site
	19	#define ___GAIN_LOSS_4site
	20
	21	#include "definitions.h"
	22	#include "replacementModel.h"
	23	#include "gainLoss.h"
	24	#include "siteSpecificRate.h"
	25	#include "siteSpecificGL.h"
	26
	27	/********************************************************************************************
	28	gainLoss4site
	29	*********************************************************************************************/
	30	class gainLoss4site{
	31	public:
	32	explicit gainLoss4site(sequenceContainer& sc, tree& tr, vector<vector<stochasticProcess> > spVVec,distribution gainDist,distribution* lossDist,
	33	string& outDir, unObservableData* unObservableData_p, MDOUBLE alphaConf= 0.05);
	34	gainLoss4site(const gainLoss4site& other) {*this = other;}
	35	gainLoss4site& operator=(const gainLoss4site &other);
	36	virtual ~gainLoss4site() {;}
	37
	38	void computeGain4Site();
	39	void computeLoss4Site();
	40	void printGain4Site();
	41	void printLoss4Site();
	42
	43	Vdouble get_gainV(){return _gainV;};
	44	Vdouble get_lossV(){return _lossV;};
	45
	46	Vdouble get_stdGainV(){return _stdGainV;};
	47	Vdouble get_stdLossV(){return _stdLossV;};
	48
	49	VVdouble get_posteriorsGainV(){return _posteriorsGainV;};
	50	VVdouble get_posteriorsLossV(){return _posteriorsLossV;};
	51	VVVdouble getLpostPerSpPerCat() {return _postProbPerSpPerCatPerPos;}
	52	void initializeLpostPerSpPerCat();
	53
	54
	55	protected:
	56	//func
	57	void printGainLossBayes(ostream& out, const Vdouble& rate2printV, const Vdouble& lowerBoundV, const Vdouble& upperBoundV,const VVdouble& posteriorV, const distribution* dist,const stochasticProcess* sp);
	58
	59	protected:
	60	vector<vector<stochasticProcess*> > _spVVec; //save stochasticProcess for each category
	61	distribution* _gainDist;
	62	distribution* _lossDist;
	63
	64	VVVdouble _postProbPerSpPerCatPerPos; // the posterior probability for each stochastic process for each Cat for each site
	65
	66
	67	tree _tr;
	68	sequenceContainer _sc;
	69	sequence* _refSeq; // the reference sequence
	70	string _outDir;
	71
	72	Vdouble _gainV,_stdGainV,_lowerBoundGainV,_upperBoundGainV;
	73	VVdouble _posteriorsGainV;
	74
	75	Vdouble _lossV,_stdLossV,_lowerBoundLossV,_upperBoundLossV;
	76	VVdouble _posteriorsLossV;
	77	MDOUBLE _alphaConf;
	78	unObservableData* _unObservableData_p; //
	79
	80	};
	81
	82
	83	#endif

+75

-0

programs/gainLoss/gainLossAlphabet.cpp less more

	0	/*
	1	Copyright (C) 2011 Tal Pupko TalP@tauex.tau.ac.il.
	2
	3	This program is free software: you can redistribute it and/or modify
	4	it under the terms of the GNU General Public License as published by
	5	the Free Software Foundation, either version 3 of the License, or
	6	(at your option) any later version.
	7
	8	This program is distributed in the hope that it will be useful,
	9	but WITHOUT ANY WARRANTY; without even the implied warranty of
	10	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	11	GNU General Public License for more details.
	12
	13	You should have received a copy of the GNU General Public License
	14	along with this program. If not, see <http://www.gnu.org/licenses/>.
	15	*/
	16	#include "gainLossAlphabet.h"
	17
	18	gainLossAlphabet::gainLossAlphabet() {}
	19
	20	int gainLossAlphabet::fromChar(const char s) const{
	21	switch (s) {
	22	case '0': return 0; break;
	23	case '1': return 1; break;
	24	case '-' : case'_' : return -1; break;
	25
	26	default:
	27	vector<string> err;
	28	err.push_back(" The gainLoss sequences contained the character: ");
	29	err[0]+=s;
	30	err.push_back(" gainLoss was not one of the following: ");
	31	err.push_back(" 0, 1");
	32	errorMsg::reportError(err);
	33	}// end of switch
	34	return -99; // never suppose to be here.
	35	}// end of function
	36
	37	vector<int> gainLossAlphabet::fromString(const string &str) const {
	38	vector<int> vec;
	39	for (int i=0;i<str.size();i++)
	40	vec.push_back(fromChar(str[i]));
	41	return vec;
	42	}
	43
	44	string gainLossAlphabet::fromInt(const int in_id) const{
	45	char res = 0;
	46	switch (in_id) {
	47	case 0 : res = '0' ; break;
	48	case 1 : res = '1' ; break;
	49	case -2 : res = '-'; break;
	50	default:
	51	vector<string> err;
	52	err.push_back("unable to print gainLoss_id. gainLossl_id was not one of the following: ");
	53	err.push_back("0,1,2");
	54	errorMsg::reportError(err);
	55	}//end of switch
	56	string vRes;
	57	vRes.append(1,res);
	58	return vRes;
	59	}// end of function
	60
	61	// There are no relations here.
	62	int gainLossAlphabet::relations(const int charInSeq, const int charToCheck) const{
	63	if (charInSeq == charToCheck)
	64	return 1;
	65	return 0;
	66	}
	67
	68	int gainLossAlphabet::fromChar(const string& str, const int pos) const{
	69	return fromChar(str[pos]);
	70	}
	71
	72
	73
	74

+43

-0

programs/gainLoss/gainLossAlphabet.h less more

	0	/*
	1	Copyright (C) 2011 Tal Pupko TalP@tauex.tau.ac.il.
	2
	3	This program is free software: you can redistribute it and/or modify
	4	it under the terms of the GNU General Public License as published by
	5	the Free Software Foundation, either version 3 of the License, or
	6	(at your option) any later version.
	7
	8	This program is distributed in the hope that it will be useful,
	9	but WITHOUT ANY WARRANTY; without even the implied warranty of
	10	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	11	GNU General Public License for more details.
	12
	13	You should have received a copy of the GNU General Public License
	14	along with this program. If not, see <http://www.gnu.org/licenses/>.
	15	*/
	16
	17
	18	#ifndef ___GAIN_LOSS_ALPH
	19	#define ___GAIN_LOSS_ALPH
	20
	21	#include "alphabet.h"
	22	#include "errorMsg.h"
	23
	24	class gainLossAlphabet : public alphabet {
	25	public:
	26	explicit gainLossAlphabet();
	27	virtual ~gainLossAlphabet() {}
	28	virtual alphabet* clone() const { return new gainLossAlphabet(*this); }
	29	int unknown() const {return -2;}
	30	int gap() const {errorMsg::reportError("The method indel::gap() is used"); return -1;} // What is it for ? I don't need this !!!
	31	int size() const {return 2;} // presence or absence only
	32	int stringSize() const {return 1;} // one letter code.
	33	int relations(const int charInSeq, const int charToCheck) const;
	34	int fromChar(const string& str, const int pos) const;
	35	int fromChar(const char s) const;
	36	string fromInt(const int in_id) const;
	37	vector<int> fromString(const string& str) const;
	38	bool isSpecific(const int id) const {return (id>=0 && id < size());}
	39
	40	};
	41
	42	#endif

+305

-0

programs/gainLoss/gainLossModel.cpp less more

	0	/*
	1	Copyright (C) 2011 Tal Pupko TalP@tauex.tau.ac.il.
	2
	3	This program is free software: you can redistribute it and/or modify
	4	it under the terms of the GNU General Public License as published by
	5	the Free Software Foundation, either version 3 of the License, or
	6	(at your option) any later version.
	7
	8	This program is distributed in the hope that it will be useful,
	9	but WITHOUT ANY WARRANTY; without even the implied warranty of
	10	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	11	GNU General Public License for more details.
	12
	13	You should have received a copy of the GNU General Public License
	14	along with this program. If not, see <http://www.gnu.org/licenses/>.
	15	*/
	16	#include "gainLossModel.h"
	17
	18	/********************************************************************************************
	19	gainLossModel
	20	Note: All gainLossOptions parameter are sent
	21	to the c'tor as a preparation for the model to be part of the Lib.
	22	*********************************************************************************************/
	23	gainLossModel::gainLossModel(const MDOUBLE m1, const Vdouble freq, bool isRootFreqEQstationary, bool isReversible, bool isHGT_normal_Pij, bool isHGT_with_Q):
	24	_gain(m1),_freq(freq),_isRootFreqEQstationary(isRootFreqEQstationary),_isReversible(isReversible),_isHGT_normal_Pij(isHGT_normal_Pij),_isHGT_with_Q(isHGT_with_Q),_q2pt(NULL){
	25	if (freq.size() != alphabetSize())
	26	errorMsg::reportError("Error in gainLossModel, size of frequency vector must be as in alphabet");
	27	for(int i=0; i<freq.size(); ++i)
	28	if(freq[i]<0 \|\| freq[i]>1)
	29	errorMsg::reportError("Freq not within [0,1]\n");
	30	if(!_isHGT_with_Q){_gain = 0;}
	31	resizeMatrix(_Q,alphabetSize(),alphabetSize());
	32	updateQ(_isReversible);
	33	//setTheta(_freq[1]); // no Need
	34	if(_isRootFreqEQstationary) {
	35	setTheta(getMu1()/(getMu1()+getMu2()));
	36	}
	37	}
	38	/********************************************************************************************
	39	*********************************************************************************************/
	40	gainLossModel& gainLossModel::operator=(const gainLossModel &other){
	41	if (this != &other) { // Check for self-assignment
	42	if (_q2pt) delete _q2pt;
	43	if (other._q2pt != NULL)
	44	_q2pt = (q2pt*)(other._q2pt->clone());
	45	}
	46	_isReversible = other.isReversible();
	47	_isRootFreqEQstationary = other.isRootFreqEQstationary();
	48	_isHGT_normal_Pij = other.isHGT_normal_Pij();
	49	_isHGT_with_Q = other.isHGT_with_Q();
	50	_gain = other._gain;
	51	_freq = other._freq;
	52	_Q = other._Q;
	53	return *this;
	54	}
	55	/********************************************************************************************
	56	*********************************************************************************************/
	57	void gainLossModel::setMu1(const MDOUBLE val, bool isReversible) {
	58	if(_isHGT_with_Q) {_gain = val;}
	59	updateQ(isReversible);
	60	if(_isRootFreqEQstationary) {
	61	setTheta(getMu1()/(getMu1()+getMu2()));
	62	}
	63	//if(gainLossOptions::_isNormalizeQ) // part of update Q
	64	// normalizeQ();
	65	}
	66	/********************************************************************************************
	67	*********************************************************************************************/
	68	MDOUBLE gainLossModel::setTheta(const MDOUBLE val) {
	69	if(val<0 \|\| val>1)
	70	errorMsg::reportError("Freq not within [0,1]\n");
	71	_freq[1]=val;
	72	_freq[0]= 1-val;
	73	MDOUBLE normFactor = updateQ(_isReversible);
	74	return normFactor;
	75	}
	76
	77	/********************************************************************************************
	78	*********************************************************************************************/
	79	MDOUBLE gainLossModel::updateQ(bool isReversible){
	80	MDOUBLE normFactor=1;
	81	_Q[0][1] = _gain;
	82	_Q[0][0] = -_Q[0][1];
	83
	84	if (isReversible) {
	85	_Q[1][0] = _Q[0][1] * _freq[0] / _freq[1]; // m1*pi0/pi1
	86	_Q[1][1] = -_Q[1][0];
	87	}
	88	//else{
	89	// _Q[1][0] = 1; //To be overwritten by gainLossModelNonReversible
	90	// _Q[1][1] = -1; //To be overwritten by gainLossModelNonReversible
	91	//}
	92	if (gainLossOptions::_gainEQloss) {
	93	_Q[1][0] = _gain;
	94	_Q[1][1] = -_Q[1][0];
	95	}
	96	if (gainLossOptions::_gainLossRateAreFreq) {
	97	_Q[1][0] = 1 - _gain;
	98	_Q[1][1] = -_Q[1][0];
	99	}
	100
	101	for (int i=0; i<_Q.size();i++) {
	102	MDOUBLE sum = _Q[i][0]+_Q[i][1];
	103	if ((abs(sum)>err_allow_for_pijt_function()))
	104	errorMsg::reportError("Error in gainLossModel::updateQ, sum of row is not 0");
	105	}
	106	//if (isReversible){
	107	// if (!_q2pt)
	108	// _q2pt = new q2pt();
	109	// _q2pt->fillFromRateMatrix(_freq,_Q);
	110	//}
	111	if(gainLossOptions::_isNormalizeQ && !gainLossOptions::_gainLossDist && (_Q[1][0]>0)) //
	112	normFactor= normalizeQ();
	113	return normFactor;
	114	}
	115	/********************************************************************************************
	116	*********************************************************************************************/
	117	const MDOUBLE gainLossModel::freq(const int i) const {
	118	if (i >= _freq.size())
	119	errorMsg::reportError("Error in gainLossModel::freq, i > size of frequency vector");
	120	return _freq[i];
	121	}
	122	/********************************************************************************************
	123	// normalize Q so that sum of changes = 1
	124	*********************************************************************************************/
	125	MDOUBLE gainLossModel::normalizeQ(){
	126	MDOUBLE norm_factor=0.0;
	127	for (int i=0;i<_Q.size();i++)
	128	norm_factor+=(_freq[i]*_Q[i][i]);
	129	MDOUBLE fac = -1.0/norm_factor;
	130	_Q = multiplyMatrixByScalar(_Q,fac);
	131	return fac;
	132	}
	133	/********************************************************************************************
	134	*********************************************************************************************/
	135	void gainLossModel::norm(const MDOUBLE scale)
	136	{
	137	for (int i=0; i < _Q.size(); ++i) {
	138	for (int j=0; j < _Q.size(); ++j) {
	139	_Q[i][j] *= scale;
	140	}
	141	}
	142	}
	143	/********************************************************************************************
	144	*********************************************************************************************/
	145	MDOUBLE gainLossModel::sumPijQij(){
	146	MDOUBLE sum=0.0;
	147	for (int i=0; i < _Q.size(); ++i) {
	148	sum -= (_Q[i][i])*_freq[i];
	149	}
	150	return sum;
	151	}
	152
	153	/********************************************************************************************
	154	Pij_t - Based on Analytic solution
	155	*********************************************************************************************/
	156	const MDOUBLE gainLossModel::Pij_t(const int i,const int j, const MDOUBLE d) const {
	157	MDOUBLE gain = getMu1();
	158	MDOUBLE loss = getMu2();
	159	MDOUBLE eigenvalue = -(gain + loss);
	160	bool withHGT = isHGT_normal_Pij();
	161
	162	MDOUBLE noHGTfactor = 0.0001;
	163
	164	VVdouble Pt;
	165	resizeMatrix(Pt,_Q.size(),_Q.size());
	166	int caseNum = i + j*2;
	167	switch (caseNum) {
	168	case 0 : Pt[0][0] = loss/(-eigenvalue) + exp(eigenvalued)(1 - loss/(-eigenvalue)); break;
	169	case 1 : Pt[1][0] = loss/(-eigenvalue) - exp(eigenvalued)(1 - gain/(-eigenvalue)); break;
	170	case 2 : if(withHGT)
	171	{ Pt[0][1] = gain/(-eigenvalue) - exp(eigenvalued)(1 - loss/(-eigenvalue));}
	172	else
	173	{ Pt[0][1] = (gain/(-eigenvalue) - exp(eigenvalued)(1 - loss/(-eigenvalue)))*noHGTfactor;} break;
	174	case 3 : Pt[1][1] = gain/(-eigenvalue) + exp(eigenvalued)(1 - gain/(-eigenvalue)); break;
	175	}
	176	MDOUBLE val = (Pt[i][j]);
	177	if (!pijt_is_prob_value(val)){
	178	string err = "Error in gainLossModelNonReversible::Pij_t, pijt <0 or >1. val=";
	179	err+=double2string(val);
	180	err+=" d=";
	181	err+=double2string(d);
	182	LOG(4,<<err<<endl); //errorMsg::reportError(err);
	183	}
	184	if(!(val>VERYSMALL))
	185	val = VERYSMALL;
	186	LOG(10,<<"for gain "<<gain<<" loss "<<loss<<" P"<<i<<j<<"("<<d<<") "<<val<<endl;)
	187	return val;
	188	}
	189
	190	/********************************************************************************************
	191	dPij_t - Based on Analytic solution
	192	*********************************************************************************************/
	193	const MDOUBLE gainLossModel::dPij_dt(const int i,const int j, const MDOUBLE d) const {
	194	MDOUBLE gain = getMu1();;
	195	MDOUBLE loss = getMu2();;
	196	MDOUBLE eigenvalue = -(gain + loss);
	197
	198	VVdouble Pt;
	199	resizeMatrix(Pt,_Q.size(),_Q.size());
	200	int caseNum = i + j*2;
	201	switch (caseNum) {
	202	case 0 : Pt[0][0] = exp(eigenvalued)(eigenvalue + loss); break;
	203	case 1 : Pt[1][0] = -(exp(eigenvalued)(eigenvalue + gain)); break;
	204	case 2 : Pt[0][1] = -(exp(eigenvalued)(eigenvalue + loss)); break;
	205	case 3 : Pt[1][1] = exp(eigenvalued)(eigenvalue + gain); break;
	206	}
	207	MDOUBLE val = (Pt[i][j]);
	208	//if (!pijt_is_prob_value(val)){
	209	// string err = "Error in gainLossModelNonReversible::dPij_t_dt, pijt <0 or >1. val=";
	210	// err+=double2string(val);
	211	// err+=" d=";
	212	// err+=double2string(d);
	213	// LOG(6,<<err<<endl); //errorMsg::reportError(err);
	214	//}
	215	return val;
	216	}
	217	/********************************************************************************************
	218	d2Pij_dt2 - Based on Analytic solution
	219	*********************************************************************************************/
	220	const MDOUBLE gainLossModel::d2Pij_dt2(const int i,const int j, const MDOUBLE d) const {
	221	MDOUBLE gain = getMu1();;
	222	MDOUBLE loss = getMu2();;
	223	MDOUBLE eigenvalue = -(gain + loss);
	224
	225	VVdouble Pt;
	226	resizeMatrix(Pt,_Q.size(),_Q.size());
	227	int caseNum = i + j*2;
	228	switch (caseNum) {
	229	case 0 : Pt[0][0] = exp(eigenvalued)(eigenvalue + loss)*eigenvalue; break;
	230	case 1 : Pt[1][0] = -(exp(eigenvalued)(eigenvalue + gain))*eigenvalue; break;
	231	case 2 : Pt[0][1] = -(exp(eigenvalued)(eigenvalue + loss))*eigenvalue; break;
	232	case 3 : Pt[1][1] = exp(eigenvalued)(eigenvalue + gain)*eigenvalue; break;
	233	}
	234	MDOUBLE val = (Pt[i][j]);
	235	//if (!pijt_is_prob_value(val)){
	236	// string err = "Error in gainLossModelNonReversible::d2Pij_t_dt2, pijt <0 or >1. val=";
	237	// err+=double2string(val);
	238	// LOG(6,<<err<<endl); //errorMsg::reportError(err);
	239	//}
	240	return val;
	241	}
	242
	243
	244
	245	/********************************************************************************************
	246	non reversible model
	247	updateQ
	248	*********************************************************************************************/
	249	//void gainLossModelNonReversible::updateQ(){
	250	// //gainLossModel::updateQ(false);
	251	// _Q[1][1] = -_loss;
	252	// _Q[1][0] = _loss;
	253	// //normalizeQ();
	254	//}
	255
	256
	257
	258
	259
	260	/********************************************************************************************
	261	Pij_t - converging series
	262	IMPORTANT NOTE: this function is VERY inefficient. It calculates all of Pt for every call of Pijt
	263	this is unimportant for a small dataset (one position) but pre-processing should be done for larger datasets:
	264
	265	SOLUTION: save the computed Pijt matrix each time it is called. In every call of Pij_t, check if a saved value exists
	266	*********************************************************************************************/
	267	//const MDOUBLE gainLossModelNonReversible::Pij_t(const int i,const int j, const MDOUBLE d) const {
	268	//
	269	// VVdoubleRep QdblRep;
	270	// resizeMatrix(QdblRep,_Q.size(),_Q.size());
	271	// for (int row=0;row<_Q.size();row++){
	272	// for (int col=0;col<_Q[row].size();col++)
	273	// QdblRep[row][col]=convert(_Q[row][col]);
	274	// }
	275	// VVdoubleRep Qt = multiplyMatrixByScalar(QdblRep,d);
	276	// VVdoubleRep unit;
	277	// unitMatrix(unit,_Q.size());
	278	// VVdoubleRep Pt = add(unit,Qt) ; // I + Qt
	279	// VVdoubleRep Qt_power = Qt;
	280	// doubleRep old_val = Pt[i][j];
	281	// doubleRep diff(1.0);
	282	// int n=2;
	283	// while ((diff>err_allow_for_pijt_function()) || (!pijt_is_prob_value(convert(Pt[i][j])))){//(abs(old_val-new_val) > err_allow_for_pijt_function()){
	284	// old_val = Pt[i][j];
	285	// Qt_power = multiplyMatrixes(Qt_power,multiplyMatrixByScalar(Qt,1.0/n));
	286	// Pt= add(Pt,Qt_power); // I + Qt + Qt^2/2! + .... + Qt^n/n!
	287	//
	288	// diff = Pt[i][j]-old_val; // difference is measured by diff between P[0][0] vals (a little primitive...)
	289	// if (diff<0) diff=-diff;
	290	// n++;
	291	// if (n>200) {
	292	// string err = "Error in gainLossModelNonReversible::Pij_t, too many (>n=200) iterations for t = " + double2string(d);
	293	// cerr<<diff<<endl;
	294	// errorMsg::reportError(err);
	295	// }
	296	// }
	297	// MDOUBLE val = convert(Pt[i][j]);
	298	// if (!pijt_is_prob_value(val))
	299	// errorMsg::reportError("Error in gainLossModelNonReversible::Pij_t, pijt <0 or >1");
	300	// LOG(10,<<"for gain "<<getMu1()<<" loss "<<getMu2()<<" P"<<i<<j<<"("<<d<<") "<<val<<endl;)
	301	//
	302	// return val;
	303	//}
	304	//

+189

-0

programs/gainLoss/gainLossModel.h less more

	0	/*
	1	Copyright (C) 2011 Tal Pupko TalP@tauex.tau.ac.il.
	2
	3	This program is free software: you can redistribute it and/or modify
	4	it under the terms of the GNU General Public License as published by
	5	the Free Software Foundation, either version 3 of the License, or
	6	(at your option) any later version.
	7
	8	This program is distributed in the hope that it will be useful,
	9	but WITHOUT ANY WARRANTY; without even the implied warranty of
	10	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	11	GNU General Public License for more details.
	12
	13	You should have received a copy of the GNU General Public License
	14	along with this program. If not, see <http://www.gnu.org/licenses/>.
	15	*/
	16
	17
	18	#ifndef ___GAIN_LOSS_MODEL
	19	#define ___GAIN_LOSS_MODEL
	20
	21	#include "definitions.h"
	22	#include "replacementModel.h"
	23	#include "fromQtoPt.h"
	24	#include "errorMsg.h"
	25	#include "matrixUtils.h"
	26	#include "gainLossUtils.h"
	27	#include "gainLossOptions.h"
	28
	29	/********************************************************************************************
	30	Q is a matrix of the following form:
	31	(where 0 and 1 stand for absence or presence)
	32	for a reversible case,
	33	0 1
	34	0 -m1 m1
	35	1 m1*pi0/pi1 -m1*pi0/pi1
	36
	37	and without assuming reversibility,
	38	0 1
	39	0 -m1 m1(gain)
	40	1 m2(loss) -m2
	41
	42
	43	1. The gainLossModel class is derived from the general replacementModel class - it models the stochastic process with one param gain=loss
	44	2. Additionally we use the gainLossModelNonReversible class which is derived from gainLossModel class - we get the second param - gain!=loss
	45	*********************************************************************************************/
	46
	47	/********************************************************************************************
	48	gainLossModel
	49	*********************************************************************************************/
	50	class gainLossModel : public replacementModel {
	51	public:
	52	explicit gainLossModel(const MDOUBLE m1, const Vdouble freq, bool isRootFreqEQstationary, bool isReversible, bool isHGT_normal_Pij, bool _isHGT_with_Q);
	53	virtual replacementModel* clone() const {
	54	return new gainLossModel(*this);
	55	}
	56	gainLossModel(const gainLossModel& other): _q2pt(NULL) {*this = other;}
	57	virtual gainLossModel& operator=(const gainLossModel &other);
	58
	59	virtual ~gainLossModel() {if (_q2pt) delete _q2pt; }
	60	const int alphabetSize() const {return 2;} // assumes only absence or presence
	61	const MDOUBLE err_allow_for_pijt_function() const {return 1e-4;} // same as q2p definitions
	62	const MDOUBLE Pij_t(const int i,const int j, const MDOUBLE d) const;
	63	const MDOUBLE dPij_dt(const int i,const int j, const MDOUBLE d) const;
	64	const MDOUBLE d2Pij_dt2(const int i,const int j, const MDOUBLE d) const;
	65	const MDOUBLE freq(const int i) const;
	66	void setMu1(const MDOUBLE val, bool isReversible);
	67	MDOUBLE setTheta(const MDOUBLE val);
	68	MDOUBLE getTheta() const {return _freq[1];}
	69
	70	bool isReversible() const {return _isReversible;}
	71	bool isRootFreqEQstationary() const {return _isRootFreqEQstationary;}
	72	bool isHGT_normal_Pij() const {return _isHGT_normal_Pij;}
	73	bool isHGT_with_Q() const {return _isHGT_with_Q;}
	74
	75	const VVdouble getQ() const {return _Q;}
	76	const MDOUBLE getMu1() const {return _Q[0][1];}
	77	const MDOUBLE getMu2() const {return _Q[1][0];}
	78
	79	void norm(const MDOUBLE scale);
	80	MDOUBLE sumPijQij();
	81
	82	//const MDOUBLE Pij_t(const int i,const int j, const MDOUBLE d) const{
	83	// return _q2pt->Pij_t(i,j,d);
	84	//}
	85	//const MDOUBLE dPij_dt(const int i,const int j, const MDOUBLE d) const{
	86	// return _q2pt->dPij_dt(i,j,d);
	87	//}
	88	//const MDOUBLE d2Pij_dt2(const int i,const int j, const MDOUBLE d) const{
	89	// return _q2pt->d2Pij_dt2(i,j,d);
	90	//}
	91
	92
	93	protected:
	94	virtual MDOUBLE updateQ(bool isReversible);
	95	virtual MDOUBLE normalizeQ();
	96
	97	bool pijt_is_prob_value(MDOUBLE val) const {
	98	if ((abs(val)+err_allow_for_pijt_function()<0) \|\| (val>1+err_allow_for_pijt_function()))
	99	return false;
	100	else
	101	return true;
	102	}
	103	protected:
	104	Vdouble _freq;
	105	Vdouble _freqQ;
	106	MDOUBLE _rQ;
	107	MDOUBLE _gain; // _Q[0][1]
	108	VVdouble _Q;
	109	q2pt *_q2pt; // dont use q2p
	110	bool _isReversible;
	111	bool _isRootFreqEQstationary;
	112	bool _isHGT_normal_Pij;
	113	bool _isHGT_with_Q;
	114
	115	};
	116
	117
	118
	119	/********************************************************************************************
	120	gainLossModelNonReversible
	121	All the methods of this class are implemented in the header
	122	*********************************************************************************************/
	123	class gainLossModelNonReversible : public gainLossModel {
	124	public:
	125	//////////////////////////////////////////////////////////////////////////
	126	explicit gainLossModelNonReversible(const MDOUBLE m1, const MDOUBLE m2, const Vdouble freq,bool isRootFreqEQstationary, bool isHGT_normal_Pij, bool _isHGT_with_Q)
	127	:_loss(m2),gainLossModel(m1,freq,isRootFreqEQstationary,false,isHGT_normal_Pij,_isHGT_with_Q)
	128	{
	129	updateQ();
	130	if(_isRootFreqEQstationary) {
	131	setTheta(getMu1()/(getMu1()+getMu2()));
	132	}
	133	}
	134	//////////////////////////////////////////////////////////////////////////
	135	virtual replacementModel* clone() const {
	136	return new gainLossModelNonReversible(*this);
	137	}
	138	gainLossModelNonReversible(const gainLossModelNonReversible& other) : gainLossModel(other)
	139	{
	140	_loss = other._loss;
	141	}
	142	virtual ~gainLossModelNonReversible(){
	143	//cout<<"gainLossModelNonReversible Deleted\n";
	144	}
	145	//gainLossModelNonReversible& operator=(const gainLossModelNonReversible &other)
	146	//{
	147	// _loss = other._loss;
	148	// return *this;
	149	//}
	150
	151
	152	//////////////////////////////////////////////////////////////////////////
	153	void setMu2(const MDOUBLE val) {
	154	_loss = val;
	155	updateQ();
	156	if(_isRootFreqEQstationary) {
	157	setTheta(getMu1()/(getMu1()+getMu2()));
	158	}
	159	//if(gainLossOptions::_isNormalizeQ) // part of update Q
	160	// normalizeQ();
	161
	162	}
	163	//const MDOUBLE getMu2() const {return _loss;} // moved to gainLossModel
	164	//const VVdouble getQ() const {return _Q;} // moved to gainLossModel
	165
	166
	167	protected:
	168	//virtual void updateQ();
	169	//////////////////////////////////////////////////////////////////////////
	170	void updateQ(){
	171	//gainLossModel::updateQ(false);
	172	_Q[1][1] = -_loss;
	173	_Q[1][0] = _loss;
	174	if(gainLossOptions::_isNormalizeQ && !gainLossOptions::_gainLossDist && (_Q[1][0]>0))//?
	175	normalizeQ();
	176	}
	177
	178	//bool pijt_is_prob_value(MDOUBLE val) const { // moved to gainLossModel
	179	// if ((abs(val)+err_allow_for_pijt_function()<0) \|\| (val>1+err_allow_for_pijt_function()))
	180	// return false;
	181	// else
	182	// return true;
	183	//}
	184	private:
	185	MDOUBLE _loss; // _Q[1][0]
	186	};
	187
	188	#endif

+834

-0

programs/gainLoss/gainLossOptimizer.cpp less more

	0	/*
	1	Copyright (C) 2011 Tal Pupko TalP@tauex.tau.ac.il.
	2
	3	This program is free software: you can redistribute it and/or modify
	4	it under the terms of the GNU General Public License as published by
	5	the Free Software Foundation, either version 3 of the License, or
	6	(at your option) any later version.
	7
	8	This program is distributed in the hope that it will be useful,
	9	but WITHOUT ANY WARRANTY; without even the implied warranty of
	10	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	11	GNU General Public License for more details.
	12
	13	You should have received a copy of the GNU General Public License
	14	along with this program. If not, see <http://www.gnu.org/licenses/>.
	15	*/
	16	#include "gainLossOptimizer.h"
	17	#include "bblEMfixRoot.h"
	18	#include "bblEM.h"
	19	#include "bblLS.h"
	20
	21	/********************************************************************************************
	22	gainLossOptimizer
	23	*********************************************************************************************/
	24	gainLossOptimizer::gainLossOptimizer(tree& tr, stochasticProcess* sp, const sequenceContainer &sc,
	25	const MDOUBLE epsilonOptimization, const int numIterations,
	26	const MDOUBLE epsilonOptimizationModel, const int numIterationsModel,
	27	const MDOUBLE epsilonOptimizationBBL, const int numIterationsBBL,
	28	Vdouble* weights,
	29	unObservableData* unObservableData_p, bool performOptimizationsBBL, bool isbblLSWhenbblEMdontImprove):
	30	_tr(tr),_sp(sp),_sc(sc),
	31	_epsilonOptimization(epsilonOptimization),_maxNumOfIterations(numIterations),
	32	_epsilonOptimizationModel(epsilonOptimizationModel),_maxNumOfIterationsModel(numIterationsModel),
	33	_epsilonOptimizationBBL(epsilonOptimizationBBL),_maxNumOfIterationsBBL(numIterationsBBL),
	34	_weightsUniqPatterns(weights),
	35	_unObservableData_p(unObservableData_p),_performOptimizationsBBL(performOptimizationsBBL),
	36	_isbblLSWhenbblEMdontImprove(isbblLSWhenbblEMdontImprove)
	37	{
	38	//gainLossOptions::distributionType rateDistributionType = getRateDistributionType(sp->distr());
	39	//_weights = gainLossOptions::_weights; // since - no weights are used over positions
	40	_isReversible = !dynamic_cast<gainLossModelNonReversible*>(_sp->getPijAccelerator()->getReplacementModel());
	41	_isSkipBblEM = false; // will change to T if like is not improved by BBL-EM
	42	_freq.resize(_sc.alphabetSize());
	43	optimizations();
	44	}
	45
	46
	47	/********************************************************************************************
	48	*********************************************************************************************/
	49	gainLossOptimizer::gainLossOptimizer(tree& tr, vector<vector<stochasticProcess> >& spVVec, distribution gainDist, distribution * lossDist,
	50	const sequenceContainer &sc,
	51	const MDOUBLE epsilonOptimization, const int numIterations,
	52	const MDOUBLE epsilonOptimizationModel, const int numIterationsModel,
	53	const MDOUBLE epsilonOptimizationBBL, const int numIterationsBBL,
	54	Vdouble* weights,
	55	unObservableData* unObservableData_p, bool performOptimizationsBBL, bool isbblLSWhenbblEMdontImprove):
	56	_tr(tr),_spVVec(spVVec),_gainDist(gainDist),_lossDist(lossDist),
	57	_sc(sc),//_spSimple(spSimple), // ignore sent model, make new one
	58	_epsilonOptimization(epsilonOptimization),_maxNumOfIterations(numIterations),
	59	_epsilonOptimizationModel(epsilonOptimizationModel),_maxNumOfIterationsModel(numIterationsModel),
	60	_epsilonOptimizationBBL(epsilonOptimizationBBL),_maxNumOfIterationsBBL(numIterationsBBL),
	61	_weightsUniqPatterns(weights),
	62	_unObservableData_p(unObservableData_p),_performOptimizationsBBL(performOptimizationsBBL),
	63	_isbblLSWhenbblEMdontImprove(isbblLSWhenbblEMdontImprove)
	64	{
	65	//_sp = _spVVec[0][0]; //used for reference (Alpha and such)
	66	//_weights = gainLossOptions::_weights; // since - no weights are used over positions
	67	_spSimple = NULL;
	68	_isSkipBblEM = false; // will change to T if like is not improved by BBL-EM
	69	_freq.resize(_sc.alphabetSize());
	70	_bestGainBeta = 1;
	71	_bestLossBeta = 1;
	72	optimizationsSPvv();
	73	}
	74
	75	/********************************************************************************************
	76	optimizations
	77	*********************************************************************************************/
	78	void gainLossOptimizer::optimizations(){
	79	time_t t1;
	80	time(&t1);
	81	time_t t2;
	82
	83	LOGnOUT(4,<<"-------------------------------"<<endl
	84	<<"Starting optimizations: maxNumIterations="<<_maxNumOfIterations<<endl);
	85
	86	_bestL = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(_tr,_sc,*_sp,_weightsUniqPatterns,_unObservableData_p);
	87	MDOUBLE currBestL=VERYSMALL;
	88	MDOUBLE previousL;
	89	bool noLikeImprovmentAtBBL = false; // if BBL did not produce a new tree, end. (no point in another iteration model+BBL)
	90
	91	bool isSkipParamsOptimization = gainLossOptions::_isSkipFirstParamsOptimization;
	92	LOGnOUT(3,<<endl<<"#########"<<" optimization starting epsilonCycle="<<_epsilonOptimization<<" maxNumIterations="<<_maxNumOfIterations<<endl);
	93	LOGnOUT(3,<<"start optimization with L= "<<_bestL<<endl);
	94	int iter;
	95	for (iter=1;iter<=_maxNumOfIterations;iter++){
	96	LOGnOUT(4,<<endl<<"------"<<" Model+BBL iter="<<iter<<endl);
	97	previousL = _bestL; // breaking out of loop when no (>epsilon) improvement is made by comparing to previousL
	98	// model optimization
	99	if(!isSkipParamsOptimization){
	100	currBestL = optimizeParameters();}
	101	else{
	102	LOGnOUT(4,<<"Optimize Params - Skipped"<<endl);}
	103
	104	if (currBestL>_bestL) {
	105	_bestL = currBestL;
	106	}
	107	else if(!isSkipParamsOptimization && currBestL<_bestL){
	108	LOGnOUT(4,<<" !!! Warning !!!: after model optimization likelihood went down"<< currBestL<<" "<<_bestL<<endl);
	109	}
	110	//_bestL = max(currBestL,_bestL);
	111
	112	isSkipParamsOptimization = false; // only first iteration skipped
	113	// BBL optimization
	114	if (gainLossOptions::_performOptimizationsBBL && _performOptimizationsBBL) // we use the && 2 enable optimizationsManyStarts not to perform BBL
	115	{
	116	//LOGnOUT(4,<<"Start BBL... with epsilonOptimizationBBL= "<<_epsilonOptimizationBBL<<endl);
	117	if(gainLossOptions::_performOptimizationsBBLOnlyOnce) // Next iteration - no BBL
	118	_performOptimizationsBBL = false;
	119	currBestL = optimizeBranchLengths(iter);
	120	if (currBestL>_bestL) {
	121	_bestL = currBestL;
	122	}
	123	else{
	124	noLikeImprovmentAtBBL = true;
	125	LOGnOUT(4,<<" !!! Warning !!!: after BBL likelihood did not improve"<< currBestL<<" "<<_bestL<<endl);
	126	}
	127	//_bestL = max(currBestL,_bestL);
	128	string treeINodes = gainLossOptions::_outDir + "//" + "TheTree.INodes.iter"+ int2string(iter) + ".ph";
	129	printTree (_tr, treeINodes);
	130	}
	131
	132	// ROOT optimization
	133	if (gainLossOptions::_performOptimizationsROOT)
	134	{
	135	currBestL = optimizeRoot();
	136	if (currBestL>_bestL) {
	137	_bestL = currBestL;
	138	}
	139	else{
	140	LOGnOUT(4,<<" !!! Warning !!!: after Root likelihood did not improve"<< currBestL<<" "<<_bestL<<endl);
	141	}
	142	//_bestL = max(currBestL,_bestL);
	143	}
	144	if ( (_bestL-previousL) < max(_epsilonOptimization, abs(_bestL/10000)) \|\| noLikeImprovmentAtBBL) // stop Opt for less than epsilon likelihood point
	145	{
	146	LOGnOUT(3,<<" OverAll optimization converged. Iter= "<<iter<<" Likelihood="<<_bestL<<endl);
	147	break;
	148	}
	149	if(gainLossOptions::_simulatedAnnealing){
	150	_epsilonOptimization = max(_epsilonOptimizationgainLossOptions::_simulatedAnnealingCoolingFactor,0.3gainLossOptions::_simulatedAnnealingMinEpsilonFactor); // simulated annealing
	151	_epsilonOptimizationModel = max(_epsilonOptimizationModelgainLossOptions::_simulatedAnnealingCoolingFactor,0.1gainLossOptions::_simulatedAnnealingMinEpsilonFactor); // simulated annealing
	152	_epsilonOptimizationBBL = max(_epsilonOptimizationBBLgainLossOptions::_simulatedAnnealingCoolingFactor,0.2gainLossOptions::_simulatedAnnealingMinEpsilonFactor); // simulated annealing
	153	}
	154	}
	155	if (iter>_maxNumOfIterations)
	156	LOGnOUT(4,<<" Too many="<<iter-1<<" iterations in Model+BBL. Last optimized parameters are used."<<endl);
	157	time(&t2);
	158	LOGnOUT(4,<<"Optimization RUNNING TIME = "<<(t2-t1)/60.0<<" minutes"<<endl<<endl);
	159	}
	160	/********************************************************************************************
	161	optimizationsSPvv
	162	*********************************************************************************************/
	163	void gainLossOptimizer::optimizationsSPvv(){
	164	time_t t1;
	165	time(&t1);
	166	time_t t2;
	167
	168	LOGnOUT(4,<<"-------------------------------"<<endl
	169	<<"Starting optimizations: maxNumIterations="<<_maxNumOfIterations<<endl);
	170	_bestL = likelihoodComputationGL::getTreeLikelihoodAllPosAlphTheSame(_tr,_sc,_spVVec,_gainDist,_lossDist,_weightsUniqPatterns,_unObservableData_p);
	171	MDOUBLE currBestL=VERYSMALL;
	172	MDOUBLE previousL;
	173	bool noLikeImprovmentAtBBL = false; // if BBL did not produce a new tree, end. (no point in another iteration model+BBL)
	174
	175	bool isSkipParamsOptimization = gainLossOptions::_isSkipFirstParamsOptimization;
	176	int iter;
	177	LOGnOUT(3,<<endl<<"#########"<<" optimization starting epsilonCycle="<<_epsilonOptimization<<" maxNumIterations="<<_maxNumOfIterations<<endl);
	178	LOGnOUT(3,<<"start optimization with L= "<<_bestL<<endl);
	179	for (iter=1;iter<=_maxNumOfIterations;iter++){
	180	LOGnOUT(4,<<endl<<"------"<<" Model+BBL iter="<<iter<<endl);
	181	previousL = _bestL; // breaking out of loop when no (>epsilon) improvement is made by comparing to previousL
	182	// model optimization
	183	if(!isSkipParamsOptimization){
	184	currBestL = optimizeParametersSPvv();}
	185	else{
	186	LOGnOUT(4,<<"Optimize Params - Skipped"<<endl);
	187	}
	188	if (currBestL>_bestL) {
	189	_bestL = currBestL;
	190	}
	191	else if(!isSkipParamsOptimization && currBestL<_bestL){
	192	LOGnOUT(4,<<" !!! Warning !!!: after model optimization likelihood went down"<< currBestL<<" "<<_bestL<<endl);
	193	}
	194	//_bestL = max(currBestL,_bestL);
	195	isSkipParamsOptimization = false; // only first iteration skipped
	196	// ROOT optimization
	197	if (gainLossOptions::_performOptimizationsROOT)
	198	{
	199	currBestL = optimizeRootSPvv();
	200	if (currBestL>_bestL) {
	201	_bestL = currBestL;
	202	}
	203	else{
	204	LOGnOUT(4,<<" !!! Warning !!!: after Root likelihood did not improve"<< currBestL<<" "<<_bestL<<endl);
	205	}
	206	//_bestL = max(currBestL,_bestL);
	207	}
	208	// BBL optimization
	209	if (gainLossOptions::_performOptimizationsBBL && _performOptimizationsBBL){
	210	//LOGnOUT(4,<<"Start BBL... with epsilonOptimizationBBL= "<<_epsilonOptimizationBBL<<endl);
	211	if(gainLossOptions::_performOptimizationsBBLOnlyOnce) // Next iteration - no BBL
	212	_performOptimizationsBBL = false;
	213	currBestL = optimizeBranchLengthsvv(iter);
	214	if (currBestL>_bestL) {
	215	_bestL = currBestL;
	216	}
	217	else{
	218	noLikeImprovmentAtBBL = true;
	219	LOGnOUT(4,<<" !!! Warning !!!: after BBL likelihood did not improve"<< currBestL<<" "<<_bestL<<endl);
	220	}
	221	//_bestL = max(currBestL,_bestL);
	222	string treeINodes = gainLossOptions::_outDir + "//" + "TheTree.INodes.iter"+ int2string(iter) + ".ph";
	223	printTree (_tr, treeINodes);
	224	}
	225	if ((_bestL-previousL) < max(_epsilonOptimization, abs(_bestL/10000)) \|\| noLikeImprovmentAtBBL ) // stop Opt for less than 2 likelihood point
	226	{
	227	LOGnOUT(3,<<" OverAll optimization converged. Iter= "<<iter<<" Likelihood="<<_bestL<<endl);
	228	break;
	229	}
	230	if(gainLossOptions::_simulatedAnnealing){
	231	_epsilonOptimization = max(_epsilonOptimizationgainLossOptions::_simulatedAnnealingCoolingFactor,0.3gainLossOptions::_simulatedAnnealingMinEpsilonFactor); // simulated annealing
	232	_epsilonOptimizationModel = max(_epsilonOptimizationModelgainLossOptions::_simulatedAnnealingCoolingFactor,0.1gainLossOptions::_simulatedAnnealingMinEpsilonFactor); // simulated annealing
	233	_epsilonOptimizationBBL = max(_epsilonOptimizationBBLgainLossOptions::_simulatedAnnealingCoolingFactor,0.2gainLossOptions::_simulatedAnnealingMinEpsilonFactor); // simulated annealing
	234	}
	235	}
	236	if (iter>_maxNumOfIterations)
	237	LOGnOUT(4,<<" Too many="<<iter-1<<" iterations in Model+BBL. Last optimized parameters are used."<<endl);
	238	time(&t2);
	239	LOGnOUT(4,<<"Optimization RUNNING TIME = "<<(t2-t1)/60.0<<" minutes"<<endl<<endl);
	240	}
	241
	242
	243	/********************************************************************************************
	244	optimizeParameters
	245	*********************************************************************************************/
	246	MDOUBLE gainLossOptimizer::optimizeParameters(){
	247	//LOGnOUT(4,<<"Starting optimizeParameters with: numOfIterations="<<_maxNumOfIterations<<" and epsilonOptimization="<<_epsilonOptimizationModel<<endl);
	248	time_t t1;
	249	time(&t1);
	250	time_t t2;
	251
	252	optimizeGainLossModel* opt = new optimizeGainLossModel(_tr,*_sp,_sc,
	253	_isReversible,_epsilonOptimizationModel,_maxNumOfIterationsModel,_weightsUniqPatterns,_unObservableData_p);
	254	//optimizeGainLossModel* opt = new optimizeGainLossModel(_tr,*_sp,_sc,
	255	// _isReversible,_epsilonOptimizationModel,_maxNumOfIterationsModel,_plogLforMissingData);
	256
	257
	258	LOGnOUT(4,<<"-------------------------------"<<endl<<"Model optimization over with: "<<endl);
	259	_bestGain=opt->getBestMu1();
	260	LOGnOUT(4,<<"Gain "<<_bestGain<<endl);
	261
	262	//if (!gainLossOptions::_isReversible) {
	263	//MDOUBLE bestM2=opt->getBestMu2();
	264	_bestLoss = static_cast<gainLossModelNonReversible*>(_sp->getPijAccelerator()->getReplacementModel())->getMu2();
	265	LOGnOUT(4,<<"Loss "<<_bestLoss<<endl);
	266	LOGnOUT(4,<<" Gain/Loss ratio= "<< _bestGain/_bestLoss<<endl);
	267
	268	//}
	269	if(isAlphaOptimization((*_sp).distr())){
	270	_bestAlphaRate=opt->getBestAlpha();
	271	LOGnOUT(4,<<"AlphaRateRate "<<_bestAlphaRate<<endl);
	272	}
	273	if(isBetaOptimization((*_sp).distr())){
	274	_bestBetaRate=opt->getBestBeta();
	275	LOGnOUT(4,<<"BetaRate "<<_bestBetaRate<<endl);
	276	LOGnOUT(4,<<" Rate Expectancy = "<< _bestAlphaRate/_bestBetaRate<<endl);
	277	LOGnOUT(4,<<" Rate Standatd Deviation = "<< sqrt(_bestAlphaRate/(_bestAlphaRate*_bestBetaRate))<<endl);
	278	}
	279	if(isInvariantOptimization((*_sp).distr())){
	280	_bestRateProbInvariant =opt->getBestRateProbInvariant();
	281	LOGnOUT(4,<<"ProbInvariantRate "<<_bestRateProbInvariant<<endl);
	282	}
	283	if(dynamic_cast<mixtureDistribution>((_sp).distr())){
	284	printMixtureParams();
	285	}
	286	if (isThetaOptimization() && !gainLossOptions::_isRootFreqEQstationary) {
	287	_bestTheta=opt->getBestTheta();
	288	LOGnOUT(4,<<"Theta "<<_bestTheta<<endl);
	289	_freq[1] = _bestTheta;
	290	_freq[0] = 1-_freq[1];
	291	}
	292	else{
	293	_freq[1] = _bestGain/(_bestGain+_bestLoss);
	294	_freq[0] = 1-_freq[1];
	295	}
	296	MDOUBLE bestL = opt->getBestL();
	297
	298	MDOUBLE currentlogL = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(_tr,_sc,*_sp,_weightsUniqPatterns,_unObservableData_p);
	299	if(!DEQUAL(currentlogL,bestL)){ //
	300	LOGnOUT(3,<<"!!! ERROR: different likelihood after optimizeGainLossModel,diff= "<<currentlogL-bestL <<"\n");
	301	}
	302	LOGnOUT(4,<<"updated likelihood (after optimizeParameters)= "<<bestL<<endl);
	303	time(&t2);
	304	LOGnOUT(4,<<"Model optimization RUNNING TIME = "<<(t2-t1)/60.0<<" minutes"<<endl<<endl);
	305	if(opt) delete opt;
	306	return bestL;
	307	}
	308	/********************************************************************************************
	309	optimizeParametersSPvv
	310	*********************************************************************************************/
	311	MDOUBLE gainLossOptimizer::optimizeParametersSPvv(){
	312	//LOGnOUT(4,<<"Starting optimizeParametersSPvv with: numOfIterations="<<_maxNumOfIterationsModel<<" and epsilonOptimization="<<_epsilonOptimizationModel<<endl);
	313	time_t t1;
	314	time(&t1);
	315	time_t t2;
	316
	317	optimizeGainLossModelVV* opt = new optimizeGainLossModelVV(_tr,_spVVec,_sc,_gainDist,_lossDist,
	318	gainLossOptions::_isReversible,_epsilonOptimizationModel,_maxNumOfIterationsModel,_weightsUniqPatterns,_unObservableData_p);
	319	LOGnOUT(4,<<"-------------------------------"<<endl<<"Model optimization over with: "<<endl);
	320
	321	_bestGainAlpha=opt->getBestGainAlpha();
	322	LOGnOUT(4,<<"AlphaGain "<<_bestGainAlpha<<endl);
	323	if(isBetaOptimization(_gainDist)){
	324	_bestGainBeta=opt->getBestGainBeta();
	325	LOGnOUT(4,<<"BetaGain "<<_bestGainBeta<<endl);
	326	_gainExp = _bestGainAlpha/_bestGainBeta;
	327	_gainSTD = sqrt(_bestGainAlpha/(_bestGainBeta*_bestGainBeta));
	328	LOGnOUT(4,<<" Gain Expectation = "<< rateExpectation(_gainDist)<<endl);
	329	LOGnOUT(4,<<" Gain Expectancy = "<< _gainExp<<endl);
	330	LOGnOUT(4,<<" Gain Standard Deviation = "<<_gainSTD<<endl);
	331	}
	332	if(isInvariantOptimization(_gainDist)){
	333	_bestGainProbInvariant=opt->getBestGainProbInvariant();
	334	LOGnOUT(4,<<"ProbInvariantGain "<<_bestGainProbInvariant<<endl);
	335	}
	336	if (!gainLossOptions::_isReversible) {
	337	_bestLossAlpha=opt->getBestLossAlpha();
	338	LOGnOUT(4,<<"AlphaLoss "<<_bestLossAlpha<<endl);
	339	if(isBetaOptimization(_lossDist)){
	340	_bestLossBeta=opt->getBestLossBeta();
	341	_lossExp = _bestLossAlpha/_bestLossBeta;
	342	_lossSTD = sqrt(_bestLossAlpha/(_bestLossBeta*_bestLossBeta));
	343	LOGnOUT(4,<<"BetaLoss "<<_bestLossBeta<<endl);
	344	LOGnOUT(4,<<" Loss Expectation = "<< rateExpectation(_lossDist)<<endl);
	345	LOGnOUT(4,<<" Loss Expectancy = "<< _lossExp<<endl);
	346	LOGnOUT(4,<<" Loss Standard Deviation = "<<_lossSTD<<endl);
	347	LOGnOUT(4,<<" Gain/Loss Expectancy ratio= "<< (_bestGainAlpha/_bestGainBeta)/(_bestLossAlpha/_bestLossBeta)<<endl);
	348	LOGnOUT(4,<<" Expectancy(Gain)/Expectancy(Loss) by computation = "<< computeExpOfGainByExpOfLossRatio(_gainDist, _lossDist)<<endl);
	349	}
	350	if(isInvariantOptimization(_lossDist)){
	351	_bestLossProbInvariant=opt->getBestLossProbInvariant();
	352	LOGnOUT(4,<<"ProbInvariantLoss "<<_bestLossProbInvariant<<endl);
	353	}
	354	}
	355	if(isAlphaOptimization((*_spVVec[0][0]).distr())){
	356	MDOUBLE bestAlpha=opt->getBestRateAlpha();
	357	LOGnOUT(4,<<"AlphaRate "<<bestAlpha<<endl);
	358	}
	359	if(isInvariantOptimization((*_spVVec[0][0]).distr())){
	360	MDOUBLE bestRateProbInvariant =opt->getBestRateProbInvariant();
	361	LOGnOUT(4,<<"ProbInvariantRate "<<bestRateProbInvariant<<endl);
	362	}
	363	if (isThetaOptimization() && !gainLossOptions::_isRootFreqEQstationary) {
	364	_bestTheta=opt->getBestTheta();
	365	LOGnOUT(4,<<"Theta "<<_bestTheta<<endl);
	366	_freq[1] = _bestTheta;
	367	_freq[0] = 1-_freq[1];
	368	}
	369	else{
	370	_freq[1] = _gainExp/(_gainExp+_lossExp);
	371	_freq[0] = 1-_freq[1];
	372	}
	373	MDOUBLE bestL = opt->getBestL();
	374
	375	//if(_unObservableData_p) _unObservableData_p->setLforMissingData(_tr,_spVVec,_gainDist,_lossDist);
	376	MDOUBLE currentlogL = likelihoodComputationGL::getTreeLikelihoodAllPosAlphTheSame(_tr,_sc,_spVVec,_gainDist,_lossDist,_weightsUniqPatterns,_unObservableData_p);
	377	if(!DEQUAL(currentlogL,bestL)){ //DEQUAL(currentlogL,bestL)
	378	LOGnOUT(3,<<"!!! ERROR: different likelihood after optimizeGainLossModel,diff= "<<currentlogL-bestL <<"\n");
	379	}
	380	LOGnOUT(4,<<"updated likelihood (after optimizeParameters)= "<<bestL<<endl);
	381	time(&t2);
	382	LOGnOUT(4,<<"Model optimization RUNNING TIME = "<<(t2-t1)/60.0<<" minutes"<<endl<<endl);
	383	if(opt) delete opt;
	384	// update spSimple based on latest est parameters - deleted at the end
	385	if(_spSimple) delete _spSimple;
	386	_spSimple = startStochasticProcessSimpleGamma(_gainExp, _lossExp, _freq);
	387	return bestL;
	388	}
	389
	390	/********************************************************************************************
	391	optimizeRootSPvv
	392	*********************************************************************************************/
	393	MDOUBLE gainLossOptimizer::optimizeRootSPvv()
	394	{
	395	time_t t1;
	396	time(&t1);
	397	time_t t2;
	398
	399	MDOUBLE oldL = likelihoodComputationGL::getTreeLikelihoodAllPosAlphTheSame(_tr,_sc,_spVVec,_gainDist,_lossDist,_weightsUniqPatterns,_unObservableData_p);
	400	MDOUBLE newL = VERYSMALL;
	401	tree tempTree = _tr;
	402	LOGnOUT(4,<<"*** Starting optimizeRoot="<<oldL<<endl);
	403
	404	treeIterDownTopConst tIt(tempTree);
	405	for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
	406	if(mynode->isLeaf())
	407	continue;
	408
	409	tempTree.rootAt(mynode);
	410	if(_unObservableData_p) _unObservableData_p->setLforMissingData(tempTree,_spVVec,_gainDist,_lossDist);
	411	MDOUBLE newL = likelihoodComputationGL::getTreeLikelihoodAllPosAlphTheSame(tempTree,_sc,_spVVec,_gainDist,_lossDist,_weightsUniqPatterns,_unObservableData_p);
	412	if((newL>oldL+_epsilonOptimizationBBL*gainLossOptions::_epsilonForReRootFactor)){ // only for substantial improvement the tree will be re-rooted
	413	_tr = tempTree;
	414	LOGnOUT(4,<<"tree rooted at "<<_tr.getRoot()->name()<<" with lL="<<newL<<endl);
	415	LOGnOUT(4,<<"sons of root are "<<_tr.getRoot()->getSon(0)->name()<<" , "<<_tr.getRoot()->getSon(1)->name()<<" , "<<_tr.getRoot()->getSon(2)->name()<<endl);
	416	}
	417	else{
	418	if(_unObservableData_p) _unObservableData_p->setLforMissingData(_tr,_spVVec,_gainDist,_lossDist);
	419	}
	420
	421	}
	422	LOGnOUT(4,<<"*** After optimizeRoot="<<max(newL,oldL)<<endl);
	423	time(&t2);
	424	LOGnOUT(4,<<"optimizeRoot RUNNING TIME = "<<(t2-t1)/60.0<<" minutes"<<endl<<endl);
	425	return newL;
	426	}
	427	/********************************************************************************************
	428	optimizeRoot
	429	*********************************************************************************************/
	430	MDOUBLE gainLossOptimizer::optimizeRoot()
	431	{
	432	time_t t1;
	433	time(&t1);
	434	time_t t2;
	435
	436	MDOUBLE oldL = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(_tr,_sc,*_sp,_weightsUniqPatterns,_unObservableData_p);
	437	MDOUBLE newL = VERYSMALL;
	438	tree tempTree = _tr;
	439	LOGnOUT(4,<<"*** Starting optimizeRoot= "<<oldL<<endl);
	440
	441	treeIterDownTopConst tIt(tempTree);
	442	for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
	443	if(mynode->isLeaf())
	444	continue;
	445	tempTree.rootAt(mynode);
	446
	447	if(_unObservableData_p){
	448	_unObservableData_p->setLforMissingData(tempTree,_sp);
	449	}
	450	MDOUBLE newL = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(tempTree,_sc,*_sp,_weightsUniqPatterns,_unObservableData_p);
	451	if((newL>oldL+_epsilonOptimizationBBL*gainLossOptions::_epsilonForReRootFactor))// only for substantial improvement the tree will be re-rooted
	452	{
	453	_tr = tempTree;
	454	LOGnOUT(4,<<"tree re-rooted at "<<_tr.getRoot()->name()<<" with lL="<<newL<<endl);
	455	LOGnOUT(4,<<"sons of root are "<<_tr.getRoot()->getSon(0)->name()<<" , "<<_tr.getRoot()->getSon(1)->name()<<" , "<<_tr.getRoot()->getSon(2)->name()<<endl);
	456	}
	457	else{
	458	if(_unObservableData_p) _unObservableData_p->setLforMissingData(_tr,_sp); // go back to original tree value
	459	}
	460
	461	}
	462	LOGnOUT(4,<<"*** After optimizeRoot= "<<max(newL,oldL)<<endl);
	463	time(&t2);
	464	LOGnOUT(4,<<"optimizeRoot RUNNING TIME = "<<(t2-t1)/60.0<<" minutes"<<endl<<endl);
	465	return newL;
	466	}
	467
	468
	469	/********************************************************************************************
	470	optimizeBranchLengths
	471	*********************************************************************************************/
	472	MDOUBLE gainLossOptimizer::optimizeBranchLengths(const int outerIter)
	473	{
	474	time_t t1;
	475	time(&t1);
	476	time_t t2;
	477	MDOUBLE tollForPairwiseDist=0.01; // the BBL default, epsilon per branch (brent's value)
	478	MDOUBLE bblEMfactor = 10;
	479
	480	int numberOfBranchs = _tr.getNodesNum();
	481	MDOUBLE epsilonOptimizationIterFactor = numberOfBranchs/5; // (is 1.5) for 100 branches (~50 species) the epsilon for the entire iter is 50 times the one for branch
	482	epsilonOptimizationIterFactor = max(5.0,epsilonOptimizationIterFactor);
	483	MDOUBLE epsilonOptimizationBBLIter = _epsilonOptimizationBBL*epsilonOptimizationIterFactor/bblEMfactor; // The next iteration epsilon, multiply per-branch value
	484
	485	MDOUBLE oldL = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(_tr,_sc,*_sp,_weightsUniqPatterns,_unObservableData_p);
	486	MDOUBLE newLnoUnObservableDataCorrection = VERYSMALL;
	487	MDOUBLE newL = VERYSMALL;
	488	tree oldTree = _tr;
	489	bool isFixedRoot = !_isReversible && !gainLossOptions::_isRootFreqEQstationary;
	490
	491	MDOUBLE minLikeImprovmentForNoLS = 5.0;
	492	MDOUBLE minLikeImprovmentForNoSkip = 2.0;
	493
	494	if(gainLossOptions::_isBblLS){
	495	if (gainLossOptions::_isBblEMbeforeLSWithMissSpecifiedModel) {
	496	// start with BBL-EM and additional iteration of Line-Search optimizeBranches
	497	MDOUBLE oldLnoUnObservableDataCorrection = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(_tr,_sc,*_sp,_weightsUniqPatterns,NULL);
	498	LOGnOUT(4,<<"*** Prior BBL-EM... followed by addtional iteration of Line-Search "<<"\t"<<oldL<<endl);
	499	if(isFixedRoot){
	500	LOGnOUT(4,<<"*** Start Fix Root BBL-EM Optimization with Likelihood="<<"\t"<<oldL<<endl);
	501	LOGnOUT(4,<<" BBL-EM: tollForPairwiseDist="<<tollForPairwiseDist<<" and epsilonOptimizationBBLIter="<<epsilonOptimizationBBLIter<<endl);
	502	bblEMfixRoot bblEM1(_tr, _sc, _sp, NULL, (int)(_maxNumOfIterationsBBLbblEMfactor) , epsilonOptimizationBBLIter,tollForPairwiseDist
	503	,NULL,NULL); // optional &oldLnoUnObservableDataCorrection
	504	LOGnOUT(4,<<" Total branch lengths:"<<_tr.getAllBranchesLengthSum() <<endl);
	505	newLnoUnObservableDataCorrection = bblEM1.getTreeLikelihood();
	506	if(_unObservableData_p)
	507	_unObservableData_p->setLforMissingData(_tr,_sp);
	508	newL = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(_tr,_sc,*_sp,_weightsUniqPatterns,_unObservableData_p);
	509	}
	510	else{
	511	LOGnOUT(4,<<"*** Start BBL-EM Optimization with Likelihood="<<"\t"<<oldL<<endl);
	512	LOGnOUT(4,<<" BBL-EM: tollForPairwiseDist="<<tollForPairwiseDist<<" and epsilonOptimizationBBLIter="<<epsilonOptimizationBBLIter<<endl);
	513	// Note: likelihood does not improve with iterations compared to the likelihood under correction for UnObs, hence NULL
	514	bblEM bblEM1(_tr, _sc, _sp, NULL, (int)(_maxNumOfIterationsBBLbblEMfactor) , epsilonOptimizationBBLIter,tollForPairwiseDist
	515	,NULL,NULL); // optional &oldLnoUnObservableDataCorrection
	516	LOGnOUT(4,<<" Total branch lengths:"<<_tr.getAllBranchesLengthSum() <<endl);
	517	newLnoUnObservableDataCorrection = bblEM1.getTreeLikelihood();
	518	if(_unObservableData_p)
	519	_unObservableData_p->setLforMissingData(_tr,_sp);
	520	newL = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(_tr,_sc,*_sp,_weightsUniqPatterns,_unObservableData_p);
	521	}
	522
	523	bblLS bbl;
	524	MDOUBLE bestL = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(_tr,_sc,*_sp,_weightsUniqPatterns,_unObservableData_p);
	525	newL = bbl.optimizeBranches(_tr,_sp,_sc,_weightsUniqPatterns,_unObservableData_p,outerIter,_epsilonOptimizationBBL, 1 ,bestL);
	526	if(newL<oldL){
	527	_tr = oldTree;
	528	LOGnOUT(4,<<"NOTE: No improvment-> Retain previous tree"<<endl);
	529	}
	530	LOGnOUT(4,<<" Total branch lengths:"<<_tr.getAllBranchesLengthSum() <<endl);
	531	}
	532	// only Line-Search optimizeBranches
	533	else{
	534	bblLS bbl;
	535	MDOUBLE bestL = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(_tr,_sc,*_sp,_weightsUniqPatterns,_unObservableData_p);
	536	newL = bbl.optimizeBranches(_tr,_sp,_sc,_weightsUniqPatterns,_unObservableData_p,outerIter,_epsilonOptimizationBBL,_maxNumOfIterationsBBL,bestL);
	537	LOGnOUT(4,<<" Total branch lengths:"<<_tr.getAllBranchesLengthSum() <<endl);
	538	}
	539	}
	540	else{
	541	if(!_isSkipBblEM){
	542	MDOUBLE oldLnoUnObservableDataCorrection = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(_tr,_sc,*_sp,_weightsUniqPatterns,NULL);
	543	if(isFixedRoot){
	544	LOGnOUT(4,<<"*** Start Fix Root BBL-EM Optimization with Likelihood="<<"\t"<<oldL<<endl);
	545	LOGnOUT(4,<<" BBL-EM: tollForPairwiseDist="<<tollForPairwiseDist<<" and epsilonOptimizationBBLIter="<<epsilonOptimizationBBLIter<<endl);
	546	bblEMfixRoot bblEM1(_tr, _sc, _sp, NULL, (int)(_maxNumOfIterationsBBLbblEMfactor) , epsilonOptimizationBBLIter,tollForPairwiseDist
	547	,NULL,NULL); // optional &oldLnoUnObservableDataCorrection
	548	LOGnOUT(4,<<" Total branch lengths:"<<_tr.getAllBranchesLengthSum() <<endl);
	549	newLnoUnObservableDataCorrection = bblEM1.getTreeLikelihood();
	550	if(_unObservableData_p)
	551	_unObservableData_p->setLforMissingData(_tr,_sp);
	552	newL = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(_tr,_sc,*_sp,_weightsUniqPatterns,_unObservableData_p);
	553	}
	554	else{
	555	LOGnOUT(4,<<"*** Start BBL-EM Optimization with Likelihood="<<"\t"<<oldL<<endl);
	556	LOGnOUT(4,<<" BBL-EM: tollForPairwiseDist="<<tollForPairwiseDist<<" and epsilonOptimizationBBLIter="<<epsilonOptimizationBBLIter<<endl);
	557	// Note: likelihood does not improve with iterations compared to the likelihood under correction for UnObs, hence NULL
	558	bblEM bblEM1(_tr, _sc, _sp, NULL, (int)(_maxNumOfIterationsBBLbblEMfactor) , epsilonOptimizationBBLIter,tollForPairwiseDist
	559	,NULL,NULL); // optional &oldLnoUnObservableDataCorrection
	560	LOGnOUT(4,<<" Total branch lengths:"<<_tr.getAllBranchesLengthSum() <<endl);
	561	newLnoUnObservableDataCorrection = bblEM1.getTreeLikelihood();
	562	if(_unObservableData_p)
	563	_unObservableData_p->setLforMissingData(_tr,_sp);
	564	newL = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(_tr,_sc,*_sp,_weightsUniqPatterns,_unObservableData_p);
	565	}
	566	}
	567	// if include LS when BBL-EM fails
	568	if((newL-oldL < max(minLikeImprovmentForNoLS, abs(newL/10000)) ) && _isbblLSWhenbblEMdontImprove){ // Do LS if less than 5 likelihood points were gained
	569	LOGnOUT(4,<<" Only "<< newL-oldL<<" improvement with BBL-EM -> Perform BBL-LS one iteration"<<endl);
	570	if(gainLossOptions::_isSkipBblEMWhenbblEMdontImprove && (newL-oldL < minLikeImprovmentForNoSkip)){
	571	LOGnOUT(4,<<"Since no improvement (less than "<<minLikeImprovmentForNoSkip<<"), BBL-EM will be skipped next iteration, go directly to LS "<<endl);
	572	_isSkipBblEM = true; // once BBL-EM is not improving Like, next time - skip
	573	}
	574	_tr = oldTree;
	575	if(_unObservableData_p)
	576	_unObservableData_p->setLforMissingData(_tr,_sp);
	577	bblLS bbl;
	578	MDOUBLE bestL = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(_tr,_sc,*_sp,_weightsUniqPatterns,_unObservableData_p);
	579	newL = bbl.optimizeBranches(_tr,_sp,_sc,_weightsUniqPatterns,_unObservableData_p,outerIter,_epsilonOptimizationBBL, 1 ,bestL);
	580	LOGnOUT(4,<<" Total branch lengths:"<<_tr.getAllBranchesLengthSum() <<endl);
	581	}
	582	}
	583
	584	LOGnOUT(4,<<"*** After Branch Lengths Opt. returned Likelihood="<<"\t"<<newL<<endl);
	585	if((newL<oldL)){
	586	_tr = oldTree;
	587	if(_unObservableData_p)
	588	_unObservableData_p->setLforMissingData(_tr,_sp);
	589	newL = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(_tr,_sc,*_sp,_weightsUniqPatterns,_unObservableData_p);
	590	LOGnOUT(4,<<"NOTE: No improvment-> Retain previous tree"<<endl);
	591	}
	592
	593	MDOUBLE postL = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(_tr,_sc,*_sp,_weightsUniqPatterns,_unObservableData_p);
	594	if( !DEQUAL(newL,postL) ){
	595	LOGnOUT(3,<<"*ERROR*: Diff returned L, and re-calculated L"<<" "<<newL<<" "<<postL<<" "<<postL-newL<<endl);
	596	}
	597	LOGnOUT(4,<<" Total branch lengths:"<<_tr.getAllBranchesLengthSum() <<endl);
	598	time(&t2);
	599	LOGnOUT(4,<<"Branch Lengths RUNNING TIME = "<<(t2-t1)/60.0<<" minutes"<<endl<<endl);
	600	return newL;
	601	}
	602
	603
	604
	605	/********************************************************************************************
	606	optimizeBranchLengthsvv - new
	607	*********************************************************************************************/
	608	MDOUBLE gainLossOptimizer::optimizeBranchLengthsvv(const int outerIter)
	609	{
	610	time_t t1;
	611	time(&t1);
	612	time_t t2;
	613	MDOUBLE tollForPairwiseDist=0.01; // the BBL default, epsilon per branch (brent's value)
	614	MDOUBLE bblEMfactor = 10;
	615
	616	int numberOfBranchs = _tr.getNodesNum();
	617	MDOUBLE epsilonOptimizationIterFactor = numberOfBranchs/5; // (is 1.5) for 100 branches (~50 species) the epsilon for the entire iter is 50 times the one for branch
	618	epsilonOptimizationIterFactor = max(5.0,epsilonOptimizationIterFactor);
	619	MDOUBLE epsilonOptimizationBBLIter = _epsilonOptimizationBBL*epsilonOptimizationIterFactor/bblEMfactor; // The next iteration epsilon, multiply per-branch value
	620	MDOUBLE oldL = likelihoodComputationGL::getTreeLikelihoodAllPosAlphTheSame(_tr,_sc,_spVVec,_gainDist,_lossDist,_weightsUniqPatterns,_unObservableData_p);
	621
	622	MDOUBLE newLnoUnObservableDataCorrection = VERYSMALL;
	623	MDOUBLE newL = VERYSMALL;
	624	tree oldTree = _tr;
	625	//bool isFixedRoot = !_isReversible && !gainLossOptions::_isRootFreqEQstationary;
	626
	627	MDOUBLE minLikeImprovmentForNoLS = 5.0;
	628	MDOUBLE minLikeImprovmentForNoSkip = 2.0;
	629
	630	if(gainLossOptions::_isBblLS){
	631	if (gainLossOptions::_isBblEMbeforeLSWithMissSpecifiedModel) {
	632	// start with BBL-EM and additional iteration of Line-Search optimizeBranches
	633	MDOUBLE oldLnoUnObservableDataCorrection = likelihoodComputationGL::getTreeLikelihoodAllPosAlphTheSame(_tr,_sc,_spVVec,_gainDist,_lossDist,_weightsUniqPatterns,NULL);
	634	LOGnOUT(4,<<"*** Prior BBL-EM... followed by addtional iteration of Line-Search "<<"\t"<<oldL<<endl);
	635	//if(isFixedRoot){
	636	// LOGnOUT(4,<<"*** spSimple - Fix Root BBL-EM Optimization with Likelihood="<<"\t"<<oldL<<endl);
	637	// LOGnOUT(4,<<" BBL-EM: tollForPairwiseDist="<<tollForPairwiseDist<<" and epsilonOptimizationBBLIter="<<epsilonOptimizationBBLIter<<endl);
	638	// bblEMfixRoot bblEM1(_tr, _sc, _spSimple, NULL, (int)(_maxNumOfIterationsBBLbblEMfactor) , epsilonOptimizationBBLIter,tollForPairwiseDist
	639	// ,NULL,&oldLnoUnObservableDataCorrection);
	640	// newLnoUnObservableDataCorrection = bblEM1.getTreeLikelihood();
	641	// if(_unObservableData_p)
	642	// _unObservableData_p->setLforMissingData(_tr,_spVVec,_gainDist,_lossDist);
	643	// newL = likelihoodComputationGL::getTreeLikelihoodAllPosAlphTheSame(_tr,_sc,_spVVec,_gainDist,_lossDist,_weightsUniqPatterns,_unObservableData_p);
	644	//}
	645	//else{
	646	LOGnOUT(4,<<"*** _spSimple BBL-EM Optimization with Likelihood="<<"\t"<<oldL<<endl);
	647	LOGnOUT(4,<<" BBL-EM: tollForPairwiseDist="<<tollForPairwiseDist<<" and epsilonOptimizationBBLIter="<<epsilonOptimizationBBLIter<<endl);
	648	// Note: likelihood does not improve with iterations compared to the likelihood under correction for UnObs, hence NULL
	649	bblEM bblEM1(_tr, _sc, _spSimple, NULL, (int)(_maxNumOfIterationsBBLbblEMfactor) , epsilonOptimizationBBLIter,tollForPairwiseDist
	650	,NULL,NULL); // optional &oldLnoUnObservableDataCorrection
	651	LOGnOUT(4,<<" Total branch lengths:"<<_tr.getAllBranchesLengthSum() <<endl);
	652	newLnoUnObservableDataCorrection = bblEM1.getTreeLikelihood();
	653	if(_unObservableData_p)
	654	_unObservableData_p->setLforMissingData(_tr,_spVVec,_gainDist,_lossDist);
	655	newL = likelihoodComputationGL::getTreeLikelihoodAllPosAlphTheSame(_tr,_sc,_spVVec,_gainDist,_lossDist,_weightsUniqPatterns,_unObservableData_p);
	656	//}
	657	bblLS bbl;
	658	MDOUBLE bestL = likelihoodComputationGL::getTreeLikelihoodAllPosAlphTheSame(_tr,_sc,_spVVec,_gainDist,_lossDist,_weightsUniqPatterns,_unObservableData_p); // can be sent
	659	newL = bbl.optimizeBranches(_tr,_spVVec,_gainDist,_lossDist,_sc,_weightsUniqPatterns,_unObservableData_p,outerIter,_epsilonOptimizationBBL,_maxNumOfIterationsBBL,bestL);
	660	if(newL<oldL){
	661	_tr = oldTree;
	662	LOGnOUT(4,<<"NOTE: No improvment-> Retain previous tree"<<endl);
	663	}
	664	LOGnOUT(4,<<" Total branch lengths:"<<_tr.getAllBranchesLengthSum() <<endl);
	665	}
	666	// only Line-Search optimizeBranches
	667	else{
	668	bblLS bbl;
	669	MDOUBLE bestL = likelihoodComputationGL::getTreeLikelihoodAllPosAlphTheSame(_tr,_sc,_spVVec,_gainDist,_lossDist,_weightsUniqPatterns,_unObservableData_p); // can be sent
	670	newL = bbl.optimizeBranches(_tr,_spVVec,_gainDist,_lossDist,_sc,_weightsUniqPatterns,_unObservableData_p,outerIter,_epsilonOptimizationBBL,_maxNumOfIterationsBBL,bestL);
	671	LOGnOUT(4,<<" Total branch lengths:"<<_tr.getAllBranchesLengthSum() <<endl);
	672	}
	673	}
	674	else{
	675	if(!_isSkipBblEM){
	676	MDOUBLE oldLnoUnObservableDataCorrection = likelihoodComputationGL::getTreeLikelihoodAllPosAlphTheSame(_tr,_sc,_spVVec,_gainDist,_lossDist,_weightsUniqPatterns,NULL);
	677	//if(isFixedRoot){
	678	// LOGnOUT(4,<<"*** _spSimple Fix Root BBL-EM Optimization with Likelihood="<<"\t"<<oldL<<endl);
	679	// LOGnOUT(4,<<" BBL-EM: tollForPairwiseDist="<<tollForPairwiseDist<<" and epsilonOptimizationBBLIter="<<epsilonOptimizationBBLIter<<endl);
	680	// bblEMfixRoot bblEM1(_tr, _sc, _spSimple, NULL, (int)(_maxNumOfIterationsBBLbblEMfactor) , epsilonOptimizationBBLIter,tollForPairwiseDist
	681	// ,NULL,&oldLnoUnObservableDataCorrection);
	682	// newLnoUnObservableDataCorrection = bblEM1.getTreeLikelihood();
	683	// if(_unObservableData_p)
	684	// _unObservableData_p->setLforMissingData(_tr,_spVVec,_gainDist,_lossDist);
	685	// newL = likelihoodComputationGL::getTreeLikelihoodAllPosAlphTheSame(_tr,_sc,_spVVec,_gainDist,_lossDist,_weightsUniqPatterns,_unObservableData_p);
	686	//}
	687	//else{
	688	LOGnOUT(4,<<"*** _spSimple BBL-EM Optimization with Likelihood="<<"\t"<<oldL<<endl);
	689	LOGnOUT(4,<<" BBL-EM: tollForPairwiseDist="<<tollForPairwiseDist<<" and epsilonOptimizationBBLIter="<<epsilonOptimizationBBLIter<<endl);
	690	// Note: likelihood does not improve with iterations compared to the likelihood under correction for UnObs, hence NULL
	691	bblEM bblEM1(_tr, _sc, _spSimple, NULL, (int)(_maxNumOfIterationsBBLbblEMfactor) , epsilonOptimizationBBLIter,tollForPairwiseDist
	692	,NULL,NULL); // optional - likelihood &oldLnoUnObservableDataCorrection
	693	LOGnOUT(4,<<" Total branch lengths:"<<_tr.getAllBranchesLengthSum() <<endl);
	694	newLnoUnObservableDataCorrection = bblEM1.getTreeLikelihood();
	695	if(_unObservableData_p)
	696	_unObservableData_p->setLforMissingData(_tr,_spVVec,_gainDist,_lossDist);
	697	newL = likelihoodComputationGL::getTreeLikelihoodAllPosAlphTheSame(_tr,_sc,_spVVec,_gainDist,_lossDist,_weightsUniqPatterns,_unObservableData_p);
	698	//}
	699	}
	700	// if include LS when BBL-EM fails
	701
	702	if((newL-oldL < max(minLikeImprovmentForNoLS, abs(newL/10000)) ) && _isbblLSWhenbblEMdontImprove){ // Do LS if less than 5 likelihood points were gained
	703	LOGnOUT(4,<<" Only "<< newL-oldL<<" improvement with BBL-EM -> Perform BBL-LS one iteration"<<endl);
	704	if(gainLossOptions::_isSkipBblEMWhenbblEMdontImprove && (newL-oldL < minLikeImprovmentForNoSkip)){
	705	LOGnOUT(4,<<"Since no improvement (less than "<<minLikeImprovmentForNoSkip<<"), BBL-EM will be skipped next iteration, go directly to LS "<<endl);
	706	_isSkipBblEM = true; // once BBL-EM is not improving Like, next time - skip
	707	}
	708	_tr = oldTree;
	709	if(_unObservableData_p)
	710	_unObservableData_p->setLforMissingData(_tr,_spVVec,_gainDist,_lossDist);
	711	bblLS bbl;
	712	MDOUBLE bestL = likelihoodComputationGL::getTreeLikelihoodAllPosAlphTheSame(_tr,_sc,_spVVec,_gainDist,_lossDist,_weightsUniqPatterns,_unObservableData_p); // can be sent
	713	newL = bbl.optimizeBranches(_tr,_spVVec,_gainDist,_lossDist,_sc,_weightsUniqPatterns,_unObservableData_p,outerIter,_epsilonOptimizationBBL,1,bestL);
	714	LOGnOUT(4,<<" Total branch lengths:"<<_tr.getAllBranchesLengthSum() <<endl);
	715	}
	716	}
	717	LOGnOUT(4,<<"*** After Branch Lengths Opt. returned Likelihood="<<"\t"<<newL<<endl);
	718	if((newL<oldL)){
	719	_tr = oldTree;
	720	if(_unObservableData_p)
	721	_unObservableData_p->setLforMissingData(_tr,_spVVec,_gainDist,_lossDist);
	722	newL = likelihoodComputationGL::getTreeLikelihoodAllPosAlphTheSame(_tr,_sc,_spVVec,_gainDist,_lossDist,_weightsUniqPatterns,_unObservableData_p);
	723	LOGnOUT(4,<<"NOTE: No improvment-> Retain previous tree"<<endl);
	724	}
	725	MDOUBLE postL = likelihoodComputationGL::getTreeLikelihoodAllPosAlphTheSame(_tr,_sc,_spVVec,_gainDist,_lossDist,_weightsUniqPatterns,_unObservableData_p);
	726	if( !DEQUAL(newL,postL) ){
	727	LOGnOUT(3,<<"*ERROR*: Diff returned L, and re-calculated L"<<" "<<newL<<" "<<postL<<" "<<postL-newL<<endl);
	728	}
	729	LOGnOUT(4,<<" Total branch lengths:"<<_tr.getAllBranchesLengthSum() <<endl);
	730	time(&t2);
	731	LOGnOUT(4,<<"Branch Lengths RUNNING TIME = "<<(t2-t1)/60.0<<" minutes"<<endl<<endl);
	732	return newL;
	733	}
	734
	735
	736
	737	/********************************************************************************************
	738	optimizeBranchLengthsvv
	739	*********************************************************************************************/
	740	//MDOUBLE gainLossOptimizer::optimizeBranchLengthsvv(const int outerIter)
	741	//{
	742	// time_t t1;
	743	// time(&t1);
	744	// time_t t2;
	745	// MDOUBLE tollForPairwiseDist=0.01; // the BBL default, epsilon per branch (brent's value)
	746	// MDOUBLE bblEMfactor = 10;
	747	//
	748	// int numberOfBranchs = _tr.getNodesNum();
	749	// MDOUBLE epsilonOptimizationIterFactor = numberOfBranchs/5; // (is 1.5) for 100 branches (~50 species) the epsilon for the entire iter is 50 times the one for branch
	750	// epsilonOptimizationIterFactor = max(5.0,epsilonOptimizationIterFactor);
	751	// MDOUBLE epsilonOptimizationBBLIter = _epsilonOptimizationBBL*epsilonOptimizationIterFactor/bblEMfactor; // The next iteration epsilon, multiply per-branch value
	752	// MDOUBLE oldL = likelihoodComputationGL::getTreeLikelihoodAllPosAlphTheSame(_tr,_sc,_spVVec,_gainDist,_lossDist,_weightsUniqPatterns,_unObservableData_p);
	753	//
	754	// MDOUBLE newL = VERYSMALL;
	755	// tree oldTree = _tr;
	756	// bool isFixedRoot = !_isReversible && !gainLossOptions::_isRootFreqEQstationary;
	757	//
	758	// if(gainLossOptions::_isBblLS){
	759	// if (gainLossOptions::_isBblEMbeforeLSWithMissSpecifiedModel) // not implemented to work well
	760	// {
	761	// LOGnOUT(4,<<"*** Prior BBL-EM under stationarity assumption and one Sp\n WARN: this is not working well..."<<"\t"<<oldL<<endl);
	762	// LOGnOUT(4,<<" BBL-EM: tollForPairwiseDist="<<tollForPairwiseDist<<" and epsilonOptimizationBBLIter="<<epsilonOptimizationBBLIter<<endl);
	763	// if(isFixedRoot){
	764	// LOGnOUT(4,<<"*** Start Fix Root BBL-EM Optimization with Likelihood="<<"\t"<<oldL<<endl);
	765	// bblEMfixRoot bblEM1(_tr, _sc, _spSimple, NULL, (int)(_maxNumOfIterationsBBLbblEMfactor) , epsilonOptimizationBBLIter,tollForPairwiseDist,_unObservableData_p,&oldL);
	766	// newL = bblEM1.getTreeLikelihood();
	767	// }
	768	// else{
	769	// LOGnOUT(4,<<"*** Start BBL-EM Optimization with Likelihood="<<"\t"<<oldL<<endl);
	770	// bblEM bblEM1(_tr, _sc, _spSimple, NULL, (int)(_maxNumOfIterationsBBLbblEMfactor), epsilonOptimizationBBLIter,tollForPairwiseDist,_unObservableData_p,&oldL);
	771	// newL = bblEM1.getTreeLikelihood();
	772	// }
	773	// LOGnOUT(4,<<"*** After BBL-EM Likelihood (with wrong model)"<<"\t"<<newL<<endl);
	774	// if(newL<oldL){
	775	// _tr = oldTree;
	776	// LOGnOUT(4,<<"NOTE: No improvment-> Retain previous tree"<<endl);
	777	// }
	778	// }
	779	// bblLS bbl;
	780	// MDOUBLE bestL = likelihoodComputationGL::getTreeLikelihoodAllPosAlphTheSame(_tr,_sc,_spVVec,_gainDist,_lossDist,_weightsUniqPatterns,_unObservableData_p); // can be sent
	781	// newL = bbl.optimizeBranches(_tr,_spVVec,_gainDist,_lossDist,_sc,_weightsUniqPatterns,_unObservableData_p,outerIter,_epsilonOptimizationBBL,_maxNumOfIterationsBBL,bestL);
	782	// }
	783	// else{
	784	// LOGnOUT(4,<<"!!!WARNING!!!: BBL-EM is not implemented for spVVec and is not performed."<<endl);
	785	// }
	786	//
	787	// LOGnOUT(4,<<"*** After Branch Lengths Opt. returned Likelihood="<<"\t"<<newL<<endl);
	788	// if((newL<oldL)){
	789	// _tr = oldTree;
	790	// LOGnOUT(4,<<"NOTE: No improvment-> Retain previous tree"<<endl);
	791	// }
	792	// if(_unObservableData_p)
	793	// _unObservableData_p->setLforMissingData(_tr,_spVVec,_gainDist,_lossDist);
	794	// MDOUBLE postL = likelihoodComputationGL::getTreeLikelihoodAllPosAlphTheSame(_tr,_sc,_spVVec,_gainDist,_lossDist,_weightsUniqPatterns,_unObservableData_p);
	795	//
	796	// if( !DEQUAL(newL,postL) ){
	797	// LOGnOUT(3,<<"*ERROR*: Diff returned L, and re-calculated L"<<" "<<newL<<" "<<postL<<" "<<postL-newL<<endl);
	798	// }
	799	//
	800	// time(&t2);
	801	// LOGnOUT(4,<<"Branch Lengths RUNNING TIME = "<<(t2-t1)/60.0<<" minutes"<<endl<<endl);
	802	// return newL;
	803	//}
	804
	805	/********************************************************************************************
	806	*********************************************************************************************/
	807	void gainLossOptimizer::printMixtureParams()
	808	{
	809	mixtureDistribution * pMixture = static_cast<mixtureDistribution*>(_sp->distr());
	810	for (int k = 0; k < pMixture->getComponentsNum(); ++k)
	811	{
	812	LOGnOUT(4, << "comp="<<k<<" Alp/Beta= "<<pMixture->getAlpha(k)/pMixture->getBeta(k)<<" alpha= "<<pMixture->getAlpha(k) << " beta= " <<pMixture->getBeta(k)<<" Prob= "<<pMixture->getComponentProb(k)<<endl);
	813	}
	814	}
	815	/********************************************************************************************
	816	*********************************************************************************************/
	817	gainLossOptions::distributionType getRateDistributionType(distribution* dist)
	818	{
	819	gainLossOptions::distributionType res;
	820	if (dynamic_cast<generalGammaDistributionPlusInvariant*>(dist)){
	821	res = gainLossOptions::GENERAL_GAMMA_PLUS_INV;
	822	}
	823	else if (dynamic_cast<generalGammaDistributionFixedCategories*>(dist)){
	824	res = gainLossOptions::GENERAL_GAMMA_FIXED_CATEGORIES;
	825	}
	826	else if (dynamic_cast<generalGammaDistribution*>(dist)){
	827	res = gainLossOptions::GENERAL_GAMMA;
	828	}
	829	else{
	830	errorMsg::reportError("unknown type in gainLossOptions::getDistributionType");
	831	}
	832	return res;
	833	}

+145

-0

programs/gainLoss/gainLossOptimizer.h less more

	0	/*
	1	Copyright (C) 2011 Tal Pupko TalP@tauex.tau.ac.il.
	2
	3	This program is free software: you can redistribute it and/or modify
	4	it under the terms of the GNU General Public License as published by
	5	the Free Software Foundation, either version 3 of the License, or
	6	(at your option) any later version.
	7
	8	This program is distributed in the hope that it will be useful,
	9	but WITHOUT ANY WARRANTY; without even the implied warranty of
	10	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	11	GNU General Public License for more details.
	12
	13	You should have received a copy of the GNU General Public License
	14	along with this program. If not, see <http://www.gnu.org/licenses/>.
	15	*/
	16
	17
	18	#ifndef ___GAIN_LOSS_OPTIMIZER
	19	#define ___GAIN_LOSS_OPTIMIZER
	20
	21	#include "definitions.h"
	22	#include "replacementModel.h"
	23	#include "gainLoss.h"
	24	#include "gainLossOptions.h"
	25	#include "mixtureDistribution.h"
	26	#include "unObservableData.h"
	27	/********************************************************************************************
	28	The optimization flow:
	29	*note: the gainLossOptimizer changes "in place" (byRef)
	30	*note: the C_evalParam makes a copy
	31
	32	gainLoss (-> startOptimizations -> optimizationsManyStarts[/optimizationsVVManyStarts] )
	33	gainLossOptimizer//overloaded for spVVec (-> optimizations[/optimizationsSPvv]
	34	-> optimizeBranchLengths[/optimizeBranchLengthsvv]
	35	-> optimizeParameters [/optimizeParametersSPvv])
	36	optimizeGainLossModel (->brent)
	37	C_evalParam (->setParam)
	38	likelihoodComputation
	39
	40	*********************************************************************************************/
	41
	42	/********************************************************************************************
	43	gainLossOptimizer
	44	*********************************************************************************************/
	45	class gainLossOptimizer
	46	{
	47
	48	public:
	49	explicit gainLossOptimizer(tree& tr, stochasticProcess* sp, const sequenceContainer &sc,
	50	const MDOUBLE epsilonOptimization, const int numIterations,
	51	const MDOUBLE epsilonOptimizationModel, const int numIterationsModel,
	52	const MDOUBLE epsilonOptimizationBBL, const int numIterationsBBL,
	53	Vdouble * weights,
	54	unObservableData* unObservableData_p, bool performOptimizationsBBL, bool isbblLSWhenbblEMdontImprove);
	55
	56	explicit gainLossOptimizer(tree& tr, vector<vector<stochasticProcess> >& spVVec, distribution gainDist, distribution * lossDist,
	57	const sequenceContainer &sc,
	58	const MDOUBLE epsilonOptimization, const int numIterations,
	59	const MDOUBLE epsilonOptimizationModel, const int numIterationsModel,
	60	const MDOUBLE epsilonOptimizationBBL, const int numIterationsBBL,
	61	Vdouble * weights,
	62	unObservableData* _unObservableData_p, bool performOptimizationsBBL, bool isbblLSWhenbblEMdontImprove);
	63
	64
	65	virtual ~gainLossOptimizer(){;}
	66
	67	MDOUBLE getBestL(){return _bestL;}
	68	tree getOptTree(){return _tr;};
	69	gainLossOptions::distributionType getRateDistributionType(distribution* dist);
	70
	71
	72	protected:
	73	//func
	74	//void initMissingDataInfo();
	75	void optimizations();
	76	void optimizationsSPvv();
	77
	78	MDOUBLE optimizeParameters();
	79	MDOUBLE optimizeParametersSPvv();
	80
	81	MDOUBLE optimizeBranchLengths(const int outerIter);
	82	MDOUBLE optimizeBranchLengthsvv(const int outerIter);
	83
	84	MDOUBLE optimizeRoot();
	85	MDOUBLE optimizeRootSPvv();
	86
	87	void printMixtureParams();
	88
	89	protected:
	90	//members
	91	MDOUBLE _bestL;
	92	MDOUBLE _epsilonOptimization;
	93	int _maxNumOfIterations;
	94	MDOUBLE _epsilonOptimizationModel;
	95	int _maxNumOfIterationsModel;
	96	MDOUBLE _epsilonOptimizationBBL;
	97	int _maxNumOfIterationsBBL;
	98	////MDOUBLE _logLforMissingData;
	99	//MDOUBLE* _plogLforMissingData;
	100	//Vdouble* _pLforMissingDataPerCat; // used foreach rate category
	101	unObservableData* _unObservableData_p;
	102
	103	Vdouble* _weightsUniqPatterns;
	104
	105	bool _performOptimizationsBBL;
	106	bool _isbblLSWhenbblEMdontImprove;
	107
	108	stochasticProcess *_sp;
	109	MDOUBLE _bestGain;
	110	MDOUBLE _bestLoss;
	111	MDOUBLE _bestAlphaRate;
	112	MDOUBLE _bestBetaRate;
	113	MDOUBLE _bestRateProbInvariant;
	114	stochasticProcess *_spSimple;
	115	MDOUBLE _bestTheta;
	116	Vdouble _freq;
	117	MDOUBLE _bestGainAlpha;
	118	MDOUBLE _bestGainBeta;
	119	MDOUBLE _bestGainProbInvariant;
	120
	121	MDOUBLE _bestLossAlpha;
	122	MDOUBLE _bestLossBeta;
	123	MDOUBLE _bestLossProbInvariant;
	124
	125	MDOUBLE _gainExp;
	126	MDOUBLE _lossExp;
	127
	128	MDOUBLE _gainSTD;
	129	MDOUBLE _lossSTD;
	130
	131	bool _isReversible;
	132	bool _isSkipBblEM;
	133	tree _tr;
	134	sequenceContainer _sc;
	135
	136	vector<vector<stochasticProcess*> > _spVVec; //save stochasticProcess for each category
	137	distribution* _gainDist;
	138	distribution* _lossDist;
	139	gainLossOptions::distributionType _rateDistributionType;
	140
	141	};
	142
	143
	144	#endif

+2423

-0

programs/gainLoss/gainLossOptions.cpp less more

	0	/*
	1	Copyright (C) 2011 Tal Pupko TalP@tauex.tau.ac.il.
	2
	3	This program is free software: you can redistribute it and/or modify
	4	it under the terms of the GNU General Public License as published by
	5	the Free Software Foundation, either version 3 of the License, or
	6	(at your option) any later version.
	7
	8	This program is distributed in the hope that it will be useful,
	9	but WITHOUT ANY WARRANTY; without even the implied warranty of
	10	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	11	GNU General Public License for more details.
	12
	13	You should have received a copy of the GNU General Public License
	14	along with this program. If not, see <http://www.gnu.org/licenses/>.
	15	*/
	16	/********************************************************************************************
	17	gainLossOptions - a class that contains all the parameters for the gainLoss project as static
	18	use the 'Parameters' class to read info from txt file.
	19	initDefault. (+Parameters::addParameter)
	20	getParamsFromFile. ->with alterations of defaults for consistency
	21	verifyConsistParams.
	22	*********************************************************************************************/
	23	#include "gainLossOptions.h"
	24	#include "errorMsg.h"
	25	#include "someUtil.h"
	26	#include "Parameters.h"
	27	#include <iostream>
	28	#include <cmath>
	29
	30	using namespace std;
	31
	32	// recognize all the static members defined at .h
	33	int gainLossOptions::_alphabet_size;
	34	string gainLossOptions::_seqFile;
	35	string gainLossOptions::_treeFile;
	36	string gainLossOptions::_treeFileOrig; // used for branchDiff calc. functionality
	37
	38	string gainLossOptions::_rootAt;
	39	string gainLossOptions::_logFile;
	40	int gainLossOptions::_logValue;
	41	string gainLossOptions::_referenceSeq;
	42	string gainLossOptions::_outDir;
	43	string gainLossOptions::_treeOutFile;
	44	//string gainLossOptions::_outFile;
	45	//string gainLossOptions::_outFileNotNormalize;
	46	//string gainLossOptions::_outFileGain4Site;
	47	//string gainLossOptions::_outFileLoss4Site;
	48	//string gainLossOptions::_outFileLikeofPos;
	49	//string gainLossOptions::_outFilePosteriorExpectationOfChange;
	50	//gainLossOptions::discretizationType gainLossOptions::_discretizationType;
	51	gainLossOptions::treeSearchAlgType gainLossOptions::_treeSearchAlg;
	52	gainLossOptions::gammmaMixtureOptimizerAlgType gainLossOptions::_gammmaMixtureOptimizerAlg;
	53	gainLossOptions::distributionType gainLossOptions::_gainDistributionType;
	54	gainLossOptions::distributionType gainLossOptions::_lossDistributionType;
	55	gainLossOptions::distributionType gainLossOptions::_rateDistributionType;
	56	gainLossOptions::rateEstimationMethodType gainLossOptions::_rateEstimationMethod;
	57	gainLossOptions::characterFreqEvalType gainLossOptions::_characterFreqEval;
	58	gainLossOptions::discretizationType gainLossOptions::_rateDiscretizationType;
	59	MDOUBLE gainLossOptions::_userGainLossRatio;
	60	bool gainLossOptions::_keepUserGainLossRatio;
	61	MDOUBLE gainLossOptions::_userAlphaGain;
	62	MDOUBLE gainLossOptions::_userBetaGain;
	63	MDOUBLE gainLossOptions::_userProbInvariantGain;
	64	MDOUBLE gainLossOptions::_userAlphaLoss;
	65	MDOUBLE gainLossOptions::_userBetaLoss;
	66	MDOUBLE gainLossOptions::_userProbInvariantLoss;
	67	MDOUBLE gainLossOptions::_userProbInvariantRate;
	68	MDOUBLE gainLossOptions::_userRateInvariantVal;
	69	MDOUBLE gainLossOptions::_userAlphaRate;
	70	MDOUBLE gainLossOptions::_userBetaRate;
	71	MDOUBLE gainLossOptions::_userGain;
	72	MDOUBLE gainLossOptions::_userLoss;
	73	MDOUBLE gainLossOptions::_userTheta;
	74	MDOUBLE gainLossOptions::_userAlphaGainMax;
	75	MDOUBLE gainLossOptions::_userBetaGainMax;
	76	MDOUBLE gainLossOptions::_userProbInvariantGainMax;
	77	MDOUBLE gainLossOptions::_userAlphaLossMax;
	78	MDOUBLE gainLossOptions::_userBetaLossMax;
	79	MDOUBLE gainLossOptions::_userProbInvariantLossMax;
	80	MDOUBLE gainLossOptions::_userProbInvariantRateMax;
	81	MDOUBLE gainLossOptions::_userAlphaRateMax;
	82	MDOUBLE gainLossOptions::_userBetaRateMax;
	83	MDOUBLE gainLossOptions::_userGainMax;
	84	MDOUBLE gainLossOptions::_userLossMax;
	85	MDOUBLE gainLossOptions::_userThetaMax;
	86	MDOUBLE gainLossOptions::_userAlphaGainMin;
	87	MDOUBLE gainLossOptions::_userBetaGainMin;
	88	MDOUBLE gainLossOptions::_userProbInvariantGainMin;
	89	MDOUBLE gainLossOptions::_userAlphaLossMin;
	90	MDOUBLE gainLossOptions::_userBetaLossMin;
	91	MDOUBLE gainLossOptions::_userProbInvariantLossMin;
	92	MDOUBLE gainLossOptions::_userProbInvariantRateMin;
	93	MDOUBLE gainLossOptions::_userAlphaRateMin;
	94	MDOUBLE gainLossOptions::_userBetaRateMin;
	95	MDOUBLE gainLossOptions::_userGainMin;
	96	MDOUBLE gainLossOptions::_userLossMin;
	97	MDOUBLE gainLossOptions::_userThetaMin;
	98
	99	MDOUBLE gainLossOptions::_probCutOffPrintEvent;
	100	bool gainLossOptions::_isFewCutOffCounts;
	101	MDOUBLE gainLossOptions::_probCutOffCounts;
	102
	103	int gainLossOptions::_numberOfGainCategories;
	104	int gainLossOptions::_numberOfLossCategories;
	105	int gainLossOptions::_numberOfRateCategories;
	106	int gainLossOptions::_numberOfRateComponents;
	107	int gainLossOptions::_maxNumOfIterations;
	108	int gainLossOptions::_maxNumOfIterationsModel;
	109	int gainLossOptions::_maxNumOfIterationsBBL;
	110	int gainLossOptions::_maxNumOfIterationsManyStarts;
	111	int gainLossOptions::_numberOfRandPointsInOptimization;
	112	int gainLossOptions::_numberOfRandStartPoints;
	113
	114	int gainLossOptions::_numOfSimulationsForPotExp;
	115
	116	gainLossOptions::optimizationLevel gainLossOptions::_optimizationLevel; // initDefault(): low
	117	MDOUBLE gainLossOptions::_epsilonOptimizationModel; // per-parameter epsilon for model optimization; initDefault(): 0.01
	118	MDOUBLE gainLossOptions::_epsilonOptimizationBBL; // per-branch epsilon for BBL; initDefault(): 0.02
	119
	120	MDOUBLE gainLossOptions::_epsilonOptimizationIterationCycleManyStarts; // initDefault(): 2.0
	121	MDOUBLE gainLossOptions::_epsilonFactor_Model; // initDefault(): 0.01
	122	MDOUBLE gainLossOptions::_epsilonFactor_BBL; // initDefault(): 0.02
	123	MDOUBLE gainLossOptions::_numIterationsFactor_Model; // initDefault(): 3
	124	MDOUBLE gainLossOptions::_numIterationsFactor_BBL; // initDefault(): 2
	125
	126	MDOUBLE gainLossOptions::_epsilonOptimizationIterationCycle; // epsilon of one optimization cycle (model+BBL); initDefault(): 1.0
	127	bool gainLossOptions::_gainLossDist; // GLM (mixture) model switch; initDefault(): false
	128	bool gainLossOptions::_calculateRate4site;
	129	bool gainLossOptions::_calculeGainLoss4site;
	130	MDOUBLE gainLossOptions::_likelihoodLandscapeIncrement;
	131	bool gainLossOptions::_printLikelihoodLandscape; // test purpose (ad-hoc) per initDefault()
	132	bool gainLossOptions::_printLikelihoodLandscapeAlphaRate; // test purpose (ad-hoc) per initDefault()
	133	bool gainLossOptions::_printLikelihoodLandscapeGainLoss; // test purpose (ad-hoc) per initDefault()
	134	bool gainLossOptions::_printLikelihoodLandscapeTheta; // test purpose (ad-hoc) per initDefault()
	135	bool gainLossOptions::_optAlphaInIteration;
	136	bool gainLossOptions::_optBBL_LS_InIteration;
	137	bool gainLossOptions::_optBBL_EM_InIteration;
	138	bool gainLossOptions::_printP11forgain; // test purpose (ad-hoc) per initDefault()
	139	bool gainLossOptions::_printTree;
	140	bool gainLossOptions::_printSeq;
	141	bool gainLossOptions::_printPij_t;
	142	bool gainLossOptions::_printLofPos;
	143	bool gainLossOptions::_printLofPosBothModels;
	144	bool gainLossOptions::_performOptimizations; // model parameters are numerically estimated to maximize likelihood
	145	bool gainLossOptions::_correctOptimizationEpsilon; // adjust epsilon according to dataset size (see the formula noted in initDefault())
	146	bool gainLossOptions::_performOptimizationsBBL;
	147	bool gainLossOptions::_performOptimizationsBBLOnlyOnce;
	148
	149	bool gainLossOptions::_isBblLS; // BBL-LS, possibly after BBL-EM, to make further improvement
	150	bool gainLossOptions::_isbblLSWhenbblEMdontImprove; // if BBL-EM gives no improvement, perform one BBL-LS iteration
	151	bool gainLossOptions::_isSkipBblEMWhenbblEMdontImprove; // after no improvement, skip BBL-EM next iteration and go directly to LS
	152
	153	bool gainLossOptions::_isInitGainLossByEmpiricalFreq; // the sp is initialized with the empirical 0 and 1 freq
	154	bool gainLossOptions::_isBBLEMwithSimpleSpBeforeFullOptimization; // BBL-EM with a simplified sp is performed before optimization
	155	bool gainLossOptions::_isOptimizeGainLossRatioInsteadOfGainAndLossSeperately; // the gain/loss ratio is estimated rather than gain and loss separately
	156	bool gainLossOptions::_isOptimizeInvariantCategoryProb;
	157
	158	bool gainLossOptions::_isUpdateOnlyGainBetaForRatio; // marked "work in progress" in initDefault()
	159	bool gainLossOptions::_isComputeLikelihoodDuringInit; // true unless a fast/parsimony run is performed
	160
	161	bool gainLossOptions::_isBblEMbeforeLSWithMissSpecifiedModel; // if both _isBblLS and this are true, an additional iteration is done after BBL-EM
	162	bool gainLossOptions::_isBblForceFactorCorrection;
	163	MDOUBLE gainLossOptions::_BblFactorCorrection; // initDefault(): 2.0
	164
	165	bool gainLossOptions::_isSkipFirstParamsOptimization;
	166	bool gainLossOptions::_isOptimizeParamsWithLogMinMax; // optimize positive parameters on a log scale (e.g. [0.01,100] -> [-2,2]) per initDefault()
	167
	168	bool gainLossOptions::_isMultipleAllBranchesByFactorAtStart;
	169	bool gainLossOptions::_isNormalizeAtStart;
	170
	171	bool gainLossOptions::_performOptimizationsROOT;
	172	bool gainLossOptions::_performOptimizationsBBLManyStarts;
	173	bool gainLossOptions::_simulatedAnnealing; // epsilon is lowered with iterations
	174	MDOUBLE gainLossOptions::_simulatedAnnealingMinEpsilonFactor; // lower limit factor for the normal epsilons; initDefault(): 0.2
	175	MDOUBLE gainLossOptions::_simulatedAnnealingCoolingFactor; // factor used to lower epsilons each iteration; initDefault(): 0.8
	176	bool gainLossOptions::_performOptimizationsManyStarts; // several models are chosen as starting points for optimization
	177	bool gainLossOptions::_gainLossDistPlusInvariant; // per initDefault(): automatically true if GENERAL_GAMMA_PLUS_INV or GAMMA_PLUS_INV
	178	bool gainLossOptions::_isHGT_normal_Pij; // test parameter per initDefault()
	179	bool gainLossOptions::_isHGT_with_Q; // test parameter per initDefault()
	180	bool gainLossOptions::_initParamsAtRandPoints;
	181	bool gainLossOptions::_initParamsAtRandPointsInOptimization;
	182	bool gainLossOptions::_calculePosteriorExpectationOfChange;
	183	bool gainLossOptions::_simulatePosteriorExpectationOfChange; // simulate PostExp, to test the accuracy of the stochastic mapping
	184	bool gainLossOptions::_isOnlySimulateSeq; // no mapping or parsimony is done
	185
	186	bool gainLossOptions::_modelOptimizationSimPostExp; // settings for the "simulate PostExp" runs start here (see that section of initDefault())
	187	bool gainLossOptions::_BBLOptimizationSimPostExp;
	188	bool gainLossOptions::_initParamsAtRandPointsInSimPostExp;
	189	bool gainLossOptions::_initRootFreqAtRandPointsInSimPostExpEachPos; // noted in initDefault() as not required in current settings
	190	bool gainLossOptions::_isFlatTreeBeforOpt; // in simulations: flatten the tree before optimization
	191	bool gainLossOptions::_isbBLEMwithSimpleSpSimulatePostExp; // in simulations: do simple BBL-EM
	192	MDOUBLE gainLossOptions::_noiseLevelInGammaSimulation; // initDefault(): 0.5
	193	bool gainLossOptions::_isTheataFromObservedFreq; // theta is taken from observed freq + random perturbation
	194	bool gainLossOptions::_isRootFreqEQstationaryInSimulations;
	195	bool gainLossOptions::_isMatrixGainLossFromRatioInSimulations;
	196	bool gainLossOptions::_isFlatSpBeforeOpt;
	197	MDOUBLE gainLossOptions::_epsilonOptForPostExpSimFactor; // 1 is for normal accuracy; initDefault(): 10
	198	MDOUBLE gainLossOptions::_numOfIterationsOptForPostExpSimFactor; // 1 is for normal accuracy; initDefault(): 0.1
	199	MDOUBLE gainLossOptions::_loss2gainRatioToSim; // initDefault(): 3, i.e. loss rate is 3 times that of gain
	200
	201	bool gainLossOptions::_printAncestralReconstructPosterior; // produces a huge file per initDefault()
	202	bool gainLossOptions::_saveProbChanges_PosNodeXY; // used for ancestral reconstruction - posterior
	203	bool gainLossOptions::_isComputeDistanceFromRootForRecent; // used to classify branches
	204
	205	bool gainLossOptions::_printTreesWithProbabilityValuesAsBP;
	206	bool gainLossOptions::_printTreesWithExpectationValuesAsBP;
	207	bool gainLossOptions::_calculateAncestralReconstruct;
	208	bool gainLossOptions::_printTreesWithAncestralReconstructAsBP;
	209	bool gainLossOptions::_printAncestralReconstructFullData; // produces a huge file per initDefault()
	210
	211	bool gainLossOptions::_printDEBUGinfo; // produces a huge file per initDefault()
	212	bool gainLossOptions::_printPropExpOfChangeFullData; // could be a huge file if probCutOff is 0.0
	213	bool gainLossOptions::_printExpPerPosPerBranchMatrix; // output used as input for COMAP
	214	bool gainLossOptions::_printComputedCorrelations;
	215	bool gainLossOptions::_performParametricBootstapCorrelation;
	216	bool gainLossOptions::_usePosSpecificSimulations;
	217	bool gainLossOptions::_isConsiderNegativeCorrelations;
	218	bool gainLossOptions::_isDivideBinsByRange; // if true, bins get different numbers of samples but the rate (Nmin) is equally partitioned
	219	bool gainLossOptions::_isSortVectorOfCorrelationsBinsByLowerRateBound;
	220	bool gainLossOptions::_isSortVectorOfCorrelationsBinsByMidRateBound; // if true, the bins are overlapping
	221	MDOUBLE gainLossOptions::_relativeSizeOfOverLappedBins; // if 0.25, 25% of samples per bin (the initDefault() value)
	222
	223	bool gainLossOptions::_isPrintpairWiseCorrelationsAndNmin;
	224	bool gainLossOptions::_isPrintCorrelationsOfAllPairs_Corr; // huge files per initDefault()
	225	bool gainLossOptions::_isPrintCorrelationsOfAllPairs_pVal; // huge files per initDefault()
	226	bool gainLossOptions::_isPrintAllPairsOfCorrelatedSitesIncludingPValsAboveBH; // NOTE(review): initDefault() comments this as "only pairs with PVal significant after BH will be printed", which reads opposite to the name - confirm intended meaning
	227	bool gainLossOptions::_isAllCorrTypeReqruiedToBeSignificant;
	228	bool gainLossOptions::_isNminBasedOnCountBranchesOverCutOff; // if true, Nmin is an integer = number of branches with probEvent > cutoff
	229
	230	int gainLossOptions::_numOfBinsInParametricBootstrapSimulations; // initDefault(): 5
	231	bool gainLossOptions::_isAddSimulationsWithLowRate; // initDefault() notes true seems problematic with Mixture (GL) models
	232	bool gainLossOptions::_isFDRcorrectionForPValInCorrelation;
	233	bool gainLossOptions::_isComputeQVals;
	234	MDOUBLE gainLossOptions::_pValueCutOffForBootStrap; // initDefault(): 0.05
	235	MDOUBLE gainLossOptions::_minExpThresholdForPValComputationForCorrelatingPair; // if 0, no Nmin is imposed; initDefault(): 1.0
	236	bool gainLossOptions::_isUpdateMinExpThresholdGivenSimulaitonsQuantile;
	237	bool gainLossOptions::_isUpdateMinExpThresholdGivenRealDataQuantile;
	238	MDOUBLE gainLossOptions::_updateMinExpThresholdGivenRealDataQuantileVal; // initDefault(): 0.1
	239
	240	bool gainLossOptions::_isUpdateMinExpThresholdGivenHighFractionOfHighCorrel;
	241	bool gainLossOptions::_isCompExtremeValDistribution; // pValue is also estimated assuming EVD distribution
	242
	243	MDOUBLE gainLossOptions::_minExpThresholdAsPercentFromNumOfSpeciesForPValComputationForCorrelatingPair; // e.g. if 1, with 500 species, minT = 5
	244
	245	bool gainLossOptions::_isCorrelateWithPearson;
	246	bool gainLossOptions::_isCorrelateWithSpearman;
	247	bool gainLossOptions::_isCorrelationsBasedOnMaxParsimonyMapping;
	248
	249	bool gainLossOptions::_isAlsoCorrelateWithLoss; // flagged "not fully functional" in initDefault()
	250	bool gainLossOptions::_isAlsoCorrelateWithBoth;
	251	bool gainLossOptions::_isOnlyCorrelateWithBoth; // if true, only gain.concat.loss correlations are computed
	252	bool gainLossOptions::_isUseRateForSiteAsNminForCorrelations;
	253
	254	bool gainLossOptions::_isRemoveSimulatedPositionsWithExpectedLowNminBasedOnOccur; // remove simulated positions with too low/high occurrence to save computation time
	255	bool gainLossOptions::_isRemoveSimulatedPositionsBasedOnMP; // remove simulated positions with too few events based on max parsimony
	256	MDOUBLE gainLossOptions::_minNumOfMPEvent2RemoveSimulatedPositions; // initDefault(): 1
	257	bool gainLossOptions::_isUpdateminNumOfMPEvent2RemoveSimulatedPositions; // if true, add 0.2 events for every sqrt(number of species)
	258
	259
	260	bool gainLossOptions::_printComputedCorrelationsAllSites;
	261	bool gainLossOptions::_isIgnoreCorrelationAmongSelectedSites;
	262	bool gainLossOptions::_isNormalizeForBranchExpInCorrCompute; // per-branch values are normalized to remove branch-dependent signal
	263	bool gainLossOptions::_isNormalizeByExpectationPerBranch; // else, by branch length
	264	string gainLossOptions::_selectedSitesForCorrelation; // initDefault(): "" (empty)
	265	bool gainLossOptions::_isRemoveSeqWithUnknownForLastSelectedSiteForCorrelation; // the last position is a trait (with possible unknown)
	266	int gainLossOptions::_checkCoEvolWithUnionPAP_against_pos; // if 0, the union is not performed
	267
	268
	269	bool gainLossOptions::_isReversible; // if false, the root is fixed (initDefault(): false)
	270	bool gainLossOptions::_isRootFreqEQstationary;
	271	bool gainLossOptions::_initRandomGammaMixuteParam;
	272	bool gainLossOptions::_incrementFactorForGain; // test parameter per initDefault()
	273	bool gainLossOptions::_lossBiggerGainLimit; // test parameter per initDefault()
	274	MDOUBLE gainLossOptions::_slopeFactorForGain; // test parameter - limit growth in gain estimation
	275
	276	bool gainLossOptions::_isStartWithTheta; // the parameter optimization loop will start with Theta
	277	bool gainLossOptions::_isSkipGainOptimization;
	278	MDOUBLE gainLossOptions::_epsilonOptimizationThetaFactor; // initDefault(): 1.0, "allows for different optimization Theta"
	279	bool gainLossOptions::_isAlphaLimit;
	280	bool gainLossOptions::_isGainLimit;
	281	//MDOUBLE gainLossOptions::_probCutOffSum;
	282	MDOUBLE gainLossOptions::_maxRateForML; // initDefault(): 100.0
	283	MDOUBLE gainLossOptions::_minBranchLength; // initDefault(): 0.0000001
	284	MDOUBLE gainLossOptions::_maxBranchLength; // initDefault(): 10.0
	285	MDOUBLE gainLossOptions::_epsilonForReRootFactor; // only for substantial improvement the tree will be re-rooted
	286	MDOUBLE gainLossOptions::_percentOfImprovManySarts; // initDefault(): 0.0001
	287	MDOUBLE gainLossOptions::_percentOfImprov; // initDefault(): 0.00001
	288
	289	bool gainLossOptions::_calculeBranchLegthDiffFactor; // if BBL is used, compare each branch length before/after
	290
	291	gainLossOptions::simulationType gainLossOptions::_simulationType; // initDefault(): Gamma
	292	bool gainLossOptions::_isMPratio;
	293	bool gainLossOptions::_isInitGainLossByEmpiricalFreqSimulatePostExp;
	294	bool gainLossOptions::_is3states;
	295	MDOUBLE gainLossOptions::_3statesGain; // gain (0->1)
	296	MDOUBLE gainLossOptions::_3statesMore; // more (1->more)
	297	MDOUBLE gainLossOptions::_3statesLess; // less (more->1)
	298	MDOUBLE gainLossOptions::_3statesLoss; // loss (1->0)
	299	MDOUBLE gainLossOptions::_3states0;
	300	MDOUBLE gainLossOptions::_3states1;
	301
	302	bool gainLossOptions::_simulateSequences; // used to test the rate4site computation
	303	bool gainLossOptions::_isReversibleSim;
	304	bool gainLossOptions::_useTheSameSpForSim;
	305	int gainLossOptions::_numberOfSequences2simulate; // initDefault(): 100
	306	int gainLossOptions::_numberOfPositions2simulate; // number of positions (seqLen); initDefault(): 8000
	307	int gainLossOptions::_numberOfIterations2simulate; // max number of simulating iterations in parametric bootstrap, without convergence
	308	int gainLossOptions::_numberOfIterationsForPrintResults; // results are updated every this many simulation iterations
	309	MDOUBLE gainLossOptions::_percentileOfNminWithCorr1RequiredForLastIteration; // initDefault(): 10
	310
	311
	312	gainLossOptions::distributionType gainLossOptions::_rateDistributionTypeSim; // initDefault(): GAMMA
	313	bool gainLossOptions::_gainEQlossSim;
	314	bool gainLossOptions::_calculateRate4siteSim; // used to test the rate4site computation
	315	bool gainLossOptions::_writeSeqSim;
	316	bool gainLossOptions::_accountForMissingData; // for phyletic patterns - must be true
	317	bool gainLossOptions::_gainEQloss; // M1 (the basic model - gain=loss)
	318	bool gainLossOptions::_gainLossRateAreFreq; // test parameter where gain+loss = 1
	319	bool gainLossOptions::_findCoEvolvingSitesOldNotWorking; // for the co evolving project
	320	int gainLossOptions::_numberOfSequences2simulateForCoEvol; // for the co evolving project
	321	Vdouble* gainLossOptions::_weights; // position weights (not in use); initDefault() sets it to NULL
	322	int gainLossOptions::_minNumOfOnes; // initDefault(): 1
	323	int gainLossOptions::_minNumOfZeros; // initDefault(): 0
	324
	325	ostream* gainLossOptions::_outPtr; // initDefault() points this at cout
	326	bool gainLossOptions::_isAnaliticComputeJumps;
	327	bool gainLossOptions::_isSequenceUniqPattern;
	328	bool gainLossOptions::_isRemovePositionsWithHighPercentOfMissingData;
	329	MDOUBLE gainLossOptions::_fractionOfMissingDataToRemove; // initDefault(): 0.5
	330
	331	bool gainLossOptions::_isOnlyComputeLikelihood;
	332	bool gainLossOptions::_isNormalizeQ;
	333	bool gainLossOptions::_isNormalizeQinSpVVec;
	334	bool gainLossOptions::_isNormalizeQandTreeafterOpt;
	335	bool gainLossOptions::_isFlatUserParameters;
	336	bool gainLossOptions::_isAlphaEqBetaManipulation; // this manipulation produces un-normalized Q matrices
	337	bool gainLossOptions::_calculeBranchLegthDiffFactorFromInputTrees; // input 2 trees - compute logL diff per branch length
	338	bool gainLossOptions::_intersectTreeAndSeq; // input tree and seq (not the same taxa): intersect, write seq and tree and return
	339
	340	bool gainLossOptions::_isOnlyParsimony; // only parsimony computation, then return
	341	bool gainLossOptions::_calculeMaxParsimonyChange;
	342	bool gainLossOptions::_calculeMaxParsimonyChangeSeveralGainLossRatios;
	343	string gainLossOptions::_costMatrixfile; // initDefault(): "" (empty)
	344	gainLossOptions::costMatrixType gainLossOptions::_costMatrixType; // initDefault(): gainLossCost
	345	MDOUBLE gainLossOptions::_costMatrixGainLossRatio; // initDefault(): 2.001 (0.001 added as tie breaker)
	346
	347
	348	//ofstream gainLossOptions::_out_f;
	349	//string gainLossOptions::_mainType;
	350
	351
	352	gainLossOptions::~gainLossOptions(){} // empty body: the destructor performs no explicit cleanup of the static members defined above
	353
	354	/********************************************************************************************
		initOptions
		Sets up the gainLossOptions static members from the parameter file paramFileName.
		The call order matters: the output directory is resolved first because initDefault()
		builds the default _logFile and _treeOutFile paths from _outDir; defaults are assigned
		before the parameter file is read; verifyConsistParams() runs last.
	355	*********************************************************************************************/
	356	void gainLossOptions::initOptions(const string& paramFileName)
	357	{
	358	getOutDirFromFile(paramFileName); // first set _outDir to be used next
	359	createDir("", gainLossOptions::_outDir); // needs _outDir from the call above
	360	initDefault(); // assigns the default values to the static members
	361	getParamsFromFile(paramFileName); // presumably overrides the defaults with values from the file - body not visible here, confirm
	362	verifyConsistParams(); // presumably checks the combined settings for consistency - body not visible here, confirm
	363	}
	364
	365	/********************************************************************************************
	366	initDefault
	367	*********************************************************************************************/
	368	void gainLossOptions::initDefault()
	369	{
	370	// all the default values are stored in the gainLossOptions:: static members
	371	//################### Basic parameters:
	372	// input (general)
	373	_seqFile = ""; // essential - fasta file with presence(1)/absence(0) for each species over all gene families (positions)
	374	_treeFile = ""; // basic - if not given - calculated based on distanceTable
	375	_treeFileOrig = ""; // for brachLength Diff.
	376	_rootAt =""; // name of node to be root (the tree must contain names of internal nodes)
	377	_referenceSeq = "non"; // the results are printed with this seq in each positions. (default - first)
	378	//static string _mainType;
	379
	380	// output
	381	//_outDir = "RESULTS"; // concatenated after current dir location 'pwd'
	382	_logFile = _outDir + "//" + "log.txt"; // print-outs of the running progress including the estimated parameters optimization
	383	_logValue = 5; // verbosity level - ~4 - normal, >7 - load of info
	384	_treeOutFile = _outDir + "//" + "TheTree.ph"; // "TheTree.ph" - tree after BBL and other changes
	385	// all of these files are still part of the output, but names are fixed
	386	//static string _outFile; // Rate4Site results (normalized - Ave=0, Sd=1)
	387	//static string _outFileNotNormalize; // Rate4Site results (original)
	388	//static string _outFileGain4Site; // gain4Site results
	389	//static string _outFileLoss4Site; // loss4Site results
	390	//static string _outFileLikeofPos; // compare to model with gainRate=0
	391	//static string _outFilePosteriorExpectationOfChange; // exp01, exp10 per gene
	392
	393	//################################################## Model params
	394	_alphabet_size =2; // 2 - presence(1)/absence(0)
	395	_gainLossDist =false; // GLM (mixture)
	396	_accountForMissingData =true; // for phyletic patterns - must be true
	397	_minNumOfOnes = 1; // for COG and EggNOG only patterns with 3 or more are observable
	398	_minNumOfZeros = 0; // for indels, change to 1_isRemoveSimulatedPositionsBasedOnMP
	399
	400	_gainEQloss =false; // M1 (the basic model - gain=loss)
	401	_isReversible =false; // if(_isReversible==False) -> the root is fixed
	402	_isRootFreqEQstationary =true; // same "-"
	403	_gainLossDistPlusInvariant =false; // Automatically True if GENERAL_GAMMA_PLUS_INV or GAMMA_PLUS_INV
	404	_gainLossRateAreFreq =false; // test parameter where gain+loss = 1, and the "r_Q" is external
	405
	406	//Each of the rates governing the stochastic process are assumed to be sampled from a prior distribution.
	407	_rateDistributionType =GAMMA;
	408	_gainDistributionType =GENERAL_GAMMA; //(only for the mixture models - _gainLossDist 1)
	409	_lossDistributionType =GENERAL_GAMMA; //(only for the mixture models - _gainLossDist 1)
	410	_numberOfGainCategories = 3; // gain 3-5 - the overall number of stochasticProcess 9-25
	411	_numberOfLossCategories = 3; // loss 3-5
	412	_numberOfRateCategories = 4; // discretization usually 4-16
	413	_numberOfRateComponents = 3; // gammaMix
	414	_rateDiscretizationType =QUANTILE; // QUANTILE, LAGUERRE - only in use for gammaMix
	415
	416	//################################################## computations (What calculations are processed)
	417	_calculateRate4site =true;
	418	_rateEstimationMethod =ebExp; // mlRate (only option for UNIFORM) or posteriorBayesianExpectation
	419	_calculeGainLoss4site =true;
	420	_calculePosteriorExpectationOfChange =true;
	421	_calculateAncestralReconstruct =true;
	422	_simulatePosteriorExpectationOfChange =false; // simulate PostExp (To test to accuracy of the stochastic mapping)
	423	_isOnlySimulateSeq =false; // no mapping or parsimony is done
	424
	425	_simulateSequences =false; // Test the rate4site computation
	426	_calculateRate4siteSim =false; // Test the rate4site computation
	427	_calculeBranchLegthDiffFactor =true; // if BBL is used for each branch - compare length before/after
	428	_findCoEvolvingSitesOldNotWorking =false; // for the co evolving project
	429	_saveProbChanges_PosNodeXY =true; // used for AnsetralReconstruc - posterior
	430	_isComputeDistanceFromRootForRecent =false; // used to classify branches
	431	_printAncestralReconstructPosterior =true; // huge file...
	432	_isOnlyParsimony = false; // only parsimony computation and Return
	433	_calculeMaxParsimonyChange = true;
	434	_calculeMaxParsimonyChangeSeveralGainLossRatios = false;
	435
	436	//################################################## Prints
	437	_printTree =true;
	438	_printSeq =true;
	439	_printPij_t =true;
	440	_printLofPos =true;
	441	_printLofPosBothModels =false;
	442	_printTreesWithProbabilityValuesAsBP =false;
	443	_printTreesWithExpectationValuesAsBP =false;
	444	_printTreesWithAncestralReconstructAsBP =false;
	445	_printPropExpOfChangeFullData =false; // Could be a huge file, if probCutOff is 0.0
	446	_printExpPerPosPerBranchMatrix =false; // Used as input for COMAP
	447	_printComputedCorrelations =false; //
	448	_performParametricBootstapCorrelation =false;
	449	_usePosSpecificSimulations =false;
	450	_isConsiderNegativeCorrelations =false;
	451	_isDivideBinsByRange =false; // if true, each bin will get different number of samples, but the rate(Nmin) is eq-partitioned
	452	_isSortVectorOfCorrelationsBinsByLowerRateBound =false;
	453	_isSortVectorOfCorrelationsBinsByMidRateBound =true; // if true, the bins are overlapping
	454	_relativeSizeOfOverLappedBins = 0.25; // if 0.25, 25% of samples per bin
	455
	456	_isPrintpairWiseCorrelationsAndNmin =false;
	457	_isPrintCorrelationsOfAllPairs_Corr =false; // huge files
	458	_isPrintCorrelationsOfAllPairs_pVal =false; // huge files
	459
	460	_isPrintAllPairsOfCorrelatedSitesIncludingPValsAboveBH =true; // if true, only pairs with PVal significant after BH will be printed
	461	_isAllCorrTypeReqruiedToBeSignificant =false; // if true, only pairs with PVal significant after BH will be printed
	462	_isNminBasedOnCountBranchesOverCutOff =false; // it true, Nmin is an integer= the number of branches with probEvent>cuttoff
	463
	464	_numOfBinsInParametricBootstrapSimulations =5;
	465	_isAddSimulationsWithLowRate =false; // true seems problematics with Mixture (GL) models
	466	_isFDRcorrectionForPValInCorrelation =true;
	467	_isComputeQVals =false;
	468	_pValueCutOffForBootStrap = 0.05; // was 0.05
	469	_minExpThresholdForPValComputationForCorrelatingPair = 1.0; // if 0, no Nmin is imposed, 2.0, 3.0 are possible values
	470	_isUpdateMinExpThresholdGivenSimulaitonsQuantile = false; // 0.25 quantile (more "relevant" simulation)
	471	_isUpdateMinExpThresholdGivenRealDataQuantile = false; // Given real data, minR is defined by the 0.1 percentile (updated only is higher)
	472	_updateMinExpThresholdGivenRealDataQuantileVal = 0.1; // if 0.2, Nmin is for sites above the 0.2 percentile rate
	473
	474	_isUpdateMinExpThresholdGivenHighFractionOfHighCorrel = false; // elevate Nmin Threshold if: (A) freqOfHighCorr was too high (B) freqOfHighCorr is reduced consistently with higher Nmin (C) new Nmin is lower than medianNminOfRealData
	475	_isCompExtremeValDistribution = false; // pValue is also estimated assuming EVD distribution
	476
	477	_minExpThresholdAsPercentFromNumOfSpeciesForPValComputationForCorrelatingPair = 1; // e.g., if =1, with 500 species, minT = 5
	478
	479	_isCorrelateWithPearson =true; //o
	480	_isCorrelateWithSpearman =false; //
	481	_isCorrelationsBasedOnMaxParsimonyMapping =false; //
	482
	483	_isAlsoCorrelateWithLoss =true; // not fully functional !
	484	_isAlsoCorrelateWithBoth =true; //
	485	_isOnlyCorrelateWithBoth =true; // if true, only gain.concat.loss correlations are computed
	486	_isUseRateForSiteAsNminForCorrelations =false; //
	487
	488	_isRemoveSimulatedPositionsWithExpectedLowNminBasedOnOccur =false; // Remove simulated position with too low/high occur to save later computation time (quick and (VERY) dirty)
	489	_isRemoveSimulatedPositionsBasedOnMP =true; // Remove simulated positions with less than 2 events based on max parsimony (quick and dirty)
	490	_minNumOfMPEvent2RemoveSimulatedPositions =1; // If 1, then gain+loss events must be above 1 (at least one event). Must be higher for many genomes
	491	_isUpdateminNumOfMPEvent2RemoveSimulatedPositions =true; // If true, add 0.2 events for every sqrt(num Of species)
	492
	493	_printComputedCorrelationsAllSites =false; //
	494	_isIgnoreCorrelationAmongSelectedSites =false; // High correlation is due to shared branch length and topology
	495	_isNormalizeForBranchExpInCorrCompute =false; // The values per-branch are normalized to remove branch-dependent signal
	496	_isNormalizeByExpectationPerBranch =true; // else, by branch length
	497
	498	_selectedSitesForCorrelation = ""; // in this file, for each position, the correlation with all other positions if computed.
	499	_isRemoveSeqWithUnknownForLastSelectedSiteForCorrelation = false; // the last position is a trait (with possible unknown). If true, (1) unknown removed, (2) correlation only against last
	500	_checkCoEvolWithUnionPAP_against_pos = 0; // if 0, not perforing union
	501
	502	_printAncestralReconstructFullData =false; // huge file...
	503	_printDEBUGinfo =false; // huge file...
	504	_printLikelihoodLandscape =false; // test purpose (Ad-hoc)
	505	_likelihoodLandscapeIncrement = 0.05;
	506	_printLikelihoodLandscapeAlphaRate =false; // test purpose (Ad-hoc)
	507	_printLikelihoodLandscapeGainLoss =false; // test purpose (Ad-hoc)
	508	_printLikelihoodLandscapeTheta =false; // test purpose (Ad-hoc)
	509	_optAlphaInIteration =false;
	510	_optBBL_LS_InIteration =false;
	511	_optBBL_EM_InIteration =false;
	512	_printP11forgain =false; // test purpose (Ad-hoc)
	513
	514	//################################################## optimizations
	515	_performOptimizations =true; // model parameters are numerically estimated to maximize likelihood
	516	_performOptimizationsBBL = false; //
	517	_performOptimizationsBBLOnlyOnce = true;
	518	_isBblLS = false; // possibly after BBL-EM, to make further improvement
	519	_isbblLSWhenbblEMdontImprove = false; //If No improvement with BBL-EM -> Perform BBL-LS one iteration
	520	_isSkipBblEMWhenbblEMdontImprove = true; // Since no improvement, BBL-EM will be skipped next iteration, go directly to LS
	521
	522	_isInitGainLossByEmpiricalFreq=true; // the sp is initialized with the empirical 0 and 1 freq
	523	_isBBLEMwithSimpleSpBeforeFullOptimization=true; // before optimization - BBL-EM is performed with simplified sp
	524	_isOptimizeGainLossRatioInsteadOfGainAndLossSeperately=true; // gain/loss is estimated (not separately gain, loss...)
	525	_isOptimizeInvariantCategoryProb=true;
	526
	527	_isUpdateOnlyGainBetaForRatio=false; // work in progress...
	528	_isComputeLikelihoodDuringInit=true; // true, unless fast/parsimony run is performed
	529
	530	_isBblEMbeforeLSWithMissSpecifiedModel = true; // if both _isBblLS and this is true, after BBL-EM additional iteration is done
	531	_isBblForceFactorCorrection = true;
	532	_BblFactorCorrection = 2.0;
	533
	534	_isSkipFirstParamsOptimization = false;
	535	_isOptimizeParamsWithLogMinMax = true; // when the parameter is a positive and values are e.g., [0.01,100] brent works better for [-2,2]
	536	_isMultipleAllBranchesByFactorAtStart = true;
	537	_isNormalizeAtStart = true;
	538
	539	_performOptimizationsROOT = false;
	540	_performOptimizationsManyStarts =false; // several models are chosen are starting point for optimization
	541	_performOptimizationsBBLManyStarts = false;
	542	_correctOptimizationEpsilon =false; // according to dataset size (was initial likelihood), abs(_logL) * gainLossOptions::_epsilonOptimizationIterationCycle * gainLossOptions::_percentOfImprov
	543	_simulatedAnnealing =false; // epsilon is lowered with iterations
	544	_simulatedAnnealingMinEpsilonFactor =0.2; // lower normal epsilons (Model, BBL, Both). e.g., 0.1*0.2=0.02 - the new epsilon
	545	_simulatedAnnealingCoolingFactor =0.8; // to lower epsilons each iteration
	546
	547	_gammmaMixtureOptimizerAlg = ONE_DIM; // ONE_DIM or EM (not fully functional)
	548	_characterFreqEval =optimizeOverTree; // "-F option" the estimation of freq at root: FiftyFifty, LeavesAve, optimizeOverTree
	549
	550	_isStartWithTheta =false; // the optimization loop of the parameter will start with Theta
	551	_isSkipGainOptimization =false; //
	552	_epsilonOptimizationThetaFactor =1.0; // allows for different optimization Theta
	553
	554	_isAlphaLimit =true; // 0.3 - for Alpha <<0.3, the following computations are erroneous [BUG?]
	555	_isGainLimit =false; // 0.1 - for Gain <<0.1, the following computations are erroneous [BUG?]
	556	_isHGT_normal_Pij =true; // test parameter -
	557	_isHGT_with_Q =true; // test parameter -
	558	_incrementFactorForGain =false; // test parameter -
	559	_lossBiggerGainLimit =false; // test parameter -
	560	_slopeFactorForGain =2.0; // test parameter - limit growth in gain estimation
	561	// if the log-likelihood after optimization is lower than this threshold - then optimize again.
	562	_optimizationLevel = low;
	563	_epsilonOptimizationIterationCycle =1.0; // 1 cycle(model+BBL) epsilon.
	564	_epsilonOptimizationModel =0.01; // (was 0.05) Used by cEval for each parameter, the iteration epsilon is x3(or number of parameters)
	565	_epsilonOptimizationBBL =0.02; // (was 0.1) Used by cEvel for each branch, the iteration epsilon is x5(or number of branches)
	566	//enum optimizationLevel {Vlow, low, mid, high, Vhigh};
	567
	568	_epsilonOptimizationIterationCycleManyStarts = 2.0; // epsilonOptimizationManyStarts = max(epsilonOptimization, abs(_logL)*gainLossOptions::_percentOfImprovManySarts);
	569	_percentOfImprovManySarts = 0.0001; // epsilonOptimization = abs(logL)*_percentOfImprovManySarts
	570	_epsilonFactor_Model = 0.01;
	571	_epsilonFactor_BBL = 0.02;
	572
	573	_maxNumOfIterationsManyStarts = 1; // the basic number of manyStarts option (Model and BBL factors are used, 3 and 2, respectively)
	574	_numIterationsFactor_Model = 3;
	575	_numIterationsFactor_BBL = 2;
	576
	577	_maxNumOfIterations = 3; // 3
	578	_maxNumOfIterationsModel = 10; // 30
	579	_maxNumOfIterationsBBL = 5; // 10
	580
	581	_epsilonForReRootFactor =10; // only for substantial improvement the tree will be re-rooted
	582	_percentOfImprov = 0.00001; // for lL=-200,000 the epsilon is 0.2, epsilonOptimization = abs(logL)_percentOfImprovepsilonOptimization
	583
	584	_initParamsAtRandPoints =false;
	585	_initParamsAtRandPointsInOptimization =true;
	586	_initRandomGammaMixuteParam =true;
	587	_numberOfRandPointsInOptimization = 10; //10
	588	_numberOfRandStartPoints = 300; //10, the loop will break before if L is improved
	589
	590	//################################################## all the model parameters can be given by the user
	591	_userGainLossRatio = VERYBIG; // If given (< VERYBIG), all the related parameter are adapted
	592	_keepUserGainLossRatio = false; // If given other than 1, all the related parameter are adapted
	593	_userGain = 0.2; //
	594	_userLoss = 0.8; //
	595	_userTheta =0.5; // default 0.5 - otherwise, counting is done prior to optimization
	596	_userAlphaGain =1.0; // smaller Alpha => wide distribution with divergent categories. Gain with narrower distribution.
	597	_userBetaGain =2.0; // the Alpha/Beta is the excpectation
	598	_userProbInvariantGain= 0.05; // was
	599	_userAlphaLoss =0.5; // loss had wider distribution (sites with no loss)
	600	_userBetaLoss =0.25; // Thus, gain:loss is 1:4
	601	_userProbInvariantLoss= 0.05; //
	602	_userAlphaRate =0.5; //
	603	_userBetaRate =0.5;
	604	_userProbInvariantRate = 0.05; //
	605	_userRateInvariantVal = 1e-6; //
	606	_isFlatUserParameters = false;
	607
	608	// for initRand - Rand(x){min<x<max}
	609	_userGainMax =2.0;
	610	_userLossMax =5.0;
	611	_userThetaMax =0.9;
	612	_userAlphaGainMax =2.0; //1
	613	_userBetaGainMax =5.0; //2
	614	_userProbInvariantGainMax= 0.1;
	615	_userAlphaLossMax =3.0; //2
	616	_userBetaLossMax =2.0;
	617	_userProbInvariantLossMax= 0.1;
	618	_userProbInvariantRateMax = 0.1;
	619	_userAlphaRateMax =2.0;
	620	_userBetaRateMax =2.0;
	621
	622	_userGainMin =0.1;
	623	_userLossMin =0.1;
	624	_userThetaMin =0.01; //0.1
	625	_userAlphaGainMin =0.05;
	626	_userBetaGainMin =0.05;
	627	_userProbInvariantGainMin= 0.0;
	628	_userAlphaLossMin =0.05;
	629	_userBetaLossMin =0.05;
	630	_userProbInvariantLossMin= 0.0;
	631	_userProbInvariantRateMin = 0.0;
	632	_userAlphaRateMin =0.05;
	633	_userBetaRateMin =0.05;
	634
	635	//################################################## PostExp (Stochastic mapping based Counting)
	636	_numOfSimulationsForPotExp = 100000; // the counting (expectation) is based on simulations - val: >1000 - accurate enough
	637	//_probCutOffSum =0.3; // the cutOff to "ProbabilityPerPosPerBranch.txt"
	638	_probCutOffCounts = 0.3; // the cutOff to estimate HGT count (0.45) "gainLossProbExpCountPerPos.txt"
	639	_isFewCutOffCounts = true; // the cutOff to estimate HGT count - Few (0.1,...,0.9) "gainLossProbExpCountPerPos.txt"
	640	_probCutOffPrintEvent = 0.05; // the cutOff for perPosperBranch (so that file is not too big) (0.05)
	641
	642	//################################################## simulate PostExp (To test to accuracy of the stochastic mapping)
	643	_simulationType = Gamma; // Uniform
	644	_isMPratio = false;
	645	_isInitGainLossByEmpiricalFreqSimulatePostExp = true;
	646	_is3states = false;
	647	_3statesGain = 0.66; //gain (0->1)
	648	_3statesMore=2.68; //more (1->more)
	649	_3statesLess=2.68; // less (more->1)
	650	_3statesLoss=1.34; // loss (1->0)
	651	_3states0=0.5;
	652	_3states1=0.2; //_3states2+= 1 - _3states0 + _3states1;
	653
	654	_numberOfPositions2simulate =8000; // The number of positions, seqLen, note the after Nmin filter, if there are X sites, X^2/2 pairs are computed
	655	_numberOfIterations2simulate = 100; // max number of simulating iteration in parametric bootstrap, without convergence
	656	_numberOfIterationsForPrintResults = 5; // if =3, each 3 simulation iterations, results are updated (thus, temp results are available)
	657
	658	_percentileOfNminWithCorr1RequiredForLastIteration = 10; // if 2, median Nmin wity Cor=1 is required for last simulation iteration, if 10, the ten-percentile is required for convergence
	659
	660	_modelOptimizationSimPostExp =true;
	661	_BBLOptimizationSimPostExp =true; // changed to tree, since the branch length are "erased"
	662	_epsilonOptForPostExpSimFactor = 10; // 1 is for normal accuracy
	663	_numOfIterationsOptForPostExpSimFactor = 0.1; // 1 is for normal accuracy
	664	_loss2gainRatioToSim = 3; // loss rate is 3 time that of gain
	665
	666	_initParamsAtRandPointsInSimPostExp =true; // these 3 options could be used as: enum simulationType {GAMMA, UNI, MP};
	667
	668	_noiseLevelInGammaSimulation =0.5;
	669	_isMatrixGainLossFromRatioInSimulations =true;
	670	_initRootFreqAtRandPointsInSimPostExpEachPos =false; // not required, in current settings
	671	_isTheataFromObservedFreq =true; // The theta is taken from observed freq +random perturbation
	672	_isRootFreqEQstationaryInSimulations =true; //
	673	_isFlatSpBeforeOpt =true; // need to change to T when performing initParamsFromTrueEstimation
	674	_isFlatTreeBeforOpt =true; // In simulations - Flat the tree before Opt
	675	_isbBLEMwithSimpleSpSimulatePostExp =true; // In simulations - Do BBL-EM simple
	676
	677	//################################################## CoEvolvingSites
	678	_numberOfSequences2simulate =100;
	679	_numberOfSequences2simulateForCoEvol = 100; // number of simulations used in the co-evoving computations
	680	_useTheSameSpForSim =true;
	681	_isReversibleSim =false;
	682	_rateDistributionTypeSim =GAMMA;
	683	_gainEQlossSim =false;
	684	_writeSeqSim =true;
	685
	686	//################################################## Misc.
	687	_maxRateForML =100.0;
	688	_minBranchLength =0.0000001;
	689	_maxBranchLength =10.0;
	690	_treeSearchAlg = njML; // To construct tree from distanceTable (JC or others)
	691	_weights = NULL; // positions are weighted (not in use)
	692	_isOnlyComputeLikelihood = false;
	693	_isSequenceUniqPattern = false;
	694	_isRemovePositionsWithHighPercentOfMissingData = false;
	695	_fractionOfMissingDataToRemove = 0.5;
	696
	697	_isAnaliticComputeJumps = true;
	698	_isNormalizeQ = false; // true, but it is required to change optimizeModel (such that the model is not copied, but reference is sent).
	699	_isNormalizeQinSpVVec = false; // update of method is required, otherwise, global changes are made
	700	_isNormalizeQandTreeafterOpt = true; // after bug fixed.
	701	_isAlphaEqBetaManipulation = false; // This manipulation produces an un normalized Q matrices
	702	_calculeBranchLegthDiffFactorFromInputTrees = false; // input 2 trees - compute logL diff per branch length
	703	_intersectTreeAndSeq = false; // input tree and seq (not the same taxa) - intersect, write seq and tree and return
	704
	705	_outPtr =&cout;
	706
	707	_costMatrixfile = "";
	708	_costMatrixType = gainLossCost;
	709	_costMatrixGainLossRatio = 2.001; // add 0.001 as tie breaker
	710
	711	// all the parameters are added to the static: ParamList paramList (vector<Parameter>);
	712	//Parameters::addParameter("_mainType", _mainType);
	713	Parameters::addParameter("_alphabet_size", _alphabet_size);
	714	Parameters::addParameter("_treeFile", _treeFile);
	715	Parameters::addParameter("_treeFileOrig", _treeFileOrig);
	716	Parameters::addParameter("_seqFile", _seqFile);
	717	Parameters::addParameter("_logFile", _logFile);
	718	Parameters::addParameter("_numOfSimulationsForPotExp", _numOfSimulationsForPotExp);
	719	Parameters::addParameter("_logValue", _logValue);
	720	Parameters::addParameter("_referenceSeq", _referenceSeq);
	721	//Parameters::addParameter("_outFile", _outFile);
	722	//Parameters::addParameter("_outFileNotNormalize", _outFileNotNormalize);
	723	//Parameters::addParameter("_outFileGain4Site", _outFileGain4Site);
	724	//Parameters::addParameter("_outFileLoss4Site", _outFileLoss4Site);
	725	//Parameters::addParameter("_outFileLikeofPos", _outFileLikeofPos);
	726	Parameters::addParameter("_treeOutFile", _treeOutFile);
	727	Parameters::addParameter("_isOnlyComputeLikelihood", (_isOnlyComputeLikelihood == true) ? 1 : 0);
	728	Parameters::addParameter("_isSequenceUniqPattern", (_isSequenceUniqPattern == true) ? 1 : 0);
	729	Parameters::addParameter("_isRemovePositionsWithHighPercentOfMissingData", (_isRemovePositionsWithHighPercentOfMissingData == true) ? 1 : 0);
	730	Parameters::addParameter("_fractionOfMissingDataToRemove", _fractionOfMissingDataToRemove);
	731
	732	Parameters::addParameter("_isAnaliticComputeJumps", (_isAnaliticComputeJumps == true) ? 1 : 0);
	733	Parameters::addParameter("_isNormalizeQ", (_isNormalizeQ == true) ? 1 : 0);
	734	Parameters::addParameter("_isNormalizeQinSpVVec", (_isNormalizeQinSpVVec == true) ? 1 : 0);
	735	Parameters::addParameter("_isNormalizeQandTreeafterOpt", (_isNormalizeQandTreeafterOpt == true) ? 1 : 0);
	736	Parameters::addParameter("_isFlatUserParameters", (_isFlatUserParameters == true) ? 1 : 0);
	737	Parameters::addParameter("_isAlphaEqBetaManipulation", (_isAlphaEqBetaManipulation == true) ? 1 : 0);
	738	Parameters::addParameter("_calculeBranchLegthDiffFactorFromInputTrees", (_calculeBranchLegthDiffFactorFromInputTrees == true) ? 1 : 0);
	739	Parameters::addParameter("_intersectTreeAndSeq", (_intersectTreeAndSeq == true) ? 1 : 0);
	740
	741	//Parameters::addParameter("_discretizationType", _discretizationType);
	742	Parameters::addParameter("_gainDistributionType", getDistributionType(_gainDistributionType));
	743	Parameters::addParameter("_lossDistributionType", getDistributionType(_lossDistributionType));
	744	Parameters::addParameter("_rateDistributionType", getDistributionType(_rateDistributionType));
	745
	746	Parameters::addParameter("_userGainLossRatio", _userGainLossRatio);
	747	Parameters::addParameter("_keepUserGainLossRatio", _keepUserGainLossRatio);
	748	Parameters::addParameter("_userAlphaGain", _userAlphaGain);
	749	Parameters::addParameter("_userBetaGain", _userBetaGain);
	750	Parameters::addParameter("_userProbInvariantGain", _userProbInvariantGain);
	751	Parameters::addParameter("_userAlphaLoss", _userAlphaLoss);
	752	Parameters::addParameter("_userBetaLoss", _userBetaLoss);
	753	Parameters::addParameter("_userProbInvariantLoss", _userProbInvariantLoss);
	754	Parameters::addParameter("_userProbInvariantRate", _userProbInvariantRate);
	755	Parameters::addParameter("_userRateInvariantVal", _userRateInvariantVal);
	756	Parameters::addParameter("_userAlphaRate", _userAlphaRate);
	757	Parameters::addParameter("_userBetaRate", _userBetaRate);
	758	Parameters::addParameter("_userGain", _userGain);
	759	Parameters::addParameter("_userLoss", _userLoss);
	760	Parameters::addParameter("_userTheta", _userTheta);
	761
	762	Parameters::addParameter("_userAlphaGainMax", _userAlphaGainMax);
	763	Parameters::addParameter("_userBetaGainMax", _userBetaGainMax);
	764	Parameters::addParameter("_userProbInvariantGainMax", _userProbInvariantGainMax);
	765	Parameters::addParameter("_userAlphaLossMax", _userAlphaLossMax);
	766	Parameters::addParameter("_userBetaLossMax", _userBetaLossMax);
	767	Parameters::addParameter("_userProbInvariantLossMax", _userProbInvariantLossMax);
	768	Parameters::addParameter("_userProbInvariantRateMax", _userProbInvariantRateMax);
	769	Parameters::addParameter("_userAlphaRateMax", _userAlphaRateMax);
	770	Parameters::addParameter("_userBetaRateMax", _userBetaRateMax);
	771	Parameters::addParameter("_userGainMax", _userGainMax);
	772	Parameters::addParameter("_userLossMax", _userLossMax);
	773	Parameters::addParameter("_userThetaMax", _userThetaMax);
	774
	775	Parameters::addParameter("_userAlphaGainMin", _userAlphaGainMin);
	776	Parameters::addParameter("_userBetaGainMin", _userBetaGainMin);
	777	Parameters::addParameter("_userProbInvariantGainMin", _userProbInvariantGainMin);
	778	Parameters::addParameter("_userAlphaLossMin", _userAlphaLossMin);
	779	Parameters::addParameter("_userBetaLossMin", _userBetaLossMin);
	780	Parameters::addParameter("_userProbInvariantLossMin", _userProbInvariantLossMin);
	781	Parameters::addParameter("_userProbInvariantRateMin", _userProbInvariantRateMin);
	782	Parameters::addParameter("_userAlphaRateMin", _userAlphaRateMin);
	783	Parameters::addParameter("_userBetaRateMin", _userBetaRateMin);
	784	Parameters::addParameter("_userGainMin", _userGainMin);
	785	Parameters::addParameter("_userLossMin", _userLossMin);
	786	Parameters::addParameter("_userThetaMin", _userThetaMin);
	787	Parameters::addParameter("_probCutOffPrintEvent", _probCutOffPrintEvent);
	788	Parameters::addParameter("_probCutOffCounts", _probCutOffCounts);
	789	Parameters::addParameter("_isFewCutOffCounts", _isFewCutOffCounts);
	790
	791
	792	Parameters::addParameter("_characterFreqEval", getCharacterFreqEvalType(_characterFreqEval));
	793	Parameters::addParameter("_treeSearchAlg", getTreeSearchAlgType(_treeSearchAlg));
	794	Parameters::addParameter("_gammmaMixtureOptimizerAlg", getGammmaMixtureOptimizerAlgType(_gammmaMixtureOptimizerAlg));
	795	//Parameters::addParameter("_optimizeBranchLengths", _optimizeBranchLengths);
	796	Parameters::addParameter("_rateEstimationMethod", getRateEstimationMethodType(_rateEstimationMethod));
	797	Parameters::addParameter("_rateDiscretizationType", getDiscretizationType(_rateDiscretizationType));
	798
	799	Parameters::addParameter("_numberOfGainCategories", _numberOfGainCategories);
	800	Parameters::addParameter("_numberOfLossCategories", _numberOfLossCategories);
	801	Parameters::addParameter("_numberOfRateCategories", _numberOfRateCategories);
	802	Parameters::addParameter("_numberOfRateComponents", _numberOfRateComponents);
	803
	804	Parameters::addParameter("_maxNumOfIterations", _maxNumOfIterations);
	805	Parameters::addParameter("_maxNumOfIterationsModel", _maxNumOfIterationsModel);
	806	Parameters::addParameter("_maxNumOfIterationsBBL", _maxNumOfIterationsBBL);
	807	Parameters::addParameter("_maxNumOfIterationsManyStarts", _maxNumOfIterationsManyStarts);
	808	Parameters::addParameter("_numberOfRandPointsInOptimization", _numberOfRandPointsInOptimization);
	809	Parameters::addParameter("_numberOfRandStartPoints", _numberOfRandStartPoints);
	810
	811	Parameters::addParameter("_optimizationLevel", getOptimizationLevelType(_optimizationLevel));
	812	Parameters::addParameter("_epsilonOptimizationIterationCycle", _epsilonOptimizationIterationCycle);
	813	Parameters::addParameter("_epsilonOptimizationModel", _epsilonOptimizationModel);
	814	Parameters::addParameter("_epsilonOptimizationBBL", _epsilonOptimizationBBL);
	815	Parameters::addParameter("_epsilonOptimizationIterationCycleManyStarts", _epsilonOptimizationIterationCycleManyStarts);
	816
	817	Parameters::addParameter("_epsilonFactor_Model", _epsilonFactor_Model);
	818	Parameters::addParameter("_epsilonFactor_BBL", _epsilonFactor_BBL);
	819	Parameters::addParameter("_numIterationsFactor_Model", _numIterationsFactor_Model);
	820	Parameters::addParameter("_numIterationsFactor_BBL", _numIterationsFactor_BBL);
	821
	822	Parameters::addParameter("_epsilonOptForPostExpSimFactor", _epsilonOptForPostExpSimFactor);
	823	Parameters::addParameter("_numOfIterationsOptForPostExpSimFactor", _numOfIterationsOptForPostExpSimFactor);
	824	Parameters::addParameter("_loss2gainRatioToSim", _loss2gainRatioToSim);
	825
	826	Parameters::addParameter("_gainLossDist", (_gainLossDist == true) ? 1 : 0);
	827	Parameters::addParameter("_calculateRate4site", (_calculateRate4site == true) ? 1 : 0);
	828	Parameters::addParameter("_calculeGainLoss4site", (_calculeGainLoss4site == true) ? 1 : 0);
	829	Parameters::addParameter("_printLikelihoodLandscape", (_printLikelihoodLandscape == true) ? 1 : 0);
	830	Parameters::addParameter("_likelihoodLandscapeIncrement", _likelihoodLandscapeIncrement);
	831	Parameters::addParameter("_printLikelihoodLandscapeAlphaRate", (_printLikelihoodLandscapeAlphaRate == true) ? 1 : 0);
	832	Parameters::addParameter("_printLikelihoodLandscapeGainLoss", (_printLikelihoodLandscapeGainLoss == true) ? 1 : 0);
	833	Parameters::addParameter("_printLikelihoodLandscapeTheta", (_printLikelihoodLandscapeTheta == true) ? 1 : 0);
	834	Parameters::addParameter("_optAlphaInIteration", (_optAlphaInIteration == true) ? 1 : 0);
	835	Parameters::addParameter("_optBBL_LS_InIteration", (_optBBL_LS_InIteration == true) ? 1 : 0);
	836	Parameters::addParameter("_optBBL_EM_InIteration", (_optBBL_EM_InIteration == true) ? 1 : 0);
	837	Parameters::addParameter("_printP11forgain", (_printP11forgain == true) ? 1 : 0);
	838
	839	Parameters::addParameter("_printTree", (_printTree == true) ? 1 : 0);
	840	Parameters::addParameter("_printSeq", (_printSeq == true) ? 1 : 0);
	841	Parameters::addParameter("_printPij_t", (_printPij_t == true) ? 1 : 0);
	842	Parameters::addParameter("_printLofPos", (_printLofPos == true) ? 1 : 0);
	843	Parameters::addParameter("_printLofPosBothModels", (_printLofPosBothModels == true) ? 1 : 0);
	844	Parameters::addParameter("_performOptimizations", (_performOptimizations == true) ? 1 : 0);
	845	Parameters::addParameter("_correctOptimizationEpsilon", (_correctOptimizationEpsilon == true) ? 1 : 0);
	846	Parameters::addParameter("_performOptimizationsROOT", (_performOptimizationsROOT == true) ? 1 : 0);
	847	Parameters::addParameter("_performOptimizationsBBL", (_performOptimizationsBBL == true) ? 1 : 0);
	848	Parameters::addParameter("_performOptimizationsBBLOnlyOnce", (_performOptimizationsBBLOnlyOnce == true) ? 1 : 0);
	849	Parameters::addParameter("_isBblLS", (_isBblLS == true) ? 1 : 0);
	850	Parameters::addParameter("_isbblLSWhenbblEMdontImprove", (_isbblLSWhenbblEMdontImprove == true) ? 1 : 0);
	851	Parameters::addParameter("_isSkipBblEMWhenbblEMdontImprove", (_isSkipBblEMWhenbblEMdontImprove == true) ? 1 : 0);
	852
	853	Parameters::addParameter("_isInitGainLossByEmpiricalFreq", (_isInitGainLossByEmpiricalFreq == true) ? 1 : 0);
	854	Parameters::addParameter("_isBBLEMwithSimpleSpBeforeFullOptimization", (_isBBLEMwithSimpleSpBeforeFullOptimization == true) ? 1 : 0);
	855	Parameters::addParameter("_isOptimizeGainLossRatioInsteadOfGainAndLossSeperately", (_isOptimizeGainLossRatioInsteadOfGainAndLossSeperately == true) ? 1 : 0);
	856	Parameters::addParameter("_isOptimizeInvariantCategoryProb", (_isOptimizeInvariantCategoryProb == true) ? 1 : 0);
	857	Parameters::addParameter("_isUpdateOnlyGainBetaForRatio", (_isUpdateOnlyGainBetaForRatio == true) ? 1 : 0);
	858	Parameters::addParameter("_isComputeLikelihoodDuringInit", (_isComputeLikelihoodDuringInit == true) ? 1 : 0);
	859
	860	Parameters::addParameter("_isBblEMbeforeLSWithMissSpecifiedModel", (_isBblEMbeforeLSWithMissSpecifiedModel == true) ? 1 : 0);
	861	Parameters::addParameter("_isBblForceFactorCorrection", (_isBblForceFactorCorrection == true) ? 1 : 0);
	862	Parameters::addParameter("_BblFactorCorrection", _BblFactorCorrection);
	863
	864	Parameters::addParameter("_isSkipFirstParamsOptimization", (_isSkipFirstParamsOptimization == true) ? 1 : 0);
	865	Parameters::addParameter("_isOptimizeParamsWithLogMinMax", (_isOptimizeParamsWithLogMinMax == true) ? 1 : 0);
	866	Parameters::addParameter("_isMultipleAllBranchesByFactorAtStart", (_isMultipleAllBranchesByFactorAtStart == true) ? 1 : 0);
	867	Parameters::addParameter("_isNormalizeAtStart", (_isNormalizeAtStart == true) ? 1 : 0);
	868
	869	Parameters::addParameter("_performOptimizationsBBLManyStarts", (_performOptimizationsBBLManyStarts == true) ? 1 : 0);
	870	Parameters::addParameter("_simulatedAnnealing", (_simulatedAnnealing == true) ? 1 : 0);
	871	Parameters::addParameter("_simulatedAnnealingMinEpsilonFactor", _simulatedAnnealingMinEpsilonFactor);
	872	Parameters::addParameter("_simulatedAnnealingCoolingFactor", _simulatedAnnealingCoolingFactor);
	873	Parameters::addParameter("_performOptimizationsManyStarts", (_performOptimizationsManyStarts == true) ? 1 : 0);
	874	Parameters::addParameter("_gainLossDistPlusInvariant", (_gainLossDistPlusInvariant == true) ? 1 : 0);
	875	Parameters::addParameter("_isHGT_normal_Pij", (_isHGT_normal_Pij == true) ? 1 : 0);
	876	Parameters::addParameter("_isHGT_with_Q", (_isHGT_with_Q == true) ? 1 : 0);
	877	Parameters::addParameter("_initParamsAtRandPoints", (_initParamsAtRandPoints == true) ? 1 : 0);
	878	Parameters::addParameter("_initParamsAtRandPointsInOptimization", (_initParamsAtRandPointsInOptimization == true) ? 1 : 0);
	879	Parameters::addParameter("_calculePosteriorExpectationOfChange", (_calculePosteriorExpectationOfChange == true) ? 1 : 0);
	880	Parameters::addParameter("_simulatePosteriorExpectationOfChange", (_simulatePosteriorExpectationOfChange == true) ? 1 : 0);
	881	Parameters::addParameter("_isOnlySimulateSeq", (_isOnlySimulateSeq == true) ? 1 : 0);
	882
	883	Parameters::addParameter("_modelOptimizationSimPostExp", (_modelOptimizationSimPostExp == true) ? 1 : 0);
	884	Parameters::addParameter("_BBLOptimizationSimPostExp", (_BBLOptimizationSimPostExp == true) ? 1 : 0);
	885	Parameters::addParameter("_initParamsAtRandPointsInSimPostExp", (_initParamsAtRandPointsInSimPostExp == true) ? 1 : 0);
	886	Parameters::addParameter("_initRootFreqAtRandPointsInSimPostExpEachPos", (_initRootFreqAtRandPointsInSimPostExpEachPos == true) ? 1 : 0);
	887	Parameters::addParameter("_isFlatTreeBeforOpt", (_isFlatTreeBeforOpt == true) ? 1 : 0);
	888	Parameters::addParameter("_isbBLEMwithSimpleSpSimulatePostExp", (_isbBLEMwithSimpleSpSimulatePostExp == true) ? 1 : 0);
	889	Parameters::addParameter("_noiseLevelInGammaSimulation", _noiseLevelInGammaSimulation);
	890
	891	Parameters::addParameter("_isTheataFromObservedFreq", (_isTheataFromObservedFreq == true) ? 1 : 0);
	892	Parameters::addParameter("_isRootFreqEQstationaryInSimulations", (_isRootFreqEQstationaryInSimulations == true) ? 1 : 0);
	893	Parameters::addParameter("_isMatrixGainLossFromRatioInSimulations", (_isMatrixGainLossFromRatioInSimulations == true) ? 1 : 0);
	894	Parameters::addParameter("_isFlatSpBeforeOpt", (_isFlatSpBeforeOpt == true) ? 1 : 0);
	895
	896	Parameters::addParameter("_printTreesWithProbabilityValuesAsBP", (_printTreesWithProbabilityValuesAsBP == true) ? 1 : 0);
	897	Parameters::addParameter("_printTreesWithExpectationValuesAsBP", (_printTreesWithExpectationValuesAsBP == true) ? 1 : 0);
	898
	899	Parameters::addParameter("_printTreesWithAncestralReconstructAsBP", (_printTreesWithAncestralReconstructAsBP == true) ? 1 : 0);
	900	Parameters::addParameter("_printAncestralReconstructFullData", (_printAncestralReconstructFullData == true) ? 1 : 0);
	901	Parameters::addParameter("_printDEBUGinfo", (_printDEBUGinfo == true) ? 1 : 0);
	902	Parameters::addParameter("_printPropExpOfChangeFullData", (_printPropExpOfChangeFullData == true) ? 1 : 0);
	903	Parameters::addParameter("_printExpPerPosPerBranchMatrix", (_printExpPerPosPerBranchMatrix == true) ? 1 : 0);
	904	Parameters::addParameter("_printComputedCorrelations", (_printComputedCorrelations == true) ? 1 : 0);
	905	Parameters::addParameter("_performParametricBootstapCorrelation", (_performParametricBootstapCorrelation == true) ? 1 : 0);
	906	Parameters::addParameter("_usePosSpecificSimulations", (_usePosSpecificSimulations == true) ? 1 : 0);
	907	Parameters::addParameter("_isConsiderNegativeCorrelations", (_isConsiderNegativeCorrelations == true) ? 1 : 0);
	908	Parameters::addParameter("_isDivideBinsByRange", (_isDivideBinsByRange == true) ? 1 : 0);
	909	Parameters::addParameter("_isSortVectorOfCorrelationsBinsByLowerRateBound", (_isSortVectorOfCorrelationsBinsByLowerRateBound == true) ? 1 : 0);
	910	Parameters::addParameter("_isSortVectorOfCorrelationsBinsByMidRateBound", (_isSortVectorOfCorrelationsBinsByMidRateBound == true) ? 1 : 0);
	911	Parameters::addParameter("_relativeSizeOfOverLappedBins", _relativeSizeOfOverLappedBins);
	912
	913	Parameters::addParameter("_isPrintpairWiseCorrelationsAndNmin", (_isPrintpairWiseCorrelationsAndNmin == true) ? 1 : 0);
	914	Parameters::addParameter("_isPrintCorrelationsOfAllPairs_Corr", (_isPrintCorrelationsOfAllPairs_Corr == true) ? 1 : 0);
	915	Parameters::addParameter("_isPrintCorrelationsOfAllPairs_pVal", (_isPrintCorrelationsOfAllPairs_pVal == true) ? 1 : 0);
	916
	917	Parameters::addParameter("_isPrintAllPairsOfCorrelatedSitesIncludingPValsAboveBH", (_isPrintAllPairsOfCorrelatedSitesIncludingPValsAboveBH == true) ? 1 : 0);
	918	Parameters::addParameter("_isAllCorrTypeReqruiedToBeSignificant", (_isAllCorrTypeReqruiedToBeSignificant == true) ? 1 : 0);
	919	Parameters::addParameter("_isNminBasedOnCountBranchesOverCutOff", (_isNminBasedOnCountBranchesOverCutOff == true) ? 1 : 0);
	920
	921
	922	Parameters::addParameter("_numOfBinsInParametricBootstrapSimulations", _numOfBinsInParametricBootstrapSimulations);
	923	Parameters::addParameter("_isAddSimulationsWithLowRate", (_isAddSimulationsWithLowRate == true) ? 1 : 0);
	924	Parameters::addParameter("_isFDRcorrectionForPValInCorrelation", (_isFDRcorrectionForPValInCorrelation == true) ? 1 : 0);
	925	Parameters::addParameter("_isComputeQVals", (_isComputeQVals == true) ? 1 : 0);
	926	Parameters::addParameter("_pValueCutOffForBootStrap", _pValueCutOffForBootStrap);
	927	Parameters::addParameter("_minExpThresholdForPValComputationForCorrelatingPair", _minExpThresholdForPValComputationForCorrelatingPair);
	928	Parameters::addParameter("_isUpdateMinExpThresholdGivenSimulaitonsQuantile", _isUpdateMinExpThresholdGivenSimulaitonsQuantile); // is Wrong AddParameter? Not the bool type
	929	Parameters::addParameter("_isUpdateMinExpThresholdGivenRealDataQuantile", _isUpdateMinExpThresholdGivenRealDataQuantile);
	930	Parameters::addParameter("_updateMinExpThresholdGivenRealDataQuantileVal", _updateMinExpThresholdGivenRealDataQuantileVal);
	931	Parameters::addParameter("_isUpdateMinExpThresholdGivenHighFractionOfHighCorrel", _isUpdateMinExpThresholdGivenHighFractionOfHighCorrel);
	932	Parameters::addParameter("_isCompExtremeValDistribution", _isCompExtremeValDistribution);
	933	Parameters::addParameter("_minExpThresholdAsPercentFromNumOfSpeciesForPValComputationForCorrelatingPair", _minExpThresholdAsPercentFromNumOfSpeciesForPValComputationForCorrelatingPair);
	934
	935	Parameters::addParameter("_isCorrelateWithPearson", (_isCorrelateWithPearson == true) ? 1 : 0);
	936	Parameters::addParameter("_isCorrelateWithSpearman", (_isCorrelateWithSpearman == true) ? 1 : 0);
	937	Parameters::addParameter("_isCorrelationsBasedOnMaxParsimonyMapping", (_isCorrelationsBasedOnMaxParsimonyMapping == true) ? 1 : 0);
	938	Parameters::addParameter("_isAlsoCorrelateWithLoss", (_isAlsoCorrelateWithLoss == true) ? 1 : 0);
	939	Parameters::addParameter("_isAlsoCorrelateWithBoth", (_isAlsoCorrelateWithBoth == true) ? 1 : 0);
	940	Parameters::addParameter("_isOnlyCorrelateWithBoth", (_isOnlyCorrelateWithBoth == true) ? 1 : 0);
	941	Parameters::addParameter("_isUseRateForSiteAsNminForCorrelations", (_isUseRateForSiteAsNminForCorrelations == true) ? 1 : 0);
	942	Parameters::addParameter("_isRemoveSimulatedPositionsWithExpectedLowNminBasedOnOccur", (_isRemoveSimulatedPositionsWithExpectedLowNminBasedOnOccur == true) ? 1 : 0);
	943	Parameters::addParameter("_isRemoveSimulatedPositionsBasedOnMP", (_isRemoveSimulatedPositionsBasedOnMP == true) ? 1 : 0);
	944	Parameters::addParameter("_minNumOfMPEvent2RemoveSimulatedPositions", _minNumOfMPEvent2RemoveSimulatedPositions);
	945	Parameters::addParameter("_isUpdateminNumOfMPEvent2RemoveSimulatedPositions", (_isUpdateminNumOfMPEvent2RemoveSimulatedPositions == true) ? 1 : 0);
	946
	947
	948	Parameters::addParameter("_printComputedCorrelationsAllSites", (_printComputedCorrelationsAllSites == true) ? 1 : 0);
	949	Parameters::addParameter("_isIgnoreCorrelationAmongSelectedSites", (_isIgnoreCorrelationAmongSelectedSites == true) ? 1 : 0);
	950	Parameters::addParameter("_isNormalizeForBranchExpInCorrCompute", (_isNormalizeForBranchExpInCorrCompute == true) ? 1 : 0);
	951	Parameters::addParameter("_isNormalizeByExpectationPerBranch", (_isNormalizeByExpectationPerBranch == true) ? 1 : 0);
	952
	953
	954	Parameters::addParameter("_selectedSitesForCorrelation", _selectedSitesForCorrelation);
	955	Parameters::addParameter("_calculateAncestralReconstruct", (_calculateAncestralReconstruct == true) ? 1 : 0);
	956	Parameters::addParameter("_isRemoveSeqWithUnknownForLastSelectedSiteForCorrelation", (_isRemoveSeqWithUnknownForLastSelectedSiteForCorrelation == true) ? 1 : 0);
	957	Parameters::addParameter("_checkCoEvolWithUnionPAP_against_pos", _checkCoEvolWithUnionPAP_against_pos);
	958
	959
	960	Parameters::addParameter("_isReversible", (_isReversible == true) ? 1 : 0);
	961	Parameters::addParameter("_isRootFreqEQstationary", (_isRootFreqEQstationary == true) ? 1 : 0);
	962	Parameters::addParameter("_initRandomGammaMixuteParam", (_initRandomGammaMixuteParam == true) ? 1 : 0);
	963	Parameters::addParameter("_incrementFactorForGain", (_incrementFactorForGain == true) ? 1 : 0);
	964	Parameters::addParameter("_lossBiggerGainLimit", (_lossBiggerGainLimit == true) ? 1 : 0);
	965	Parameters::addParameter("_slopeFactorForGain", _slopeFactorForGain);
	966	Parameters::addParameter("_isStartWithTheta", (_isStartWithTheta == true) ? 1 : 0);
	967	Parameters::addParameter("_isSkipGainOptimization", (_isSkipGainOptimization == true) ? 1 : 0);
	968	Parameters::addParameter("_epsilonOptimizationThetaFactor", _epsilonOptimizationThetaFactor);
	969	Parameters::addParameter("_isAlphaLimit", (_isAlphaLimit == true) ? 1 : 0);
	970	Parameters::addParameter("_isGainLimit", (_isGainLimit == true) ? 1 : 0);
	971	//Parameters::addParameter("_probCutOffSum", _probCutOffSum);
	972	Parameters::addParameter("_maxRateForML", _maxRateForML);
	973	Parameters::addParameter("_minBranchLength", _minBranchLength);
	974	Parameters::addParameter("_maxBranchLength", _maxBranchLength);
	975	Parameters::addParameter("_epsilonForReRootFactor", _epsilonForReRootFactor);
	976	Parameters::addParameter("_percentOfImprovManySarts", _percentOfImprovManySarts);
	977	Parameters::addParameter("_percentOfImprov", _percentOfImprov);
	978	Parameters::addParameter("_calculeBranchLegthDiffFactor", (_calculeBranchLegthDiffFactor == true) ? 1 : 0);
	979
	980	Parameters::addParameter("_simulationType", getSimulationType(_simulationType));
	981	Parameters::addParameter("_isMPratio", (_isMPratio == true) ? 1 : 0);
	982	Parameters::addParameter("_isInitGainLossByEmpiricalFreqSimulatePostExp", (_isInitGainLossByEmpiricalFreqSimulatePostExp == true) ? 1 : 0);
	983	Parameters::addParameter("_is3states", (_is3states == true) ? 1 : 0);
	984	Parameters::addParameter("_3statesGain", _3statesGain);
	985	Parameters::addParameter("_3statesMore", _3statesMore);
	986	Parameters::addParameter("_3statesLess", _3statesLess);
	987	Parameters::addParameter("_3statesLoss", _3statesLoss);
	988	Parameters::addParameter("_3states0", _3states0);
	989	Parameters::addParameter("_3states1", _3states1);
	990
	991	Parameters::addParameter("_simulateSequences", (_simulateSequences == true) ? 1 : 0);
	992	Parameters::addParameter("_numberOfSequences2simulate", _numberOfSequences2simulate);
	993	Parameters::addParameter("_numberOfPositions2simulate", _numberOfPositions2simulate);
	994	Parameters::addParameter("_numberOfIterations2simulate", _numberOfIterations2simulate);
	995	Parameters::addParameter("_numberOfIterationsForPrintResults", _numberOfIterationsForPrintResults);
	996	Parameters::addParameter("_percentileOfNminWithCorr1RequiredForLastIteration", _percentileOfNminWithCorr1RequiredForLastIteration);
	997
	998
	999	Parameters::addParameter("_useTheSameSpForSim", (_useTheSameSpForSim == true) ? 1 : 0);
	1000	Parameters::addParameter("_isReversibleSim", (_isReversibleSim == true) ? 1 : 0);
	1001	Parameters::addParameter("_rateDistributionTypeSim", getDistributionType(_rateDistributionTypeSim));
	1002	Parameters::addParameter("_gainEQlossSim", (_gainEQlossSim == true) ? 1 : 0);
	1003	Parameters::addParameter("_calculateRate4siteSim", (_calculateRate4siteSim == true) ? 1 : 0);
	1004	Parameters::addParameter("_writeSeqSim", (_writeSeqSim == true) ? 1 : 0);
	1005
	1006	Parameters::addParameter("_accountForMissingData", (_accountForMissingData == true) ? 1 : 0);
	1007	Parameters::addParameter("_gainEQloss", (_gainEQloss == true) ? 1 : 0);
	1008	Parameters::addParameter("_gainLossRateAreFreq", (_gainLossRateAreFreq == true) ? 1 : 0);
	1009
	1010	Parameters::addParameter("_findCoEvolvingSitesOldNotWorking", (_findCoEvolvingSitesOldNotWorking == true) ? 1 : 0);// for the co evolving project
	1011	Parameters::addParameter("_saveProbChanges_PosNodeXY", (_saveProbChanges_PosNodeXY == true) ? 1 : 0);// for the co evolving project
	1012	Parameters::addParameter("_isComputeDistanceFromRootForRecent", (_isComputeDistanceFromRootForRecent == true) ? 1 : 0);// for the co evolving project
	1013	Parameters::addParameter("_printAncestralReconstructPosterior", (_printAncestralReconstructPosterior == true) ? 1 : 0);
	1014	Parameters::addParameter("_minNumOfOnes", _minNumOfOnes); // 1,3
	1015	Parameters::addParameter("_minNumOfZeros", _minNumOfZeros); // 0,1
	1016
	1017	Parameters::addParameter("_isOnlyParsimony", (_isOnlyParsimony == true) ? 1 : 0);// for the co evolving project
	1018	Parameters::addParameter("_calculeMaxParsimonyChange", (_calculeMaxParsimonyChange == true) ? 1 : 0);// for the co evolving project
	1019	Parameters::addParameter("_calculeMaxParsimonyChangeSeveralGainLossRatios", (_calculeMaxParsimonyChangeSeveralGainLossRatios == true) ? 1 : 0);// for the co evolving project
	1020	Parameters::addParameter("_costMatrixType", getCostMatrixType(_costMatrixType));
	1021	Parameters::addParameter("_costMatrixfile", _costMatrixfile);
	1022	Parameters::addParameter("_costMatrixGainLossRatio", _costMatrixGainLossRatio);
	1023	}
	1024	/********************************************************************************************
	1025	readParameters
	1026	*********************************************************************************************/
	1027	void gainLossOptions::readParameters(const string& paramFileName) // Opens the file named paramFileName and, if the stream is usable, passes it to Parameters::readParameters.
	1028	{
	1029	ifstream params(paramFileName.c_str()); // open the parameter file for reading
	1030	if(params.good()) // NOTE(review): a file that cannot be opened is skipped silently - nothing is read and no error is reported; confirm this is intended
	1031	Parameters::readParameters(params); // only place where params are read; per the original note, updateParameter(paramName, param.c_str()) is used
	1032	params.close(); // called whether or not the open succeeded
	1033	}
	1034	/********************************************************************************************
	1035	getParamsFromFile
	1036	*********************************************************************************************/
	1037	void gainLossOptions::getParamsFromFile(const string& paramFileName) // Loads options from paramFileName in two passes: read + copy + updateDependencies, then read again + range update + copy. The call order matters.
	1038	{
	1039	readParameters(paramFileName); // first pass: read the parameter file
	1040	readFromParameters2gainLossOptions(); // copy the values held in Parameters into the gainLossOptions members
	1041	updateDependencies(); // defined outside this view; presumably adjusts options that depend on other options - TODO confirm
	1042	readParameters(paramFileName); // second pass: per the original note, values specifically requested in the param file are re-read after updateDependencies()
	1043	updateParamsInRangeOverrideParamFile(); // defined outside this view; by its name it applies range limits that override the param file values - TODO confirm
	1044	readFromParameters2gainLossOptions(); // final copy of the resulting values into the members
	1045	}
	1046
	1047
	1048	/********************************************************************************************
	1049	Updates... Verify consistencies
	1050	*********************************************************************************************/
	1051	void gainLossOptions::readFromParameters2gainLossOptions(){
	1052	//_mainType = Parameters::getString("_mainType");
	1053	_outDir = Parameters::getString("_outDir");
	1054	_alphabet_size = Parameters::getInt("_alphabet_size");
	1055	_minNumOfOnes = Parameters::getInt("_minNumOfOnes");
	1056	_minNumOfZeros = Parameters::getInt("_minNumOfZeros");
	1057	_numOfSimulationsForPotExp = Parameters::getInt("_numOfSimulationsForPotExp");
	1058
	1059	_gainLossRateAreFreq = (Parameters::getInt("_gainLossRateAreFreq") == 1) ? true : false;
	1060	_isOnlyComputeLikelihood = (Parameters::getInt("_isOnlyComputeLikelihood") == 1) ? true : false;
	1061	_isSequenceUniqPattern = (Parameters::getInt("_isSequenceUniqPattern") == 1) ? true : false;
	1062	_isRemovePositionsWithHighPercentOfMissingData = (Parameters::getInt("_isRemovePositionsWithHighPercentOfMissingData") == 1) ? true : false;
	1063	_fractionOfMissingDataToRemove = Parameters::getFloat("_fractionOfMissingDataToRemove");
	1064
	1065	_isAnaliticComputeJumps = (Parameters::getInt("_isAnaliticComputeJumps") == 1) ? true : false;
	1066	_isNormalizeQ = (Parameters::getInt("_isNormalizeQ") == 1) ? true : false;
	1067	_isNormalizeQinSpVVec = (Parameters::getInt("_isNormalizeQinSpVVec") == 1) ? true : false;
	1068	_isNormalizeQandTreeafterOpt = (Parameters::getInt("_isNormalizeQandTreeafterOpt") == 1) ? true : false;
	1069	_isFlatUserParameters = (Parameters::getInt("_isFlatUserParameters") == 1) ? true : false;
	1070	_isAlphaEqBetaManipulation = (Parameters::getInt("_isAlphaEqBetaManipulation") == 1) ? true : false;
	1071	_calculeBranchLegthDiffFactorFromInputTrees = (Parameters::getInt("_calculeBranchLegthDiffFactorFromInputTrees") == 1) ? true : false;
	1072	_intersectTreeAndSeq = (Parameters::getInt("_intersectTreeAndSeq") == 1) ? true : false;
	1073
	1074	_gainEQloss = (Parameters::getInt("_gainEQloss") == 1) ? true : false;
	1075	_isRootFreqEQstationary = (Parameters::getInt("_isRootFreqEQstationary") == 1) ? true : false;
	1076	_isReversible = (Parameters::getInt("_isReversible") == 1) ? true : false;
	1077	_gainLossDist = (Parameters::getInt("_gainLossDist") == 1) ? true : false;
	1078
	1079
	1080	_rateDistributionType = getDistributionType(Parameters::getString("_rateDistributionType"));
	1081	if(_rateDistributionType == UNIFORM){
	1082	_rateEstimationMethod = mlRate;
	1083	Parameters::updateParameter("_rateEstimationMethod","mlRate");
	1084	}
	1085	_gainDistributionType = getDistributionType(Parameters::getString("_gainDistributionType"));
	1086	_lossDistributionType = getDistributionType(Parameters::getString("_lossDistributionType"));
	1087
	1088	_lossBiggerGainLimit = (Parameters::getInt("_lossBiggerGainLimit") == 1) ? true : false;
	1089	_userGainLossRatio = Parameters::getFloat("_userGainLossRatio");
	1090	_keepUserGainLossRatio = (Parameters::getInt("_keepUserGainLossRatio") == 1) ? true : false;
	1091
	1092	_userGain = Parameters::getFloat("_userGain");
	1093	_userLoss = Parameters::getFloat("_userLoss");
	1094	if((_lossBiggerGainLimit) && (_userLoss <= _userGain)){
	1095	_userGain = 0.5;
	1096	Parameters::updateParameter("_userGain","0.5");
	1097	_userLoss = 1.5;
	1098	Parameters::updateParameter("_userLoss","1.5");
	1099	}
	1100	_performOptimizationsBBL = (Parameters::getInt("_performOptimizationsBBL") == 1) ? true : false;
	1101	_performOptimizationsBBLOnlyOnce = (Parameters::getInt("_performOptimizationsBBLOnlyOnce") == 1) ? true : false;
	1102	_isBblLS = (Parameters::getInt("_isBblLS") == 1) ? true : false;
	1103	_isbblLSWhenbblEMdontImprove = (Parameters::getInt("_isbblLSWhenbblEMdontImprove") == 1) ? true : false;
	1104	_isSkipBblEMWhenbblEMdontImprove = (Parameters::getInt("_isSkipBblEMWhenbblEMdontImprove") == 1) ? true : false;
	1105
	1106	_isBblEMbeforeLSWithMissSpecifiedModel = (Parameters::getInt("_isBblEMbeforeLSWithMissSpecifiedModel") == 1) ? true : false;
	1107	_isBblForceFactorCorrection = (Parameters::getInt("_isBblForceFactorCorrection") == 1) ? true : false;
	1108	_BblFactorCorrection = Parameters::getFloat("_BblFactorCorrection");
	1109
	1110	_isSkipFirstParamsOptimization = (Parameters::getInt("_isSkipFirstParamsOptimization") == 1) ? true : false;
	1111	_isOptimizeParamsWithLogMinMax = (Parameters::getInt("_isOptimizeParamsWithLogMinMax") == 1) ? true : false;
	1112	_isMultipleAllBranchesByFactorAtStart = (Parameters::getInt("_isMultipleAllBranchesByFactorAtStart") == 1) ? true : false;
	1113	_isNormalizeAtStart = (Parameters::getInt("_isNormalizeAtStart") == 1) ? true : false;
	1114
	1115	_performOptimizationsBBLManyStarts = (Parameters::getInt("_performOptimizationsBBLManyStarts") == 1) ? true : false;
	1116	_simulatedAnnealing = (Parameters::getInt("_simulatedAnnealing") == 1) ? true : false;
	1117	_simulatedAnnealingMinEpsilonFactor = Parameters::getFloat("_simulatedAnnealingMinEpsilonFactor");
	1118	_simulatedAnnealingCoolingFactor = Parameters::getFloat("_simulatedAnnealingCoolingFactor");
	1119
	1120	_performOptimizationsManyStarts = (Parameters::getInt("_performOptimizationsManyStarts") == 1) ? true : false;
	1121	if(_performOptimizationsManyStarts == 1){
	1122	_initParamsAtRandPointsInOptimization = true;
	1123	Parameters::updateParameter("_initParamsAtRandPointsInOptimization","1");
	1124	}
	1125	_seqFile = Parameters::getString("_seqFile");
	1126	_simulatePosteriorExpectationOfChange = (Parameters::getInt("_simulatePosteriorExpectationOfChange") == 1) ? true : false;
	1127	_isOnlySimulateSeq = (Parameters::getInt("_isOnlySimulateSeq") == 1) ? true : false;
	1128
	1129	if(_seqFile=="" && _simulatePosteriorExpectationOfChange==0) errorMsg::reportError("_seqFile is needed");
	1130	_treeFile = Parameters::getString("_treeFile");
	1131	_treeFileOrig = Parameters::getString("_treeFileOrig");
	1132
	1133	_rootAt = Parameters::getString("_rootAt");
	1134	_logFile= Parameters::getString("_logFile");
	1135	_logValue = Parameters::getInt("_logValue");
	1136	_referenceSeq = Parameters::getString("_referenceSeq");
	1137	//_outFile = Parameters::getString("_outFile");
	1138	_treeOutFile = Parameters::getString("_treeOutFile");
	1139
	1140	//_discretizationType = Parameters::getString("_discretizationType");
	1141	_treeSearchAlg = getTreeSearchAlgType(Parameters::getString("_treeSearchAlg"));
	1142	_gammmaMixtureOptimizerAlg = getGammmaMixtureOptimizerAlgType(Parameters::getString("_gammmaMixtureOptimizerAlg"));
	1143	//_optimizeBranchLengths = Parameters::getString("_optimizeBranchLengths");
	1144
	1145	_characterFreqEval = getCharacterFreqEvalType(Parameters::getString("_characterFreqEval"));
	1146	_rateEstimationMethod = getRateEstimationMethodType(Parameters::getString("_rateEstimationMethod"));
	1147	_rateDiscretizationType = getDiscretizationType(Parameters::getString("_rateDiscretizationType"));
	1148
	1149	_numberOfGainCategories = Parameters::getInt("_numberOfGainCategories");
	1150	_numberOfLossCategories = Parameters::getInt("_numberOfLossCategories");
	1151	_numberOfRateCategories = Parameters::getInt("_numberOfRateCategories");
	1152	_numberOfRateComponents = Parameters::getInt("_numberOfRateComponents");
	1153
	1154	_maxNumOfIterations = Parameters::getInt("_maxNumOfIterations");
	1155	_maxNumOfIterationsModel = Parameters::getInt("_maxNumOfIterationsModel");
	1156	_maxNumOfIterationsBBL = Parameters::getInt("_maxNumOfIterationsBBL");
	1157	_maxNumOfIterationsManyStarts = Parameters::getInt("_maxNumOfIterationsManyStarts");
	1158	_numberOfRandPointsInOptimization = Parameters::getInt("_numberOfRandPointsInOptimization");
	1159	_numberOfRandStartPoints = Parameters::getInt("_numberOfRandStartPoints");
	1160	_epsilonOptimizationModel = Parameters::getFloat("_epsilonOptimizationModel");
	1161	_epsilonOptimizationBBL = Parameters::getFloat("_epsilonOptimizationBBL");
	1162	_epsilonOptimizationIterationCycleManyStarts = Parameters::getFloat("_epsilonOptimizationIterationCycleManyStarts");
	1163
	1164	_optimizationLevel = getOptimizationLevelTypeFromStr(Parameters::getString("_optimizationLevel"));
	1165	_epsilonFactor_Model = Parameters::getFloat("_epsilonFactor_Model");
	1166	_epsilonFactor_BBL = Parameters::getFloat("_epsilonFactor_BBL");
	1167	_numIterationsFactor_Model = Parameters::getFloat("_numIterationsFactor_Model");
	1168	_numIterationsFactor_BBL = Parameters::getFloat("_numIterationsFactor_BBL");
	1169
	1170	_epsilonOptimizationIterationCycle = Parameters::getFloat("_epsilonOptimizationIterationCycle");
	1171	_epsilonOptForPostExpSimFactor = Parameters::getFloat("_epsilonOptForPostExpSimFactor");
	1172	_numOfIterationsOptForPostExpSimFactor = Parameters::getFloat("_numOfIterationsOptForPostExpSimFactor");
	1173	_loss2gainRatioToSim = Parameters::getFloat("_loss2gainRatioToSim");
	1174
	1175	_userAlphaGain = Parameters::getFloat("_userAlphaGain");
	1176	_userBetaGain = Parameters::getFloat("_userBetaGain");
	1177	_userProbInvariantGain = Parameters::getFloat("_userProbInvariantGain");
	1178	_userAlphaLoss = Parameters::getFloat("_userAlphaLoss");
	1179	_userBetaLoss = Parameters::getFloat("_userBetaLoss");
	1180	_userProbInvariantLoss = Parameters::getFloat("_userProbInvariantLoss");
	1181	_userProbInvariantRate = Parameters::getFloat("_userProbInvariantRate");
	1182	_userRateInvariantVal = Parameters::getFloat("_userRateInvariantVal");
	1183	_userAlphaRate = Parameters::getFloat("_userAlphaRate");
	1184	_userBetaRate = Parameters::getFloat("_userBetaRate");
	1185
	1186	_userAlphaGainMax = Parameters::getFloat("_userAlphaGainMax");
	1187	_userBetaGainMax = Parameters::getFloat("_userBetaGainMax");
	1188	_userProbInvariantGainMax = Parameters::getFloat("_userProbInvariantGainMax");
	1189	_userAlphaLossMax = Parameters::getFloat("_userAlphaLossMax");
	1190	_userBetaLossMax = Parameters::getFloat("_userBetaLossMax");
	1191	_userProbInvariantLossMax = Parameters::getFloat("_userProbInvariantLossMax");
	1192	_userProbInvariantRateMax = Parameters::getFloat("_userProbInvariantRateMax");
	1193	_userAlphaRateMax = Parameters::getFloat("_userAlphaRateMax");
	1194	_userBetaRateMax = Parameters::getFloat("_userBetaRateMax");
	1195	_userGainMax = Parameters::getFloat("_userGainMax");
	1196	_userLossMax = Parameters::getFloat("_userLossMax");
	1197	_userThetaMax = Parameters::getFloat("_userThetaMax");
	1198
	1199	_userAlphaGain = Parameters::getFloat("_userAlphaGain");
	1200	_userBetaGain = Parameters::getFloat("_userBetaGain");
	1201	_userProbInvariantGain = Parameters::getFloat("_userProbInvariantGain");
	1202	_userAlphaLoss = Parameters::getFloat("_userAlphaLoss");
	1203	_userBetaLoss = Parameters::getFloat("_userBetaLoss");
	1204	_userProbInvariantLoss = Parameters::getFloat("_userProbInvariantLoss");
	1205	_userProbInvariantRate = Parameters::getFloat("_userProbInvariantRate");
	1206	_userAlphaRate = Parameters::getFloat("_userAlphaRate");
	1207	_userBetaRate = Parameters::getFloat("_userBetaRate");
	1208	_userGain = Parameters::getFloat("_userGain");
	1209	_userLoss = Parameters::getFloat("_userLoss");
	1210	_userTheta = Parameters::getFloat("_userTheta");
	1211
	1212	_probCutOffPrintEvent = Parameters::getFloat("_probCutOffPrintEvent");
	1213	_probCutOffCounts = Parameters::getFloat("_probCutOffCounts");
	1214	_isFewCutOffCounts = (Parameters::getInt("_isFewCutOffCounts") == 1) ? true : false;
	1215
	1216	_calculateRate4site = (Parameters::getInt("_calculateRate4site") == 1) ? true : false;
	1217	_calculeGainLoss4site = (Parameters::getInt("_calculeGainLoss4site") == 1) ? true : false;
	1218	_printLikelihoodLandscape = (Parameters::getInt("_printLikelihoodLandscape") == 1) ? true : false;
	1219	_likelihoodLandscapeIncrement = Parameters::getFloat("_likelihoodLandscapeIncrement");
	1220	_printLikelihoodLandscapeAlphaRate = (Parameters::getInt("_printLikelihoodLandscapeAlphaRate") == 1) ? true : false;
	1221	_printLikelihoodLandscapeGainLoss = (Parameters::getInt("_printLikelihoodLandscapeGainLoss") == 1) ? true : false;
	1222	_printLikelihoodLandscapeTheta = (Parameters::getInt("_printLikelihoodLandscapeTheta") == 1) ? true : false;
	1223	_optAlphaInIteration = (Parameters::getInt("_optAlphaInIteration") == 1) ? true : false;
	1224	_optBBL_LS_InIteration = (Parameters::getInt("_optBBL_LS_InIteration") == 1) ? true : false;
	1225	_optBBL_EM_InIteration = (Parameters::getInt("_optBBL_EM_InIteration") == 1) ? true : false;
	1226	_printP11forgain = (Parameters::getInt("_printP11forgain") == 1) ? true : false;
	1227
	1228	_printTree = (Parameters::getInt("_printTree") == 1) ? true : false;
	1229	_printSeq = (Parameters::getInt("_printSeq") == 1) ? true : false;
	1230	_printPij_t = (Parameters::getInt("_printPij_t") == 1) ? true : false;
	1231	_printLofPos = (Parameters::getInt("_printLofPos") == 1) ? true : false;
	1232	_printLofPosBothModels = (Parameters::getInt("_printLofPosBothModels") == 1) ? true : false;
	1233	_performOptimizations = (Parameters::getInt("_performOptimizations") == 1) ? true : false;
	1234	_correctOptimizationEpsilon = (Parameters::getInt("_correctOptimizationEpsilon") == 1) ? true : false;
	1235
	1236	_isInitGainLossByEmpiricalFreq = (Parameters::getInt("_isInitGainLossByEmpiricalFreq") == 1) ? true : false;
	1237	_isBBLEMwithSimpleSpBeforeFullOptimization = (Parameters::getInt("_isBBLEMwithSimpleSpBeforeFullOptimization") == 1) ? true : false;
	1238	_isOptimizeGainLossRatioInsteadOfGainAndLossSeperately = (Parameters::getInt("_isOptimizeGainLossRatioInsteadOfGainAndLossSeperately") == 1) ? true : false;
	1239	_isOptimizeInvariantCategoryProb = (Parameters::getInt("_isOptimizeInvariantCategoryProb") == 1) ? true : false;
	1240	_isUpdateOnlyGainBetaForRatio = (Parameters::getInt("_isUpdateOnlyGainBetaForRatio") == 1) ? true : false;
	1241	_isComputeLikelihoodDuringInit = (Parameters::getInt("_isComputeLikelihoodDuringInit") == 1) ? true : false;
	1242
	1243	_performOptimizationsROOT = (Parameters::getInt("_performOptimizationsROOT") == 1) ? true : false;
	1244	_initParamsAtRandPointsInOptimization = (Parameters::getInt("_initParamsAtRandPointsInOptimization") == 1) ? true : false;
	1245	_gainLossDistPlusInvariant = (Parameters::getInt("_gainLossDistPlusInvariant") == 1) ? true : false;
	1246	_isHGT_normal_Pij = (Parameters::getInt("_isHGT_normal_Pij") == 1) ? true : false;
	1247	_isHGT_with_Q = (Parameters::getInt("_isHGT_with_Q") == 1) ? true : false;
	1248	_initParamsAtRandPoints = (Parameters::getInt("_initParamsAtRandPoints") == 1) ? true : false;
	1249	_calculePosteriorExpectationOfChange = (Parameters::getInt("_calculePosteriorExpectationOfChange") == 1) ? true : false;
	1250	_modelOptimizationSimPostExp = (Parameters::getInt("_modelOptimizationSimPostExp") == 1) ? true : false;
	1251	_BBLOptimizationSimPostExp = (Parameters::getInt("_BBLOptimizationSimPostExp") == 1) ? true : false;
	1252	_initParamsAtRandPointsInSimPostExp = (Parameters::getInt("_initParamsAtRandPointsInSimPostExp") == 1) ? true : false;
	1253	_initRootFreqAtRandPointsInSimPostExpEachPos = (Parameters::getInt("_initRootFreqAtRandPointsInSimPostExpEachPos") == 1) ? true : false;
	1254	_isFlatTreeBeforOpt = (Parameters::getInt("_isFlatTreeBeforOpt") == 1) ? true : false;
	1255	_isbBLEMwithSimpleSpSimulatePostExp = (Parameters::getInt("_isbBLEMwithSimpleSpSimulatePostExp") == 1) ? true : false;
	1256	_noiseLevelInGammaSimulation = Parameters::getFloat("_noiseLevelInGammaSimulation");
	1257	_isTheataFromObservedFreq = (Parameters::getInt("_isTheataFromObservedFreq") == 1) ? true : false;
	1258	_isRootFreqEQstationaryInSimulations = (Parameters::getInt("_isRootFreqEQstationaryInSimulations") == 1) ? true : false;
	1259	_isMatrixGainLossFromRatioInSimulations = (Parameters::getInt("_isMatrixGainLossFromRatioInSimulations") == 1) ? true : false;
	1260	_isFlatSpBeforeOpt = (Parameters::getInt("_isFlatSpBeforeOpt") == 1) ? true : false;
	1261	_printTreesWithProbabilityValuesAsBP = (Parameters::getInt("_printTreesWithProbabilityValuesAsBP") == 1) ? true : false;
	1262	_printTreesWithExpectationValuesAsBP = (Parameters::getInt("_printTreesWithExpectationValuesAsBP") == 1) ? true : false;
	1263	_printTreesWithAncestralReconstructAsBP = (Parameters::getInt("_printTreesWithAncestralReconstructAsBP") == 1) ? true : false;
	1264	_printAncestralReconstructFullData = (Parameters::getInt("_printAncestralReconstructFullData") == 1) ? true : false;
	1265	_printDEBUGinfo = (Parameters::getInt("_printDEBUGinfo") == 1) ? true : false;
	1266	_printPropExpOfChangeFullData = (Parameters::getInt("_printPropExpOfChangeFullData") == 1) ? true : false;
	1267	_printExpPerPosPerBranchMatrix = (Parameters::getInt("_printExpPerPosPerBranchMatrix") == 1) ? true : false;
	1268	_printComputedCorrelations = (Parameters::getInt("_printComputedCorrelations") == 1) ? true : false;
	1269	_performParametricBootstapCorrelation = (Parameters::getInt("_performParametricBootstapCorrelation") == 1) ? true : false;
	1270	_usePosSpecificSimulations = (Parameters::getInt("_usePosSpecificSimulations") == 1) ? true : false;
	1271	_isConsiderNegativeCorrelations = (Parameters::getInt("_isConsiderNegativeCorrelations") == 1) ? true : false;
	1272	_isDivideBinsByRange = (Parameters::getInt("_isDivideBinsByRange") == 1) ? true : false;
	1273	_isSortVectorOfCorrelationsBinsByLowerRateBound = (Parameters::getInt("_isSortVectorOfCorrelationsBinsByLowerRateBound") == 1) ? true : false;
	1274	_isSortVectorOfCorrelationsBinsByMidRateBound = (Parameters::getInt("_isSortVectorOfCorrelationsBinsByMidRateBound") == 1) ? true : false;
	1275	_relativeSizeOfOverLappedBins = Parameters::getFloat("_relativeSizeOfOverLappedBins");
	1276
	1277	_isPrintpairWiseCorrelationsAndNmin = (Parameters::getInt("_isPrintpairWiseCorrelationsAndNmin") == 1) ? true : false;
	1278	_isPrintCorrelationsOfAllPairs_Corr = (Parameters::getInt("_isPrintCorrelationsOfAllPairs_Corr") == 1) ? true : false;
	1279	_isPrintCorrelationsOfAllPairs_pVal = (Parameters::getInt("_isPrintCorrelationsOfAllPairs_pVal") == 1) ? true : false;
	1280
	1281	_isPrintAllPairsOfCorrelatedSitesIncludingPValsAboveBH = (Parameters::getInt("_isPrintAllPairsOfCorrelatedSitesIncludingPValsAboveBH") == 1) ? true : false;
	1282	_isAllCorrTypeReqruiedToBeSignificant = (Parameters::getInt("_isAllCorrTypeReqruiedToBeSignificant") == 1) ? true : false;
	1283	_isNminBasedOnCountBranchesOverCutOff = (Parameters::getInt("_isNminBasedOnCountBranchesOverCutOff") == 1) ? true : false;
	1284
	1285	_numOfBinsInParametricBootstrapSimulations = Parameters::getInt("_numOfBinsInParametricBootstrapSimulations");
	1286	_isAddSimulationsWithLowRate = (Parameters::getInt("_isAddSimulationsWithLowRate") == 1) ? true : false;
	1287	_isFDRcorrectionForPValInCorrelation = (Parameters::getInt("_isFDRcorrectionForPValInCorrelation") == 1) ? true : false;
	1288	_isComputeQVals = (Parameters::getInt("_isComputeQVals") == 1) ? true : false;
	1289	_pValueCutOffForBootStrap = Parameters::getFloat("_pValueCutOffForBootStrap");
	1290	_minExpThresholdForPValComputationForCorrelatingPair = Parameters::getFloat("_minExpThresholdForPValComputationForCorrelatingPair");
	1291	_isUpdateMinExpThresholdGivenSimulaitonsQuantile = (Parameters::getInt("_isUpdateMinExpThresholdGivenSimulaitonsQuantile") == 1) ? true : false;
	1292	_isUpdateMinExpThresholdGivenRealDataQuantile = (Parameters::getInt("_isUpdateMinExpThresholdGivenRealDataQuantile") == 1) ? true : false;
	1293	_updateMinExpThresholdGivenRealDataQuantileVal = Parameters::getFloat("_updateMinExpThresholdGivenRealDataQuantileVal");
	1294	_isUpdateMinExpThresholdGivenHighFractionOfHighCorrel = (Parameters::getInt("_isUpdateMinExpThresholdGivenHighFractionOfHighCorrel") == 1) ? true : false;
	1295	_isCompExtremeValDistribution = (Parameters::getInt("_isCompExtremeValDistribution") == 1) ? true : false;
	1296
	1297	_minExpThresholdAsPercentFromNumOfSpeciesForPValComputationForCorrelatingPair = Parameters::getFloat("_minExpThresholdAsPercentFromNumOfSpeciesForPValComputationForCorrelatingPair");
	1298
	1299	_isCorrelateWithPearson = (Parameters::getInt("_isCorrelateWithPearson") == 1) ? true : false;
	1300	_isCorrelateWithSpearman = (Parameters::getInt("_isCorrelateWithSpearman") == 1) ? true : false;
	1301	_isCorrelationsBasedOnMaxParsimonyMapping = (Parameters::getInt("_isCorrelationsBasedOnMaxParsimonyMapping") == 1) ? true : false;
	1302	_isAlsoCorrelateWithLoss = (Parameters::getInt("_isAlsoCorrelateWithLoss") == 1) ? true : false;
	1303	_isAlsoCorrelateWithBoth = (Parameters::getInt("_isAlsoCorrelateWithBoth") == 1) ? true : false;
	1304	_isOnlyCorrelateWithBoth = (Parameters::getInt("_isOnlyCorrelateWithBoth") == 1) ? true : false;
	1305	_isUseRateForSiteAsNminForCorrelations = (Parameters::getInt("_isUseRateForSiteAsNminForCorrelations") == 1) ? true : false;
	1306	_isRemoveSimulatedPositionsWithExpectedLowNminBasedOnOccur = (Parameters::getInt("_isRemoveSimulatedPositionsWithExpectedLowNminBasedOnOccur") == 1) ? true : false;
	1307	_isRemoveSimulatedPositionsBasedOnMP = (Parameters::getInt("_isRemoveSimulatedPositionsBasedOnMP") == 1) ? true : false;
	1308	_minNumOfMPEvent2RemoveSimulatedPositions = Parameters::getFloat("_minNumOfMPEvent2RemoveSimulatedPositions");
	1309	_isUpdateminNumOfMPEvent2RemoveSimulatedPositions = (Parameters::getInt("_isUpdateminNumOfMPEvent2RemoveSimulatedPositions") == 1) ? true : false;
	1310
	1311	_printComputedCorrelationsAllSites = (Parameters::getInt("_printComputedCorrelationsAllSites") == 1) ? true : false;
	1312	_isIgnoreCorrelationAmongSelectedSites = (Parameters::getInt("_isIgnoreCorrelationAmongSelectedSites") == 1) ? true : false;
	1313	_isNormalizeForBranchExpInCorrCompute = (Parameters::getInt("_isNormalizeForBranchExpInCorrCompute") == 1) ? true : false;
	1314	_isNormalizeByExpectationPerBranch = (Parameters::getInt("_isNormalizeByExpectationPerBranch") == 1) ? true : false;
	1315
	1316	_selectedSitesForCorrelation = Parameters::getString("_selectedSitesForCorrelation");
	1317	_isRemoveSeqWithUnknownForLastSelectedSiteForCorrelation = (Parameters::getInt("_isRemoveSeqWithUnknownForLastSelectedSiteForCorrelation") == 1) ? true : false;
	1318	_checkCoEvolWithUnionPAP_against_pos = Parameters::getInt("_checkCoEvolWithUnionPAP_against_pos");
	1319
	1320
	1321	_calculateAncestralReconstruct = (Parameters::getInt("_calculateAncestralReconstruct") == 1) ? true : false;
	1322	_calculeBranchLegthDiffFactor = (Parameters::getInt("_calculeBranchLegthDiffFactor") == 1) ? true : false;
	1323	_initRandomGammaMixuteParam = (Parameters::getInt("_initRandomGammaMixuteParam") == 1) ? true : false;
	1324	_incrementFactorForGain = (Parameters::getInt("_incrementFactorForGain") == 1) ? true : false;
	1325	_slopeFactorForGain = Parameters::getFloat("_slopeFactorForGain");
	1326	_isStartWithTheta = (Parameters::getInt("_isStartWithTheta") == 1) ? true : false;
	1327	_isSkipGainOptimization = (Parameters::getInt("_isSkipGainOptimization") == 1) ? true : false;
	1328	_epsilonOptimizationThetaFactor = Parameters::getFloat("_epsilonOptimizationThetaFactor");
	1329
	1330	_isAlphaLimit = (Parameters::getInt("_isAlphaLimit") == 1) ? true : false;
	1331	_isGainLimit = (Parameters::getInt("_isGainLimit") == 1) ? true : false;
	1332	//_probCutOffSum = Parameters::getFloat("_probCutOffSum");
	1333	_maxRateForML = Parameters::getFloat("_maxRateForML");
	1334	_minBranchLength = Parameters::getFloat("_minBranchLength");
	1335	_maxBranchLength = Parameters::getFloat("_maxBranchLength");
	1336	_epsilonForReRootFactor = Parameters::getFloat("_epsilonForReRootFactor");
	1337	_percentOfImprovManySarts = Parameters::getFloat("_percentOfImprovManySarts");
	1338	_percentOfImprov = Parameters::getFloat("_percentOfImprov");
	1339	_accountForMissingData = (Parameters::getInt("_accountForMissingData") == 1) ? true : false;
	1340
	1341	_findCoEvolvingSitesOldNotWorking = (Parameters::getInt("_findCoEvolvingSitesOldNotWorking") == 1) ? true : false;
	1342	_saveProbChanges_PosNodeXY = (Parameters::getInt("_saveProbChanges_PosNodeXY") == 1) ? true : false;
	1343	_isComputeDistanceFromRootForRecent = (Parameters::getInt("_isComputeDistanceFromRootForRecent") == 1) ? true : false;
	1344	_printAncestralReconstructPosterior = (Parameters::getInt("_printAncestralReconstructPosterior") == 1) ? true : false;
	1345	_numberOfSequences2simulateForCoEvol = (Parameters::getInt("_numberOfSequences2simulateForCoEvol"));
	1346
	1347	_simulationType = getSimulationTypeFromStr(Parameters::getString("_simulationType"));
	1348	_isMPratio = (Parameters::getInt("_isMPratio") == 1) ? true : false;
	1349	_isInitGainLossByEmpiricalFreqSimulatePostExp = (Parameters::getInt("_isInitGainLossByEmpiricalFreqSimulatePostExp") == 1) ? true : false;
	1350	_is3states = (Parameters::getInt("_is3states") == 1) ? true : false;
	1351	_3statesGain = Parameters::getFloat("_3statesGain");
	1352	_3statesMore = Parameters::getFloat("_3statesMore");
	1353	_3statesLess = Parameters::getFloat("_3statesLess");
	1354	_3statesLoss = Parameters::getFloat("_3statesLoss");
	1355	_3states0 = Parameters::getFloat("_3states0");
	1356	_3states1 = Parameters::getFloat("_3states1");
	1357
	1358	_simulateSequences = (Parameters::getInt("_simulateSequences") == 1) ? true : false;
	1359	_useTheSameSpForSim = (Parameters::getInt("_useTheSameSpForSim") == 1) ? true : false;
	1360	_isReversibleSim = (Parameters::getInt("_isReversibleSim") == 1) ? true : false;
	1361	_numberOfSequences2simulate = Parameters::getInt("_numberOfSequences2simulate");
	1362	_numberOfPositions2simulate = Parameters::getInt("_numberOfPositions2simulate");
	1363	_numberOfIterations2simulate = Parameters::getInt("_numberOfIterations2simulate");
	1364	_numberOfIterationsForPrintResults = Parameters::getInt("_numberOfIterationsForPrintResults");
	1365	_percentileOfNminWithCorr1RequiredForLastIteration = Parameters::getFloat("_percentileOfNminWithCorr1RequiredForLastIteration");
	1366
	1367	_rateDistributionTypeSim = getDistributionType(Parameters::getString("_rateDistributionTypeSim"));
	1368	_gainEQlossSim = (Parameters::getInt("_gainEQlossSim") == 1) ? true : false;
	1369	_calculateRate4siteSim = (Parameters::getInt("_calculateRate4siteSim") == 1) ? true : false;
	1370
	1371	_isOnlyParsimony = (Parameters::getInt("_isOnlyParsimony") == 1) ? true : false;
	1372	_calculeMaxParsimonyChange = (Parameters::getInt("_calculeMaxParsimonyChange") == 1) ? true : false;
	1373	_calculeMaxParsimonyChangeSeveralGainLossRatios = (Parameters::getInt("_calculeMaxParsimonyChangeSeveralGainLossRatios") == 1) ? true : false;
	1374	_costMatrixType = getCostMatrixTypeFromStr(Parameters::getString("_costMatrixType"));
	1375	_costMatrixfile = Parameters::getString("_costMatrixfile");
	1376	_costMatrixGainLossRatio = Parameters::getFloat("_costMatrixGainLossRatio");
	1377	if(_calculateRate4siteSim \|\| _findCoEvolvingSitesOldNotWorking){
	1378	_writeSeqSim = true;
	1379	Parameters::updateParameter("_writeSeqSim","1");
	1380	}
	1381	_writeSeqSim = (Parameters::getInt("_writeSeqSim") == 1) ? true : false;
	1382
	1383	if(_rateDistributionType == GAMMA_MIXTURE){ // TEMP - not DEBBUGED
	1384	if(_performOptimizationsManyStarts){
	1385	cout<<"For GAMMA_MIXTURE - OptimizationsManyStarts is not fully functional.";
	1386	// _performOptimizationsManyStarts =0;
	1387	// Parameters::updateParameter("_performOptimizationsManyStarts","0");
	1388	}
	1389	}
	1390	}
	1391
	1392
	1393
	1394	/********************************************************************************************
	1395	*********************************************************************************************/
	1396	void gainLossOptions::getOutDirFromFile(const string& paramFileName)
	1397	{
	1398	_outDir = "RESULTS";
	1399	Parameters::addParameter("_outDir", _outDir);
	1400
	1401	ifstream params(paramFileName.c_str());
	1402	if(params.good())
	1403	Parameters::readParameters(params);
	1404	params.close();
	1405	_outDir = Parameters::getString("_outDir");
	1406	}
	1407
	1408	/********************************************************************************************
	1409	*********************************************************************************************/
	1410	void gainLossOptions::verifyConsistParams()
	1411	{
	1412
	1413	if((_isReversible \|\| gainLossOptions::_isRootFreqEQstationary) && // fixedRoot
	1414	(gainLossOptions::_isBblEMbeforeLSWithMissSpecifiedModel \|\| !gainLossOptions::_isBblLS) && // BBL-EM
	1415	(gainLossOptions::_gainDistributionType==gainLossOptions::UNIFORM) // UNIFORM
	1416	)
	1417	errorMsg::reportError("BBL-EM fixedRoot is not working with UNIFORM");
	1418
	1419	if(gainLossOptions::_isAlsoCorrelateWithLoss)
	1420	LOGnOUT(3,<<"WARN: compute correlatins for co-Loss, printComputedCorrelationsData() is problematic (not all pair will have both co-gain and co-loss defined) \n");
	1421
	1422	if(_gainLossDist == true && !(_rateDistributionType == UNIFORM)){
	1423	cout<<"WARNING:!!! In params: _gainLossDist == 1 but _rateDistributionType != UNIFORM (update to UNIFORM)\n";
	1424	_rateDistributionType = UNIFORM;
	1425	Parameters::updateParameter("_rateDistributionType","UNIFORM");
	1426	}
	1427	//if(gainLossOptions::_isReversible && gainLossOptions::_calculePosteriorExpectationOfChange)
	1428	// errorMsg::reportError("calculePosteriorExpectationOfChange is not implemented for Reversible process");
	1429	//if((gainLossOptions::_rateDistributionType == UNIFORM) && gainLossOptions::_calculePosteriorExpectationOfChange)
	1430	// errorMsg::reportError("calculePosteriorExpectationOfChange is not implemented for UNIFORM rate");
	1431	//if(gainLossOptions::_gainLossDist && gainLossOptions::_printLikelihoodLandscape)
	1432	// errorMsg::reportError("LikelihoodLandscape is not implemented for spVVec(gainLossDist)");
	1433	//if(gainLossOptions::_gainLossDist && gainLossOptions::_performOptimizationsBBL)
	1434	// errorMsg::reportError("BBL is not implemented for spVVec(gainLossDist)");
	1435	//if(gainLossOptions::_accountForMissingData && (gainLossOptions::_rateDistributionType == GAMMA_MIXTURE))
	1436	// errorMsg::reportError("accountForMissingData is not implemented with GAMMA_MIXTURE");
	1437	}
	1438
	1439	/********************************************************************************************
	1440	Updates... Verify consistencies
	1441	*********************************************************************************************/
	1442	void gainLossOptions::updateDependencies(){
	1443	if(_simulatedAnnealing){
	1444	cout<<"In params: _simulatedAnnealing -> double the normal epsilons\n";
	1445	updateOptimizationLevel(low);
	1446	}
	1447	updateGainLossDist();
	1448	updateAccountForMissingData();
	1449	updateRemoveComputationNotSuiteForModels();
	1450	updateSimulatePosteriorExpectationOfChange();
	1451	updateInitParamsAtRandPointsInSimPostExp();
	1452	updateGainEQloss();
	1453	updateGainLossAsFreq();
	1454	updateUserGainLossRatio(_userGainLossRatio);
	1455	updateKeepUserGainLossRatio();
	1456	updateOnlyComputeLikelihood();
	1457	updateFlatUserParameters();
	1458	updateNoBBL();
	1459	updateNoOptimization();
	1460	updateNoBranchLengthDiffComputation();
	1461	updateOptimizationLevel(_optimizationLevel); // should be after updateNoBBL
	1462	updatNoSeq();
	1463	updateParsimonyRun();
	1464	if(_performParametricBootstapCorrelation)
	1465	updatParametericBootstrapComputationOfCorrelation();
	1466	}
	1467
	1468
	1469	/********************************************************************************************
	1470	Updates... Verify consistencies
	1471	*********************************************************************************************/
	1472	void gainLossOptions::updateOptimizationLevel(optimizationLevel level)
	1473	{
	1474	MDOUBLE epsilonFactor = 1;
	1475
	1476	if(level == mid)
	1477	return; // no change
	1478	switch (level) // enum optimizationLevel {VVVlow,VVlow, Vlow, low, mid, high, Vhigh};
	1479	{
	1480	case VVVlow:
	1481	epsilonFactor = 10;
	1482	_maxNumOfIterations = 1;
	1483	_maxNumOfIterationsModel = 1;
	1484	_maxNumOfIterationsBBL = 1;
	1485	_numberOfRandPointsInOptimization = 1;
	1486	_numberOfRandStartPoints = 10;
	1487	_percentOfImprov = 0.0002;
	1488	_correctOptimizationEpsilon = 1;
	1489	_isOptimizeInvariantCategoryProb = false;
	1490	break;
	1491	case VVlow:
	1492	epsilonFactor = 8;
	1493	_maxNumOfIterations = 1;
	1494	_maxNumOfIterationsModel = 1;
	1495	_maxNumOfIterationsBBL = 1;
	1496	_numberOfRandPointsInOptimization = 2;
	1497	_numberOfRandStartPoints = 20;
	1498	_percentOfImprov = 0.0001;
	1499	_correctOptimizationEpsilon = 1;
	1500	_isOptimizeInvariantCategoryProb = false;
	1501	break;
	1502	case Vlow:
	1503	epsilonFactor = 5;
	1504	_maxNumOfIterations = 1;
	1505	_maxNumOfIterationsModel = 1;
	1506	_maxNumOfIterationsBBL = 1;
	1507	_numberOfRandPointsInOptimization = 3;
	1508	_numberOfRandStartPoints = 30;
	1509	_percentOfImprov = 0.00002;
	1510	_correctOptimizationEpsilon = 1;
	1511	_isOptimizeInvariantCategoryProb = false;
	1512	break;
	1513	case low: // same as Vlow
	1514	epsilonFactor = 5;
	1515	_maxNumOfIterations = 1;
	1516	_maxNumOfIterationsModel = 1;
	1517	_maxNumOfIterationsBBL = 1;
	1518	_numberOfRandPointsInOptimization = 3;
	1519	_numberOfRandStartPoints = 30;
	1520	_percentOfImprov = 0.00002;
	1521	_correctOptimizationEpsilon = 1;
	1522	_isOptimizeInvariantCategoryProb = false;
	1523	break;
	1524	case mid:
	1525	break;
	1526	case high:
	1527	epsilonFactor = 0.5;
	1528	break;
	1529	case Vhigh:
	1530	epsilonFactor = 0.1;
	1531	//_isBblLS = true;
	1532	//Parameters::updateParameter("_isBblLS","0");
	1533	_isbblLSWhenbblEMdontImprove = true;
	1534	Parameters::updateParameter("_isbblLSWhenbblEMdontImprove","1");
	1535
	1536	break;
	1537	}
	1538	cout<<"In params: updateOptimizationLevel -> multiply the normal epsilons by "<<epsilonFactor<<"\n";
	1539
	1540	_epsilonOptimizationIterationCycle *=epsilonFactor;
	1541	Parameters::updateParameter("_epsilonOptimizationIterationCycle",double2string(_epsilonOptimizationIterationCycle).c_str());
	1542	_epsilonOptimizationModel *=epsilonFactor;
	1543	Parameters::updateParameter("_epsilonOptimizationModel",double2string(_epsilonOptimizationModel).c_str());
	1544	_epsilonOptimizationBBL *=epsilonFactor;
	1545	Parameters::updateParameter("_epsilonOptimizationBBL",double2string(_epsilonOptimizationBBL).c_str());
	1546
	1547	Parameters::updateParameter("_maxNumOfIterations",double2string(_maxNumOfIterations).c_str());
	1548	Parameters::updateParameter("_maxNumOfIterationsModel",double2string(_maxNumOfIterationsModel).c_str());
	1549	Parameters::updateParameter("_maxNumOfIterationsBBL",double2string(_maxNumOfIterationsBBL).c_str());
	1550
	1551	Parameters::updateParameter("_numberOfRandPointsInOptimization",double2string(_numberOfRandPointsInOptimization).c_str());
	1552	Parameters::updateParameter("_numberOfRandStartPoints",double2string(_numberOfRandStartPoints).c_str());
	1553
	1554	// lowering the epsilon seems problematic - alternative?
	1555	Parameters::updateParameter("_percentOfImprov",double2string(_percentOfImprov).c_str());
	1556	Parameters::updateParameter("_correctOptimizationEpsilon",int2string(_correctOptimizationEpsilon).c_str());
	1557	Parameters::updateParameter("_isOptimizeInvariantCategoryProb",int2string(_isOptimizeInvariantCategoryProb).c_str());
	1558
	1559	if(level < 4){ // 4 is mid,
	1560	_isBblLS = false;
	1561	Parameters::updateParameter("_isBblLS","0");
	1562	_isbblLSWhenbblEMdontImprove = false;
	1563	Parameters::updateParameter("_isbblLSWhenbblEMdontImprove","0");
	1564	if(level < 2){ // VVVlow and VVlow - no BBL
	1565	_performOptimizationsBBL = false;
	1566	Parameters::updateParameter("_performOptimizationsBBL","0");
	1567	_performOptimizationsManyStarts =false;
	1568	Parameters::updateParameter("_performOptimizationsManyStarts","0");
	1569	//if(level > 0){ // other than VVVlow - prior simple BBLEM performed
	1570	// _isBBLEMwithSimpleSpBeforeFullOptimization = true;
	1571	// Parameters::updateParameter("_isBBLEMwithSimpleSpBeforeFullOptimization","1");
	1572	//}
	1573	}
	1574	}
	1575
	1576	}
	1577
	1578	/********************************************************************************************
	1579	*********************************************************************************************/
	1580	void gainLossOptions::updateUserGainLossRatio(MDOUBLE _userGainLossRatio)
	1581	{
	1582	if(!(_userGainLossRatio<VERYBIG)) // then it was not given
	1583	return;
	1584	cout<<"In params: _userGainLossRatio -> Change gain, loss, Beta, Theta to adapt by" <<_userGainLossRatio<<"\n";
	1585	MDOUBLE basicRate = 1; // there is no need for this parameter...
	1586	_userGain = basicRate*sqrt(_userGainLossRatio);
	1587
	1588	Parameters::updateParameter("_userGain",double2string(_userGain).c_str());
	1589	if(_userGainLossRatio == 0)
	1590	_userLoss =1;
	1591	else
	1592	_userLoss = basicRate*sqrt(1/_userGainLossRatio);
	1593	Parameters::updateParameter("_userLoss",double2string(_userLoss).c_str());
	1594
	1595	//MDOUBLE computedTheta = 0.5/(_userGainLossRatio/0.1);
	1596	MDOUBLE computedTheta = _userGain/(_userGain+_userLoss);
	1597
	1598	if(computedTheta<1 && computedTheta>0)
	1599	_userTheta = computedTheta; // in case _userGainLossRatio is smaller then 0.05
	1600	else
	1601	_userTheta = _userThetaMax;
	1602	//_userTheta = _userGainLossRatio/(1+_userGainLossRatio); // ???
	1603	Parameters::updateParameter("_userTheta",double2string(_userTheta).c_str());
	1604
	1605	//_isStartWithTheta = true; // why is it required?
	1606	//Parameters::updateParameter("_isStartWithTheta","1");
	1607
	1608	if(_gainLossDist == 1 && (_userGainLossRatio<pow(10.0,-10.0) \|\| _userGainLossRatio>pow(10.0,10.0)))
	1609	LOGnOUT(3,<<"WARN: with Mixture model, no extreme gain/loss ratios are possible\n");
	1610	MDOUBLE gainLossRatioToCompleteByBeta = _userGainLossRatio*(_userAlphaLoss/_userAlphaGain);
	1611	if(_userGainLossRatio == 0)
	1612	_userBetaGain = VERYBIG;
	1613	else
	1614	if(_isUpdateOnlyGainBetaForRatio)
	1615	_userBetaGain =_userBetaLoss/gainLossRatioToCompleteByBeta; // AlphaGain = 0.35
	1616	else
	1617	_userBetaGain =sqrt(1/gainLossRatioToCompleteByBeta); // AlphaGain = 0.35
	1618	Parameters::updateParameter("_userBetaGain",double2string(_userBetaGain).c_str());
	1619
	1620	if(!_isUpdateOnlyGainBetaForRatio){
	1621	if(_userGainLossRatio == 0)
	1622	_userBetaGain = VERYSMALL;
	1623	else
	1624	_userBetaLoss =sqrt(gainLossRatioToCompleteByBeta); // AlphaLoss = 0.9
	1625	Parameters::updateParameter("_userBetaLoss",double2string(_userBetaLoss).c_str());
	1626	}
	1627	_isInitGainLossByEmpiricalFreq = false;
	1628	Parameters::updateParameter("_isInitGainLossByEmpiricalFreq","0");
	1629	}
	1630
	1631	/********************************************************************************************
	1632	*********************************************************************************************/
	1633	void gainLossOptions::updateGainLossAsFreq()
	1634	{
	1635	if(!_gainLossRateAreFreq)
	1636	return;
	1637	cout<<"In params: _gainLossRateAreFreq -> adapt g+l=1, max val = 1\n";
	1638	_userGain= 0.4;
	1639	Parameters::updateParameter("_userGain","0.4");
	1640	_userLoss = 0.6;
	1641	Parameters::updateParameter("_userLoss","0.6");
	1642	_userGainMax = 0.9999;
	1643	Parameters::updateParameter("_userGainMax","0.9999");
	1644	_userLossMax = 0.9999;
	1645	Parameters::updateParameter("_userLossMax","0.9999");
	1646	}
	1647
	1648	/********************************************************************************************
	1649	*********************************************************************************************/
	1650	void gainLossOptions::updateParamsInRangeOverrideParamFile()
	1651	{
	1652	_userTheta = max(_userTheta,1e-06);
	1653	_userTheta = min(_userTheta,1-1e-06);
	1654	Parameters::updateParameter("_userTheta",double2string(_userTheta).c_str());
	1655	//_userGain = max(_userGain,1e-06);
	1656	//Parameters::updateParameter("_userGain",double2string(_userGain).c_str());
	1657
	1658
	1659	}
	1660
	1661
	1662
	1663	/********************************************************************************************
	1664	*********************************************************************************************/
	1665	void gainLossOptions::updatNoSeq()
	1666	{
	1667	if(_seqFile!="")
	1668	return;
	1669	cout<<"In params: no Seq file -> \n";
	1670	_isTheataFromObservedFreq= false;
	1671	Parameters::updateParameter("_isTheataFromObservedFreq","0");
	1672	_characterFreqEval = FiftyFifty;
	1673	Parameters::updateParameter("_characterFreqEval","FiftyFifty");
	1674	if(_simulationType == MPestEmp \|\| _simulationType == SMestEmp)
	1675	errorMsg::reportError("The simulation scenario based on real data, in _simulationType=MPestEmp or SMestEmp requires input Seq.\n");
	1676
	1677	}
	1678
	1679	/********************************************************************************************
	1680	*********************************************************************************************/
	1681	void gainLossOptions::updatParametericBootstrapComputationOfCorrelation()
	1682	{
	1683	cout<<"In params: ParametericBootstrapComputationOfCorrelation -> \n";
	1684
	1685	_calculePosteriorExpectationOfChange= true;
	1686	Parameters::updateParameter("_calculePosteriorExpectationOfChange","1");
	1687	_printComputedCorrelations = true;
	1688	Parameters::updateParameter("_printComputedCorrelations","1");
	1689	if(gainLossOptions::_selectedSitesForCorrelation==""){
	1690	_printComputedCorrelationsAllSites = true;
	1691	Parameters::updateParameter("_printComputedCorrelationsAllSites","1");
	1692	}
	1693	_calculateRate4site = false;
	1694	Parameters::updateParameter("_calculateRate4site","0");
	1695	_calculeGainLoss4site = false;
	1696	Parameters::updateParameter("_calculeGainLoss4site","0");
	1697	_calculeMaxParsimonyChange = false;
	1698	Parameters::updateParameter("_calculeMaxParsimonyChange","0");
	1699	_calculateAncestralReconstruct = false;
	1700	Parameters::updateParameter("_calculateAncestralReconstruct","0");
	1701	_printLofPos = false;
	1702	Parameters::updateParameter("_printLofPos","0");
	1703	_isNormalizeQandTreeafterOpt = true; // with NoOpt - false is the default
	1704	Parameters::updateParameter("_isNormalizeQandTreeafterOpt","1");
	1705	//_performOptimizationsBBL = false;
	1706	//Parameters::updateParameter("_performOptimizationsBBL","0");
	1707	_calculeBranchLegthDiffFactor = false;
	1708	Parameters::updateParameter("_calculeBranchLegthDiffFactor","0");
	1709
	1710	if(_usePosSpecificSimulations){
	1711	_isOnlySimulateSeq = true;
	1712	Parameters::updateParameter("_isOnlySimulateSeq","1");
	1713	_simulationType = Gamma;
	1714	Parameters::updateParameter("_simulationType", "Gamma");
	1715	_numberOfSequences2simulate = 1;
	1716	Parameters::updateParameter("_numberOfSequences2simulate", "1");
	1717	}
	1718	if(_isSortVectorOfCorrelationsBinsByLowerRateBound){
	1719	_isSortVectorOfCorrelationsBinsByMidRateBound = false;
	1720	Parameters::updateParameter("_isSortVectorOfCorrelationsBinsByMidRateBound","0");
	1721	_numOfBinsInParametricBootstrapSimulations = 20;
	1722	Parameters::updateParameter("_numOfBinsInParametricBootstrapSimulations","10");
	1723	}
	1724	if(_isSortVectorOfCorrelationsBinsByMidRateBound){
	1725	_isSortVectorOfCorrelationsBinsByLowerRateBound = false;
	1726	Parameters::updateParameter("_isSortVectorOfCorrelationsBinsByLowerRateBound","0");
	1727	_numOfBinsInParametricBootstrapSimulations = 10;
	1728	Parameters::updateParameter("_numOfBinsInParametricBootstrapSimulations","10");
	1729	}
	1730	}
	1731
	1732
	1733	/********************************************************************************************
	1734	*********************************************************************************************/
	1735	void gainLossOptions::updateNoBranchLengthDiffComputation()
	1736	{
	1737	if(_performOptimizationsBBL == 0 \|\| _performOptimizations == 0){
	1738	cout<<"In params: _performOptimizationsBBL =false -> _calculeBranchLegthDiffFactor =false\n";
	1739	_calculeBranchLegthDiffFactor = false;
	1740	Parameters::updateParameter("_calculeBranchLegthDiffFactor","0");
	1741	}
	1742	}
	1743
	1744	/********************************************************************************************
	1745	*********************************************************************************************/
	1746	void gainLossOptions::updateNoBBL()
	1747	{
	1748	if(_performOptimizationsBBL)
	1749	return;
	1750	cout<<"In params: _performOptimizationsBBL =false -> _isBBLEMwithSimpleSpBeforeFullOptimization =false\n";
	1751	_isBBLEMwithSimpleSpBeforeFullOptimization = false;
	1752	Parameters::updateParameter("_isBBLEMwithSimpleSpBeforeFullOptimization","0");
	1753	}
	1754
	1755
	1756	/********************************************************************************************
	1757	*********************************************************************************************/
	1758	void gainLossOptions::updateGainEQloss()
	1759	{
	1760	if(!_gainEQloss)
	1761	return;
	1762	cout<<"In params: _gainEQloss -> FiftyFifty, and Reversible\n";
	1763	_characterFreqEval = FiftyFifty;
	1764	Parameters::updateParameter("_characterFreqEval","FiftyFifty");
	1765	_isReversible = true;
	1766	Parameters::updateParameter("_isReversible","1");
	1767	}
	1768
	1769	/********************************************************************************************
	1770	*********************************************************************************************/
	1771	void gainLossOptions::updateKeepUserGainLossRatio()
	1772	{
	1773	if(!_keepUserGainLossRatio)
	1774	return;
	1775	cout<<"In params: _keepUserGainLossRatio -> No _isInitGainLossByEmpiricalFreq\n";
	1776	_isInitGainLossByEmpiricalFreq = false;
	1777	Parameters::updateParameter("_isInitGainLossByEmpiricalFreq","0");
	1778	}
	1779
	1780	/********************************************************************************************
	1781	*********************************************************************************************/
	1782	void gainLossOptions::updateRemoveComputationNotSuiteForModels()
	1783	{
	1784	if(_isReversible){
	1785	cout<<"In params: _isReversible -> _calculePosteriorExpectationOfChange = false\n";
	1786	_calculePosteriorExpectationOfChange = false;
	1787	Parameters::updateParameter("_calculePosteriorExpectationOfChange","0");
	1788	}
	1789	if(_rateDistributionType == UNIFORM && !_gainLossDist){ // TEMP - not DEBBUGED
	1790	//cout<<"In params: rateDistributionType == UNIFORM -> _calculePosteriorExpectationOfChange and _calculateAncestralReconstruct = false\n";
	1791	//_calculePosteriorExpectationOfChange = false;
	1792	//Parameters::updateParameter("_calculePosteriorExpectationOfChange","0");
	1793	//_calculateAncestralReconstruct = false;
	1794	//Parameters::updateParameter("_calculateAncestralReconstruct","0");
	1795	}
	1796
	1797	}
	1798	/********************************************************************************************
	1799	*********************************************************************************************/
	1800	void gainLossOptions::updateGainLossDist()
	1801	{
	1802	if(_gainLossDist){
	1803	cout<<"In params: _gainLossDist == 1 -> _rateDistributionType = UNIFORM (prevent to option for inner complex stochastic process)\n";
	1804	_rateDistributionType = UNIFORM;
	1805	Parameters::updateParameter("_rateDistributionType","UNIFORM");
	1806	_calculateRate4site = false;
	1807	Parameters::updateParameter("_calculateRate4site","0");
	1808	//_isBblLS = true;
	1809	//Parameters::updateParameter("_isBblLS","1");
	1810
	1811	if((_gainDistributionType == GENERAL_GAMMA_PLUS_INV) \|\| (_lossDistributionType == GAMMA_PLUS_INV)){
	1812	_gainLossDistPlusInvariant = true;
	1813	Parameters::updateParameter("_gainLossDistPlusInvariant","1");
	1814	}
	1815	}
	1816	}
	1817
	1818	/********************************************************************************************
	1819	*********************************************************************************************/
	1820	void gainLossOptions::updateAccountForMissingData()
	1821	{
	1822	if(!_accountForMissingData){
	1823	_minNumOfOnes =0;
	1824	_minNumOfZeros =0;
	1825	Parameters::updateParameter("_minNumOfOnes","0");
	1826	Parameters::updateParameter("_minNumOfZeros","0");
	1827	}
	1828	if(_accountForMissingData && _minNumOfOnes ==0 && _minNumOfZeros ==0){
	1829	_accountForMissingData =false;
	1830	Parameters::updateParameter("_accountForMissingData","0");
	1831	}
	1832	}
	1833
	1834	/********************************************************************************************
	1835	*********************************************************************************************/
	1836	void gainLossOptions::updateInitParamsAtRandPointsInSimPostExp()
	1837	{
	1838	//if(_initParamsFromMPEstimation \|\| _initParamsFromMPratio \|\| _initParamsFromTrueEstimation \|\| _initParamsFromGammaWithNoise){
	1839	// _initParamsAtRandPointsInSimPostExp = false;
	1840	// Parameters::updateParameter("_initParamsAtRandPointsInSimPostExp","0");
	1841	//}
	1842
	1843	if(_simulationType == Gamma){
	1844	_isFlatSpBeforeOpt = true;
	1845	Parameters::updateParameter("_isFlatSpBeforeOpt","1");
	1846	}
	1847	if(_simulationType == GammaNoise){
	1848	_modelOptimizationSimPostExp = false;
	1849	Parameters::updateParameter("_modelOptimizationSimPostExp","0");
	1850	}
	1851	}
	1852
	1853	/********************************************************************************************
	1854	*********************************************************************************************/
	1855	void gainLossOptions::updateSimulatePosteriorExpectationOfChange()
	1856	{
	1857	if(!_simulatePosteriorExpectationOfChange)
	1858	return;
	1859	cout<<"In params: _simulatePosteriorExpectationOfChange -> no Opt, no Calculations ...\n";
	1860	_performOptimizations = false;
	1861	Parameters::updateParameter("_performOptimizations","0");
	1862	_calculateAncestralReconstruct = false;
	1863	Parameters::updateParameter("_calculateAncestralReconstruct","0");
	1864	_calculateRate4site = false;
	1865	Parameters::updateParameter("_calculateRate4site","0");
	1866	_calculeGainLoss4site = false;
	1867	Parameters::updateParameter("_calculeGainLoss4site","0");
	1868	//_calculePosteriorExpectationOfChange = false;
	1869	//Parameters::updateParameter("_calculePosteriorExpectationOfChange","0"); // required for SMestEmp
	1870	_printLofPos = false;
	1871	Parameters::updateParameter("_printLofPos","0");
	1872	//_printSeq = false;
	1873	//Parameters::updateParameter("_printSeq","0");
	1874	_printTree = false;
	1875	Parameters::updateParameter("_printTree","0");
	1876	_lossBiggerGainLimit = true;
	1877	Parameters::updateParameter("_lossBiggerGainLimit","1");
	1878	_printPropExpOfChangeFullData = 1;
	1879	Parameters::updateParameter("_printPropExpOfChangeFullData","1");
	1880	_probCutOffPrintEvent = 0;
	1881	Parameters::updateParameter("_probCutOffPrintEvent","0");
	1882	_calculeMaxParsimonyChangeSeveralGainLossRatios =1;
	1883	Parameters::updateParameter("_calculeMaxParsimonyChangeSeveralGainLossRatios","1");
	1884
	1885	//_isRootFreqEQstationary =1;
	1886	//Parameters::updateParameter("_isRootFreqEQstationary","1");
	1887
	1888	_isbblLSWhenbblEMdontImprove =0;
	1889	Parameters::updateParameter("_isbblLSWhenbblEMdontImprove","0");
	1890
	1891	if(_seqFile==""){
	1892	_isInitGainLossByEmpiricalFreq = 0;
	1893	Parameters::updateParameter("_isInitGainLossByEmpiricalFreq","0");
	1894	}
	1895	// Note: if tree is not Flatned (branches) there is no need for skip
	1896	//_isSkipFirstParamsOptimization =1;
	1897	//Parameters::updateParameter("_isSkipFirstParamsOptimization","1");
	1898	}
	1899
	1900	/********************************************************************************************
	1901	*********************************************************************************************/
	1902	void gainLossOptions::updateOnlyComputeLikelihood()
	1903	{
	1904	if(!_isOnlyComputeLikelihood)
	1905	return;
	1906	cout<<"In params: _isOnlyComputeLikelihood -> only Opt, no Calculations ...\n";
	1907	_calculateRate4site = false;
	1908	Parameters::updateParameter("_calculateRate4site","0");
	1909	_calculeGainLoss4site = false;
	1910	Parameters::updateParameter("_calculeGainLoss4site","0");
	1911	_calculePosteriorExpectationOfChange = false;
	1912	Parameters::updateParameter("_calculePosteriorExpectationOfChange","0");
	1913	_calculeMaxParsimonyChange = false;
	1914	Parameters::updateParameter("_calculeMaxParsimonyChange","0");
	1915	_calculateAncestralReconstruct = false;
	1916	Parameters::updateParameter("_calculateAncestralReconstruct","0");
	1917	_calculeBranchLegthDiffFactor =false;
	1918	Parameters::updateParameter("_calculeBranchLegthDiffFactor","0");
	1919	_printSeq = false;
	1920	Parameters::updateParameter("_printSeq","0");
	1921
	1922	_printLofPos = true;
	1923	Parameters::updateParameter("_printLofPos","1");
	1924	}
	1925	/********************************************************************************************
	1926	*********************************************************************************************/
	1927	void gainLossOptions::updateNoOptimization()
	1928	{
	1929	if(_performOptimizations)
	1930	return;
	1931	cout<<"In params: _performOptimizations = F -> no Opt\n";
	1932	_isMultipleAllBranchesByFactorAtStart = false;
	1933	Parameters::updateParameter("_isMultipleAllBranchesByFactorAtStart","0");
	1934	_isBBLEMwithSimpleSpBeforeFullOptimization = false;
	1935	Parameters::updateParameter("_isBBLEMwithSimpleSpBeforeFullOptimization","0");
	1936	_isNormalizeAtStart = false;
	1937	Parameters::updateParameter("_isNormalizeAtStart","0");
	1938	_isAlphaEqBetaManipulation = false;
	1939	Parameters::updateParameter("_isAlphaEqBetaManipulation","0");
	1940	_isNormalizeQandTreeafterOpt = false;
	1941	Parameters::updateParameter("_isNormalizeQandTreeafterOpt","0");
	1942	_isInitGainLossByEmpiricalFreq = false;
	1943	Parameters::updateParameter("_isInitGainLossByEmpiricalFreq","0");
	1944	}
	1945
	1946	/********************************************************************************************
	1947	*********************************************************************************************/
	1948	void gainLossOptions::updateParsimonyRun()
	1949	{
	1950	if(!_isCorrelationsBasedOnMaxParsimonyMapping && !_isOnlyParsimony)
	1951	return;
	1952
	1953	cout<<"In params: _performOptimizations = F -> no Opt\n";
	1954
	1955	_performOptimizations = false;
	1956	Parameters::updateParameter("_performOptimizations","0");
	1957	_isMultipleAllBranchesByFactorAtStart = false;
	1958	Parameters::updateParameter("_isMultipleAllBranchesByFactorAtStart","0");
	1959	_isBBLEMwithSimpleSpBeforeFullOptimization = false;
	1960	Parameters::updateParameter("_isBBLEMwithSimpleSpBeforeFullOptimization","0");
	1961	_isNormalizeAtStart = false;
	1962	Parameters::updateParameter("_isNormalizeAtStart","0");
	1963	_isAlphaEqBetaManipulation = false;
	1964	Parameters::updateParameter("_isAlphaEqBetaManipulation","0");
	1965	_isNormalizeQandTreeafterOpt = false;
	1966	Parameters::updateParameter("_isNormalizeQandTreeafterOpt","0");
	1967	_isInitGainLossByEmpiricalFreq = false;
	1968	Parameters::updateParameter("_isInitGainLossByEmpiricalFreq","0");
	1969	_isComputeLikelihoodDuringInit = false;
	1970	Parameters::updateParameter("_isComputeLikelihoodDuringInit","0");
	1971
	1972	}
	1973
	1974
	1975
	1976	/********************************************************************************************
	1977	*********************************************************************************************/
	1978	void gainLossOptions::updateFlatUserParameters()
	1979	{
	1980	if(!_isFlatUserParameters)
	1981	return;
	1982	cout<<"In params: _isFlatUserParameters -> all user paramas are 1.\n";
	1983	_userGain = 1.0;
	1984	Parameters::updateParameter("_userGain","1.0");
	1985	_userLoss = 1.0;
	1986	Parameters::updateParameter("_userLoss","1.0");
	1987	_userTheta =0.5;
	1988	Parameters::updateParameter("_userTheta","0.5");
	1989	_userAlphaGain =1.0;
	1990	Parameters::updateParameter("_userAlphaGain","1.0");
	1991	_userBetaGain =1.0;
	1992	Parameters::updateParameter("_userBetaGain","1.0");
	1993	_userAlphaLoss =1.0;
	1994	Parameters::updateParameter("_userAlphaLoss","1.0");
	1995	_userBetaLoss =1.0;
	1996	Parameters::updateParameter("_userBetaLoss","1.0");
	1997	_userAlphaRate =1.0;
	1998	Parameters::updateParameter("_userAlphaRate","1.0");
	1999	_userBetaRate =1.0;
	2000	Parameters::updateParameter("_userBetaRate","1.0");
	2001	}
	2002
	2003
	2004
	2005
	2006
	2007
	2008	/********************************************************************************************
	2009	Types
	2010	enum optimizationLevel {VVVlow, VVlow, Vlow, low, mid, high, Vhigh};
	2011	*********************************************************************************************/
	2012	string gainLossOptions::getOptimizationLevelType(optimizationLevel type)
	2013	{
	2014	string res = "";
	2015	switch (type) //{VVlow, Vlow, low, mid, high, Vhigh}
	2016	{
	2017	case VVVlow:
	2018	res = "VVVlow";
	2019	break;
	2020	case VVlow:
	2021	res = "VVlow";
	2022	break;
	2023	case Vlow:
	2024	res = "Vlow";
	2025	break;
	2026	case low:
	2027	res = "low";
	2028	break;
	2029	case mid:
	2030	res = "mid";
	2031	break;
	2032	case high:
	2033	res = "high";
	2034	break;
	2035	case Vhigh:
	2036	res = "Vhigh";
	2037	break;
	2038	default:
	2039	errorMsg::reportError("unknown type in optimizationLevel - {VVVlow,VVlow,Vlow, low, mid, high, Vhigh}");
	2040	}
	2041	return res;
	2042	}
	2043	//////////////////////////////////////////////////////////////////////////
	2044	gainLossOptions::optimizationLevel gainLossOptions::getOptimizationLevelTypeFromStr(const string& str)
	2045	{
	2046	optimizationLevel returnType;
	2047	if (str == "VVVlow")
	2048	returnType = VVVlow;
	2049	else if (str == "VVlow")
	2050	returnType = VVlow;
	2051	else if (str == "Vlow")
	2052	returnType = Vlow;
	2053	else if (str=="low")
	2054	returnType = low;
	2055	else if (str=="mid")
	2056	returnType = mid;
	2057	else if (str=="high")
	2058	returnType = high;
	2059	else if (str=="Vhigh")
	2060	returnType = Vhigh;
	2061	else
	2062	errorMsg::reportError("unknown type in gainLossOptions::optimizationLevel- {VVVlow,VVlow,Vlow, low, mid, high, Vhigh}");
	2063	return returnType;
	2064	}
	2065
	2066	/********************************************************************************************
	2067	enum costMatrixType {file,fitch,diff,diffSquare,gainLossCost};
	2068	*********************************************************************************************/
	2069	string gainLossOptions::getCostMatrixType(costMatrixType type)
	2070	{
	2071	string res = "";
	2072	switch (type)
	2073	{
	2074	case file:
	2075	res = "file";
	2076	break;
	2077	case fitch:
	2078	res = "fitch";
	2079	break;
	2080	case diff:
	2081	res = "diff";
	2082	break;
	2083	case diffSquare:
	2084	res = "diffSquare";
	2085	break;
	2086	case gainLossCost:
	2087	res = "gainLossCost";
	2088	break;
	2089	default:
	2090	errorMsg::reportError("unknown type in gainLossOptions::getCostMatrixType - {file,fitch,diff,diffSquare,gainLossCost}");
	2091	}
	2092	return res;
	2093	}
	2094	//////////////////////////////////////////////////////////////////////////
	2095	gainLossOptions::costMatrixType gainLossOptions::getCostMatrixTypeFromStr(const string& str)
	2096	{
	2097	costMatrixType returnType;
	2098	if (str == "file")
	2099	returnType = file;
	2100	else if (str=="fitch")
	2101	returnType = fitch;
	2102	else if (str=="diff")
	2103	returnType = diff;
	2104	else if (str=="diffSquare")
	2105	returnType = diffSquare;
	2106	else if (str=="gainLossCost")
	2107	returnType = gainLossCost;
	2108	else
	2109	errorMsg::reportError("unknown type in MPoptions::getCostMatrixTypeFromStr- {file,fitch,diff,diffSquare,gainLossCost}");
	2110	return returnType;
	2111	}
	2112
	2113
	2114	/********************************************************************************************
	2115	enum distributionType {GAMMA, GENERAL_GAMMA, UNIFORM,GAMMA_PLUS_INV, GENERAL_GAMMA_PLUS_INV, GAMMA_FIXED_CATEGORIES,GENERAL_GAMMA_FIXED_CATEGORIES, GAMMA_MIXTURE};
	2116	*********************************************************************************************/
	2117	string gainLossOptions::getDistributionType(distributionType type)
	2118	{
	2119	string res = "";
	2120	switch (type)
	2121	{
	2122	case GAMMA_MIXTURE:
	2123	res = "GAMMA_MIXTURE";
	2124	break;
	2125	case GAMMA_PLUS_INV:
	2126	res = "GAMMA_PLUS_INV";
	2127	break;
	2128	case GENERAL_GAMMA_PLUS_INV:
	2129	res = "GENERAL_GAMMA_PLUS_INV";
	2130	break;
	2131	case GAMMA_FIXED_CATEGORIES:
	2132	res = "GAMMA_FIXED_CATEGORIES";
	2133	break;
	2134	case GENERAL_GAMMA_FIXED_CATEGORIES:
	2135	res = "GENERAL_GAMMA_FIXED_CATEGORIES";
	2136	break;
	2137	case GENERAL_GAMMA:
	2138	res = "GENERAL_GAMMA";
	2139	break;
	2140	case GAMMA:
	2141	res = "GAMMA";
	2142	break;
	2143	case UNIFORM:
	2144	res = "UNIFORM";
	2145	break;
	2146
	2147	default:
	2148	errorMsg::reportError("unknown type in gainLossOptions::getDistributionType - {GAMMA, GENERAL_GAMMA, UNIFORM,GAMMA_PLUS_INV, GENERAL_GAMMA_PLUS_INV, GAMMA_FIXED_CATEGORIES,GENERAL_GAMMA_FIXED_CATEGORIES, GAMMA_MIXTURE}");
	2149	}
	2150	return res;
	2151	}
	2152	//////////////////////////////////////////////////////////////////////////
	2153	gainLossOptions::distributionType gainLossOptions::getDistributionType(const string& str)
	2154	{
	2155	if (str == "GAMMA_MIXTURE")
	2156	return GAMMA_MIXTURE;
	2157	if (str == "GAMMA_FIXED_CATEGORIES")
	2158	return GAMMA_FIXED_CATEGORIES;
	2159	if (str == "GENERAL_GAMMA_FIXED_CATEGORIES")
	2160	return GENERAL_GAMMA_FIXED_CATEGORIES;
	2161	if (str == "GENERAL_GAMMA_PLUS_INV")
	2162	return GENERAL_GAMMA_PLUS_INV;
	2163	if (str == "GAMMA_PLUS_INV")
	2164	return GAMMA_PLUS_INV;
	2165	if (str == "GENERAL_GAMMA")
	2166	return GENERAL_GAMMA;
	2167	else if (str == "GAMMA")
	2168	return GAMMA;
	2169	else if (str == "UNIFORM")
	2170	return UNIFORM;
	2171	else
	2172	errorMsg::reportError("unknown type in gainLossOptions::getDistributionType - {GAMMA, GENERAL_GAMMA, UNIFORM,GAMMA_PLUS_INV, GENERAL_GAMMA_PLUS_INV, GAMMA_FIXED_CATEGORIES,GENERAL_GAMMA_FIXED_CATEGORIES, GAMMA_MIXTURE}");
	2173	return GENERAL_GAMMA;
	2174	}
	2175	/********************************************************************************************
	2176	enum discretizationType {FIXED, QUANTILE, LAGUERRE};
	2177	*********************************************************************************************/
	2178	string gainLossOptions::getDiscretizationType(discretizationType type)
	2179	{
	2180	string res = "";
	2181	switch (type)
	2182	{
	2183	case FIXED:
	2184	res = "FIXED";
	2185	break;
	2186	case QUANTILE:
	2187	res = "QUANTILE";
	2188	break;
	2189	case LAGUERRE:
	2190	res = "LAGUERRE";
	2191	break;
	2192	default:
	2193	errorMsg::reportError("unknown type in gainLossOptions::getDistributionType - {FIXED, QUANTILE, LAGUERRE}");
	2194	}
	2195	return res;
	2196	}
	2197	//////////////////////////////////////////////////////////////////////////
	2198	gainLossOptions::discretizationType gainLossOptions::getDiscretizationType(const string& str)
	2199	{
	2200	if (str == "FIXED")
	2201	return FIXED;
	2202	else if (str == "QUANTILE")
	2203	return QUANTILE;
	2204	else if (str == "LAGUERRE")
	2205	return LAGUERRE;
	2206	else
	2207	errorMsg::reportError("unknown type in gainLossOptions::getDistributionType - {FIXED, QUANTILE, LAGUERRE}");
	2208	return QUANTILE;
	2209	}
	2210	/********************************************************************************************
	2211	enum gammmaMixtureOptimizerAlgType {EM, ONE_DIM};
	2212	*********************************************************************************************/
	2213	string gainLossOptions::getGammmaMixtureOptimizerAlgType(gammmaMixtureOptimizerAlgType type)
	2214	{
	2215	string res = "";
	2216	switch (type)
	2217	{
	2218	case ONE_DIM:
	2219	res = "ONE_DIM";
	2220	break;
	2221	case EM:
	2222	res = "EM";
	2223	break;
	2224
	2225	default:
	2226	errorMsg::reportError("unknown type in gainLossOptions::getGammmaMixtureOptimizerAlgType - {EM, ONE_DIM}");
	2227	}
	2228	return res;
	2229	}
	2230
	2231	//////////////////////////////////////////////////////////////////////////
	2232	gainLossOptions::gammmaMixtureOptimizerAlgType gainLossOptions::getGammmaMixtureOptimizerAlgType(const string& str)
	2233	{
	2234	if (str == "ONE_DIM")
	2235	return ONE_DIM;
	2236	else if (str == "EM")
	2237	return EM;
	2238	else
	2239	errorMsg::reportError("unknown type in gainLossOptions::getGammmaMixtureOptimizerAlgType - {EM, ONE_DIM}");
	2240	return EM;
	2241	}
	2242	/********************************************************************************************
	2243	enum treeSearchAlgType {njJC,njML,njJCOLD};
	2244	*********************************************************************************************/
	2245	string gainLossOptions::getTreeSearchAlgType(treeSearchAlgType type)
	2246	{
	2247	string res = "";
	2248	switch (type)
	2249	{
	2250	case njJC:
	2251	res = "njJC";
	2252	break;
	2253	case njML:
	2254	res = "njML";
	2255	break;
	2256	case njJCOLD:
	2257	res = "njJCOLD";
	2258	break;
	2259
	2260	default:
	2261	errorMsg::reportError("unknown type in gainLossOptions::getTreeSearchAlgType - {njJC,njML,njJCOLD}");
	2262	}
	2263	return res;
	2264	}
	2265
	2266	//////////////////////////////////////////////////////////////////////////
	2267	gainLossOptions::treeSearchAlgType gainLossOptions::getTreeSearchAlgType(const string& str)
	2268	{
	2269	if (str == "njJC")
	2270	return njJC;
	2271	else if (str == "njML")
	2272	return njML;
	2273	else if (str == "njJCOLD")
	2274	return njJCOLD;
	2275	else
	2276	errorMsg::reportError("unknown type in gainLossOptions::getTreeSearchAlgAlgType - {njJC,njML,njJCOLD}");
	2277	return njML;
	2278	}
	2279	/********************************************************************************************
	2280	enum characterFreqEvalType {FiftyFifty, LeavesAve, optimizeOverTree};
	2281	*********************************************************************************************/
	2282	string gainLossOptions::getCharacterFreqEvalType(characterFreqEvalType type)
	2283	{
	2284	string res = "";
	2285	switch (type)
	2286	{
	2287	case optimizeOverTree:
	2288	res = "optimizeOverTree";
	2289	break;
	2290	case LeavesAve:
	2291	res = "LeavesAve";
	2292	break;
	2293	case FiftyFifty:
	2294	res = "FiftyFifty";
	2295	break;
	2296
	2297	default:
	2298	errorMsg::reportError("unknown type in gainLossOptions::getCharacterFreqEvalType - {FiftyFifty, LeavesAve, optimizeOverTree}");
	2299	}
	2300	return res;
	2301	}
	2302	//////////////////////////////////////////////////////////////////////////
	2303	gainLossOptions::characterFreqEvalType gainLossOptions::getCharacterFreqEvalType(const string& str)
	2304	{
	2305	if (str == "optimizeOverTree")
	2306	return optimizeOverTree;
	2307	else if (str == "LeavesAve")
	2308	return LeavesAve;
	2309	else if (str == "FiftyFifty")
	2310	return FiftyFifty;
	2311	else
	2312	errorMsg::reportError("unknown type in gainLossOptions::getDistributionTypeStr - {FiftyFifty, LeavesAve, optimizeOverTree}");
	2313	return optimizeOverTree;
	2314	}
	2315
	2316	/********************************************************************************************
	2317	enum rateEstimationMethodType {ebExp, mlRate};
	2318	*********************************************************************************************/
	2319	string gainLossOptions::getRateEstimationMethodType(rateEstimationMethodType type)
	2320	{
	2321	string res = "";
	2322	switch (type)
	2323	{
	2324	case mlRate:
	2325	res = "mlRate";
	2326	break;
	2327	case ebExp:
	2328	res = "ebExp";
	2329	break;
	2330
	2331	default:
	2332	errorMsg::reportError("unknown type in gainLossOptions::getRateEstimationMethodType - {ebExp, mlRate}");
	2333	}
	2334	return res;
	2335	}
	2336	//////////////////////////////////////////////////////////////////////////
	2337	gainLossOptions::rateEstimationMethodType gainLossOptions::getRateEstimationMethodType(const string& str)
	2338	{
	2339	if (str == "ebExp")
	2340	return ebExp;
	2341	else if (str == "mlRate")
	2342	return mlRate;
	2343	else
	2344	errorMsg::reportError("unknown type in gainLossOptions::getRateEstimationMethodType - {ebExp, mlRate}");
	2345	return ebExp;
	2346	}
	2347
	2348	/********************************************************************************************
	2349	Types
	2350	enum simulationType {Uniform, Normal, Gamma, MPestEmp, SMestEmp, GammaNoise, EQ_gEql, EQ_gVrl, Gam_gEql, Gam_gVrl}
	2351	*********************************************************************************************/
	2352	string gainLossOptions::getSimulationType(simulationType type)
	2353	{
	2354	string res = "";
	2355	switch (type)
	2356	{
	2357	case Uniform:
	2358	res = "Uniform";
	2359	break;
	2360	case Normal:
	2361	res = "Normal";
	2362	break;
	2363	case Gamma:
	2364	res = "Gamma";
	2365	break;
	2366	case MPestEmp:
	2367	res = "MPestEmp";
	2368	break;
	2369	case SMestEmp:
	2370	res = "SMestEmp";
	2371	break;
	2372	case GammaNoise:
	2373	res = "GammaNoise";
	2374	break;
	2375	case EQ_gEql:
	2376	res = "EQ_gEql";
	2377	break;
	2378	case EQ_gVrl:
	2379	res = "EQ_gVrl";
	2380	break;
	2381	case Gam_gEql:
	2382	res = "Gam_gEql";
	2383	break;
	2384	case Gam_gVrl:
	2385	res = "Gam_gVrl";
	2386	break;
	2387	default:
	2388	errorMsg::reportError("unknown type in optimizationLevel - {Uniform, Normal, Gamma, MPestEmp,SMestEmp, GammaNoise}");
	2389	}
	2390	return res;
	2391	}
	2392	//////////////////////////////////////////////////////////////////////////
	2393	gainLossOptions::simulationType gainLossOptions::getSimulationTypeFromStr(const string& str)
	2394	{
	2395	simulationType returnType;
	2396	if (str == "Uniform")
	2397	returnType = Uniform;
	2398	else if (str=="Normal")
	2399	returnType = Normal;
	2400	else if (str=="Gamma")
	2401	returnType = Gamma;
	2402	else if (str=="MPestEmp")
	2403	returnType = MPestEmp;
	2404	else if (str=="SMestEmp")
	2405	returnType = SMestEmp;
	2406	else if (str=="GammaNoise")
	2407	returnType = GammaNoise;
	2408	else if (str=="EQ_gEql")
	2409	returnType = EQ_gEql;
	2410	else if (str=="EQ_gVrl")
	2411	returnType = EQ_gVrl;
	2412	else if (str=="Gam_gEql")
	2413	returnType = Gam_gEql;
	2414	else if (str=="Gam_gVrl")
	2415	returnType = Gam_gVrl;
	2416	else
	2417	errorMsg::reportError("unknown type in gainLossOptions::optimizationLevel- {Uniform, Normal, Gamma, MPestEmp,SMestEmp, GammaNoise}");
	2418	return returnType;
	2419	}
	2420
	2421
	2422

+499

-0

programs/gainLoss/gainLossOptions.h less more

	0	/*
	1	Copyright (C) 2011 Tal Pupko TalP@tauex.tau.ac.il.
	2
	3	This program is free software: you can redistribute it and/or modify
	4	it under the terms of the GNU General Public License as published by
	5	the Free Software Foundation, either version 3 of the License, or
	6	(at your option) any later version.
	7
	8	This program is distributed in the hope that it will be useful,
	9	but WITHOUT ANY WARRANTY; without even the implied warranty of
	10	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	11	GNU General Public License for more details.
	12
	13	You should have received a copy of the GNU General Public License
	14	along with this program. If not, see <http://www.gnu.org/licenses/>.
	15	*/
	16
	17
	18	#ifndef __gainLossOptionsParams_OPTION
	19	#define __gainLossOptionsParams_OPTION
	20
	21	#include "definitions.h"
	22	#include <string>
	23	#include <fstream>
	24	using namespace std;
	25
	26	/*
	27	--- utilize CLASS:Parameters ---
	28	USAGE: SETTING DEFAULT PARAMETERS
	29	Note that the type of the parameter is set according to the addParameter arguments.
	30	e.g., If a parameter is set using addParameter with an integer argument then subsequent updates (using updateParameter)
	31	to the same parameter will all be stored as integers.
	32	Therefore the following code should output a 0:
	33	EXAMPLE
	34	Parameters::addParameter("Dummy", 3);
	35	Parameters::updateParameter("Dummy", "This should set it to zero");
	36	cout << Parameters::getString("Dummy");
	37	END
	38	Note also that when setting default values of float parameters always use
	39	a decimal point or else these parameters will be added as integers.
	40
	41	USAGE: READING PARAMETERS FROM FILE
	42	The readParameters method receives an input stream from which parameters are to be read.
	43	Files are structured so that each line specifies the value of a parameter.
	44	Each line gives the parameter name, a white space and then the parameter value.
	45	Lines whose first non white-space character is # are ignored.
	46	A basic schema for using the Parameters class is to set the default
	47	values using addParameter calls and then calling readParameters to read in
	48	parameters with other values or new parameters.
	49	EXAMPLE
	50	Parameters::addParameter("CubeSize", 1.0);
	51	Parameters::addParameter("MinVote", 8);
	52	ifstream params("params");
	53	Parameters::readParameters(params);
	54	params.close();
	55	Parameters::dump(cout);
	56	END
	57	With the following parameters file:
	58	EXAMPLE
	59	CubeSize 0.5
	60	File pdb4hhb.ent
	61	END
	62	The following output should result:
	63	EXAMPLE
	64	CubeSize (Float) 0.5
	65	File (Str) pdb4hhb.ent
	66	MinVote (Int) 8
	67	END
	68
	69	USAGE: SUBCLASSING AND PERFORMANCE
	70	The Parameters engine keeps the parameters in a sorted list.
	71	The correct usage would have been to inherit: e.g., class ProgParams : protected Parameters
	72
	73	*/
	74
	75
	// gainLossOptions: holder of the run-time options of the gainLoss program.
	// Apart from the virtual destructor and out(), every member declared here is
	// static, so the options are accessed as gainLossOptions::_name without an instance.
	// The class also declares the option enums and the enum <-> string conversions.
	76	class gainLossOptions{
	77	public:
	78	enum discretizationType {FIXED, QUANTILE, LAGUERRE};
	79	enum distributionType {GAMMA, GENERAL_GAMMA, UNIFORM,GAMMA_PLUS_INV, GENERAL_GAMMA_PLUS_INV, GAMMA_FIXED_CATEGORIES,GENERAL_GAMMA_FIXED_CATEGORIES, GAMMA_MIXTURE};
	80	enum treeSearchAlgType {njJC,njML,njJCOLD};
	81	enum rateEstimationMethodType {ebExp, mlRate};
	82	enum characterFreqEvalType {FiftyFifty, LeavesAve, optimizeOverTree};
	83	enum gammmaMixtureOptimizerAlgType {EM, ONE_DIM};
	84	enum costMatrixType {file,fitch,diff,diffSquare,gainLossCost};
	85	enum optimizationLevel {VVVlow,VVlow, Vlow, low, mid, high, Vhigh};
	86	enum simulationType {Uniform, Normal, Gamma, MPestEmp,SMestEmp,GammaNoise ,EQ_gEql,EQ_gVrl,Gam_gEql,Gam_gVrl};
	87
	88	//enum optimizeBranchLengthsType {noBBL, mlBBLUniform, mlAndAlphaBBL};
	89
	90	public:
	91	virtual ~gainLossOptions();
	92
	// initialization and parameter-file handling (all take the parameter file name)
	93	static void initOptions(const string& paramFileName);
	94	static void initDefault();
	95	static void readParameters(const string& paramFileName);
	96	static void getParamsFromFile(const string& paramFileName);
	97	static void getOutDirFromFile(const string& paramFileName);
	98	static void verifyConsistParams();
	99	ostream& out() const {return *_outPtr;}; // dereferences the static _outPtr (no null check here)
	100
	101	// conversions from enum to (from) string
	102	static string getDistributionType(distributionType type);
	103	static distributionType getDistributionType(const string& str);
	104	static characterFreqEvalType getCharacterFreqEvalType(const string& str);
	105	static string getCharacterFreqEvalType(characterFreqEvalType type);
	106	static string getRateEstimationMethodType(rateEstimationMethodType type);
	107	static rateEstimationMethodType getRateEstimationMethodType(const string& str);
	108	static string getGammmaMixtureOptimizerAlgType(gammmaMixtureOptimizerAlgType type);
	109	static gammmaMixtureOptimizerAlgType getGammmaMixtureOptimizerAlgType(const string& str);
	110	static string getTreeSearchAlgType(treeSearchAlgType type);
	111	static treeSearchAlgType getTreeSearchAlgType(const string& str);
	112	static string getDiscretizationType(discretizationType type);
	113	static discretizationType getDiscretizationType(const string& str);
	114	static string getCostMatrixType(costMatrixType type);
	115	static costMatrixType getCostMatrixTypeFromStr(const string& str);
	116	static string getOptimizationLevelType(optimizationLevel type);
	117	static optimizationLevel getOptimizationLevelTypeFromStr(const string& str);
	118
	119	static string getSimulationType(simulationType type);
	120	static simulationType getSimulationTypeFromStr(const string& str);
	121
	122	static void readFromParameters2gainLossOptions();
	123	// update parameters by dependencies
	124	static void updateDependencies();
	125	static void updateOptimizationLevel(optimizationLevel level);
	126	static void updateUserGainLossRatio(MDOUBLE gainLossRatio);
	127	static void updateGainLossAsFreq();
	128	static void updateGainEQloss();
	129	static void updateKeepUserGainLossRatio();
	130	static void updateRemoveComputationNotSuiteForModels();
	131	static void updateGainLossDist();
	132	static void updateAccountForMissingData();
	133	static void updateInitParamsAtRandPointsInSimPostExp();
	134	static void updateSimulatePosteriorExpectationOfChange();
	135	static void updateOnlyComputeLikelihood();
	136	static void updateFlatUserParameters();
	137	static void updateNoBBL();
	138	static void updateNoBranchLengthDiffComputation();
	139	static void updateNoOptimization();
	140	static void updatNoSeq();
	141	static void updateParamsInRangeOverrideParamFile();
	142	static void updatParametericBootstrapComputationOfCorrelation();
	143	static void updateParsimonyRun();
	144
	145
	146
	147	public:
	148	//################### Basic parameters:
	149	// input (general)
	150	static string _seqFile; // essential - fasta file with presence(1)/absence(0) for each species over all gene families (positions)
	151	static string _treeFile; // basic - if not given - calculated based on distanceTable
	152	static string _treeFileOrig; // used for branchDiff calc. functionality
	153
	154	static string _rootAt; // name of node to be root (the tree must contain names of internal nodes)
	155	static string _referenceSeq; // the results are printed with this seq in each positions. (default - first)
	156	//static string _mainType;
	157
	158	// output
	159	static string _outDir; // _outDir = "RESULTS", concatenated after current dir location 'pwd'
	160	static string _logFile; // print-outs of the running progress including the estimated parameters optimization
	161	static int _logValue; // verbosity level - ~4 - normal, >7 - load of info
	162	static string _treeOutFile; // "TheTree.ph" - tree after BBL and other changes -
	163	// all of these files are still part of the output, but names are fixed
	164	//static string _outFile; // Rate4Site results (normalized - Ave=0, Sd=1)
	165	//static string _outFileNotNormalize; // Rate4Site results (original)
	166	//static string _outFileGain4Site; // gain4Site results
	167	//static string _outFileLoss4Site; // loss4Site results
	168	//static string _outFileLikeofPos; // compare to model with gainRate=0
	169	//static string _outFilePosteriorExpectationOfChange; // exp01, exp10 per gene
	170
	171
	172
	173	//################################################## Model params
	174	static int _alphabet_size; // 2 - presence(1)/absence(0)
	175	static bool _gainLossDist; // GLM (mixture)
	176	static bool _accountForMissingData; // for phyletic patterns - must be true
	177	static int _minNumOfOnes; // for COG and EggNOG only patterns with 3 or more are observable
	178	static int _minNumOfZeros; // for indels, there is no position with only 1s => minNumOfZeros=1
	179
	180	static bool _gainEQloss; // M1 (the basic model)
	181	static bool _isReversible; // if _isReversible = False -> the root is fixed
	182	static bool _isRootFreqEQstationary; // same "-"
	183	static bool _gainLossDistPlusInvariant; // Automatically True if GENERAL_GAMMA_PLUS_INV or GAMMA_PLUS_INV
	184	static bool _gainLossRateAreFreq; // test parameter where gain+loss = 1, and the "r_Q" is external
	185
	186	//Each of the rates governing the stochastic process are assumed to be sampled from a prior distribution.
	187	static distributionType _rateDistributionType;
	188	static distributionType _gainDistributionType; //(only for the mixture models - _gainLossDist 1)
	189	static distributionType _lossDistributionType; //(only for the mixture models - _gainLossDist 1)
	190	static int _numberOfGainCategories; // gain 3-5 - the overall number of stochasticProcess 9-25
	191	static int _numberOfLossCategories; // loss 3-5
	192	static int _numberOfRateCategories; // discretization usually 4-16
	193	static int _numberOfRateComponents; // gammaMix
	194	static discretizationType _rateDiscretizationType; // QUANTILE, LAGUERRE - only in use for gammaMix
	195
	196
	197	//################################################## computations
	198	static bool _calculateRate4site;
	199	static rateEstimationMethodType _rateEstimationMethod; // mlRate (only option for UNIFORM) or posteriorBayesianExpectation
	200	static bool _calculeGainLoss4site;
	201	static bool _calculePosteriorExpectationOfChange;
	202	static bool _calculateAncestralReconstruct;
	203	static bool _simulatePosteriorExpectationOfChange; // simulate PostExp (to test the accuracy of the stochastic mapping)
	204	static bool _isOnlySimulateSeq; // no mapping or parsimony is done
	205
	206	static bool _simulateSequences; // Test the rate4site computation
	207	static bool _calculateRate4siteSim; // Test the rate4site computation
	208	static bool _calculeBranchLegthDiffFactor; // if BBL is used for each branch - compare length before/after
	209	static bool _findCoEvolvingSitesOldNotWorking; // for the co evolving project
	210	static bool _printAncestralReconstructPosterior;
	211	static bool _saveProbChanges_PosNodeXY; // used for AncestralReconstruct - posterior
	212	static bool _isComputeDistanceFromRootForRecent; // used to classify branches
	213
	214	//################################################## Prints
	215	static bool _printLikelihoodLandscapeAlphaRate;
	216	static bool _printLikelihoodLandscapeGainLoss;
	217	static bool _printLikelihoodLandscapeTheta;
	218	static bool _optAlphaInIteration;
	219	static bool _optBBL_LS_InIteration;
	220	static bool _optBBL_EM_InIteration;
	221	static bool _printTree;
	222	static bool _printSeq;
	223	static bool _printPij_t;
	224	static bool _printLofPos;
	225	static bool _printLofPosBothModels;
	226	static bool _printTreesWithProbabilityValuesAsBP; // tree for each position
	227	static bool _printTreesWithExpectationValuesAsBP; // tree for each position
	228	static bool _printTreesWithAncestralReconstructAsBP;// tree for each position
	229	static bool _printPropExpOfChangeFullData; // huge file...
	230	static bool _printExpPerPosPerBranchMatrix; // Used as input for COMAP
	231	static bool _printComputedCorrelations; // Correlation
	232	static bool _performParametricBootstapCorrelation; // Correlation with simulation as correction
	233	static bool _usePosSpecificSimulations; // pos-specific simulation using startSimultePosteriorExpectationOfChange
	234	static bool _isAddSimulationsWithLowRate; // Correlation with simulation as correction
	235	static bool _isFDRcorrectionForPValInCorrelation; //
	236	static bool _isComputeQVals; // qVals are printed
	237	static MDOUBLE _pValueCutOffForBootStrap; //0.05, 0.01
	238	static bool _isConsiderNegativeCorrelations;
	239	static int _numOfBinsInParametricBootstrapSimulations;
	240	static bool _isDivideBinsByRange; // if true, each bin will get different number of samples, but the rate(Nmin) is eq-partitioned
	241	static bool _isSortVectorOfCorrelationsBinsByLowerRateBound; // if true, each pair pVal is computed according to all simulation with Nmin >= that of pair ()
	242	static bool _isSortVectorOfCorrelationsBinsByMidRateBound; // if true, the bins are overlapping
	243	static MDOUBLE _relativeSizeOfOverLappedBins; // if 0.5, 50% of samples per bin
	244
	245	static bool _isPrintpairWiseCorrelationsAndNmin; // util, for statistics
	246	static bool _isPrintCorrelationsOfAllPairs_Corr; // Huge file
	247	static bool _isPrintCorrelationsOfAllPairs_pVal; // Huge file
	248
	249	static bool _isPrintAllPairsOfCorrelatedSitesIncludingPValsAboveBH; // only pairs with PVal significant after BH will be printed
	250	static bool _isAllCorrTypeReqruiedToBeSignificant; // if true, it is sufficient that one corType results with pVal>BH[corType] not to print
	251	static bool _isNminBasedOnCountBranchesOverCutOff; // if true, Nmin is based on numOfEvent>cutoff, not total expectation
	252	static MDOUBLE _minExpThresholdForPValComputationForCorrelatingPair; // 0, 2,3,..
	253	static bool _isUpdateMinExpThresholdGivenSimulaitonsQuantile; // After simulation, minR is defined by 0.25 quantile in simulation (updated only if higher)
	254	static bool _isUpdateMinExpThresholdGivenRealDataQuantile; // Given real data, minR is defined by the 0.1 percentile (updated only if higher)
	255	static MDOUBLE _updateMinExpThresholdGivenRealDataQuantileVal; // if 0.2, Nmin is for sites above the 0.2 percentile rate
	256
	257	static bool _isUpdateMinExpThresholdGivenHighFractionOfHighCorrel; // After correlation of simulated data is computed minR is elevated to P(corr=1)<
	258	static bool _isCompExtremeValDistribution; // pValue is also estimated assuming EVD distribution
	259
	260	static MDOUBLE _minExpThresholdAsPercentFromNumOfSpeciesForPValComputationForCorrelatingPair; // e.g., if =2, with 500 species, minT = 10
	261
	262	static bool _isCorrelateWithPearson; // Pearson or Spearman's correlation computed for CoEvolution
	263	static bool _isCorrelateWithSpearman;
	264	static bool _isCorrelationsBasedOnMaxParsimonyMapping;
	265
	266	static bool _isAlsoCorrelateWithLoss; // additionally to gain, compute with loss vectors
	267	static bool _isAlsoCorrelateWithBoth; // additionally to gain and loss, compute with a gain . loss concatenated vectors
	268	static bool _isOnlyCorrelateWithBoth; // compute with a gain . loss concatenated vectors, only
	269	static bool _isUseRateForSiteAsNminForCorrelations;
	270	static bool _isRemoveSimulatedPositionsWithExpectedLowNminBasedOnOccur; // Remove simulated position with too low/high occur to save later computation time (quick and (very)dirty)
	271	static bool _isRemoveSimulatedPositionsBasedOnMP; // Remove simulated positions with less than 2 events based on max parsimony (quick and dirty)
	272	static MDOUBLE _minNumOfMPEvent2RemoveSimulatedPositions; // If 1 then gain+loss events must be >=1
	273	static bool _isUpdateminNumOfMPEvent2RemoveSimulatedPositions; // If true, add 0.2 events for every sqrt(num Of species)
	274
	275
	276	static bool _printComputedCorrelationsAllSites; // all-against-all, in STRING format
	277	static string _selectedSitesForCorrelation; // in this file, for each position, the correlation with all other positions is computed.
	278	static bool _isRemoveSeqWithUnknownForLastSelectedSiteForCorrelation; // the last is a trait (with possible unknown)
	279	static int _checkCoEvolWithUnionPAP_against_pos; // PAP will be modified to union (1 in either) with selected position
	280
	281	static bool _isIgnoreCorrelationAmongSelectedSites;
	282	static bool _isNormalizeForBranchExpInCorrCompute;
	283	static bool _isNormalizeByExpectationPerBranch; // else, by branch length
	284
	285
	286	static bool _printAncestralReconstructFullData; // huge file...
	287	static bool _printDEBUGinfo; // huge file...
	288	static bool _printLikelihoodLandscape; // test purpose (Ad-hoc)
	289	static MDOUBLE _likelihoodLandscapeIncrement;
	290	static bool _printP11forgain; // test purpose (Ad-hoc)
	291
	292	//################################################## optimizations
	293	static bool _isInitGainLossByEmpiricalFreq; // the sp is initialized with the empirical 0 and 1 freq
	294	static bool _isBBLEMwithSimpleSpBeforeFullOptimization; // before optimization - BBL-EM is performed with simplified sp
	295	static bool _isSkipFirstParamsOptimization;
	296	static bool _isOptimizeParamsWithLogMinMax; // when the parameter is a positive and values are e.g., [0.01,100] brent works better for [-2,2]
	297
	298
	299	static bool _performOptimizations;
	300	static bool _performOptimizationsBBL;
	301	static bool _performOptimizationsBBLOnlyOnce;
	302	static bool _isLongAndAccurateOptimization;
	303
	304	static bool _isBblLS;
	305	static bool _isbblLSWhenbblEMdontImprove;
	306	static bool _isSkipBblEMWhenbblEMdontImprove;
	307
	308	static bool _isBblEMbeforeLSWithMissSpecifiedModel;
	309
	310	static bool _isBblForceFactorCorrection;
	311	static MDOUBLE _BblFactorCorrection;
	312
	313	static bool _isOptimizeGainLossRatioInsteadOfGainAndLossSeperately;
	314	static bool _isOptimizeInvariantCategoryProb;
	315	static bool _isUpdateOnlyGainBetaForRatio; // currently, not in use
	316	static bool _isComputeLikelihoodDuringInit; // true, unless fast/parsimony run is performed
	317
	318	static bool _isMultipleAllBranchesByFactorAtStart;
	319	static bool _isNormalizeAtStart;
	320
	321	static bool _performOptimizationsROOT;
	322	static bool _performOptimizationsManyStarts;
	323	static bool _performOptimizationsBBLManyStarts;
	324	static bool _correctOptimizationEpsilon; // according to dataset size (initial likelihood)
	325	static bool _simulatedAnnealing; // epsilon is lowered with iterations
	326	static MDOUBLE _simulatedAnnealingMinEpsilonFactor; // to lower to normal epsilons (Model, BBL, Both)
	327	static MDOUBLE _simulatedAnnealingCoolingFactor; // to lower epsilons each iteration
	328
	329	static gammmaMixtureOptimizerAlgType _gammmaMixtureOptimizerAlg; // ONE_DIM or EM (not fully functional)
	330	static characterFreqEvalType _characterFreqEval; // "-F option" the estimation of freq at root: FiftyFifty, LeavesAve, optimizeOverTree
	331
	332	static bool _isStartWithTheta; // the optimization loop of the parameter will start with Theta
	333	static bool _isSkipGainOptimization; //
	334
	335	static MDOUBLE _epsilonOptimizationThetaFactor; // the optimization loop of the parameter will start with Theta (NOTE(review): same text as _isStartWithTheta - presumably a copy-paste; confirm the intended meaning)
	336	static bool _isAlphaLimit; // 0.3 - for Alpha <<0.3, the following computations are erroneous [BUG?]
	337	static bool _isGainLimit; // 0.1 - for Gain <<0.1, the following computations are erroneous [BUG?]
	338	static bool _isHGT_normal_Pij; // test parameter -
	339	static bool _isHGT_with_Q; // test parameter -
	340	static bool _incrementFactorForGain; // test parameter -
	341	static bool _lossBiggerGainLimit; // test parameter -
	342	static MDOUBLE _slopeFactorForGain; // test parameter - limit growth in gain estimation
	343
	344	static optimizationLevel _optimizationLevel; // change all epsilons and related parameters
	345	static MDOUBLE _epsilonOptimizationIterationCycle; //if the log-likelihood after optimization is lower than this threshold - then optimize again.
	346	static MDOUBLE _epsilonOptimizationModel;
	347	static MDOUBLE _epsilonOptimizationBBL;
	348	static MDOUBLE _epsilonOptimizationIterationCycleManyStarts;
	349
	350	static MDOUBLE _epsilonFactor_Model;
	351	static MDOUBLE _epsilonFactor_BBL;
	352	static MDOUBLE _numIterationsFactor_Model;
	353	static MDOUBLE _numIterationsFactor_BBL;
	354
	355	static int _maxNumOfIterations; // over Model,Root, and BBL
	356	static int _maxNumOfIterationsModel;
	357	static int _maxNumOfIterationsBBL;
	358	static int _maxNumOfIterationsManyStarts; // the basic number of manyStarts option (Model and BBL factors are used)
	359
	360	static MDOUBLE _epsilonForReRootFactor; // only for substantial improvement the tree will be re-rooted
	361	static MDOUBLE _percentOfImprovManySarts; // epsilonOptimization = abs(logL)*_percentOfImprovManySarts
	362	static MDOUBLE _percentOfImprov; // epsilonOptimization = abs(logL)*_percentOfImprov
	363
	364	static bool _initParamsAtRandPoints;
	365	static bool _initParamsAtRandPointsInOptimization;
	366	static bool _initRandomGammaMixuteParam;
	367	static int _numberOfRandPointsInOptimization;
	368	static int _numberOfRandStartPoints;
	369
	370	// all the model parameters can be given by the user
	371	static MDOUBLE _userGainLossRatio;
	372	static bool _keepUserGainLossRatio;
	373	static MDOUBLE _userGain;
	374	static MDOUBLE _userLoss;
	375	static MDOUBLE _userTheta; // default 0.5 - otherwise, counting is done prior to optimization
	376	static MDOUBLE _userAlphaGain;
	377	static MDOUBLE _userBetaGain;
	378	static MDOUBLE _userProbInvariantGain;
	379	static MDOUBLE _userAlphaLoss;
	380	static MDOUBLE _userBetaLoss;
	381	static MDOUBLE _userProbInvariantLoss;
	382	static MDOUBLE _userAlphaRate;
	383	static MDOUBLE _userBetaRate;
	384	static MDOUBLE _userProbInvariantRate;
	385	static MDOUBLE _userRateInvariantVal; // The low (~10-8) value that corresponds to rate=0
	386
	387	// for initRand - Rand(x){min<x<max}
	388	static MDOUBLE _userGainMax;
	389	static MDOUBLE _userLossMax;
	390	static MDOUBLE _userThetaMax;
	391	static MDOUBLE _userAlphaGainMax;
	392	static MDOUBLE _userBetaGainMax;
	393	static MDOUBLE _userProbInvariantGainMax;
	394	static MDOUBLE _userAlphaLossMax;
	395	static MDOUBLE _userBetaLossMax;
	396	static MDOUBLE _userProbInvariantLossMax;
	397	static MDOUBLE _userAlphaRateMax;
	398	static MDOUBLE _userBetaRateMax;
	399	static MDOUBLE _userProbInvariantRateMax;
	400
	401	static MDOUBLE _userGainMin;
	402	static MDOUBLE _userLossMin;
	403	static MDOUBLE _userThetaMin;
	404	static MDOUBLE _userAlphaGainMin;
	405	static MDOUBLE _userBetaGainMin;
	406	static MDOUBLE _userProbInvariantGainMin;
	407	static MDOUBLE _userAlphaLossMin;
	408	static MDOUBLE _userBetaLossMin;
	409	static MDOUBLE _userProbInvariantLossMin;
	410	static MDOUBLE _userAlphaRateMin;
	411	static MDOUBLE _userBetaRateMin;
	412	static MDOUBLE _userProbInvariantRateMin;
	413
	414	//################################################## PostExp (Stochastic mapping based Counting)
	415	static int _numOfSimulationsForPotExp; // the counting (expectation) is based on simulations - val: >1000 - accurate enough
	416	//static MDOUBLE _probCutOffSum; // the cutOff to sum count (0.5) "ProbabilityPerPos.txt", "ProbabilityPerPosPerBranch.txt"
	417	static bool _isFewCutOffCounts; // Few Cut offs, not just one
	418	static MDOUBLE _probCutOffCounts; // the cutOff to estimate HGT count (0.6) "gainLossProbExpCountPerPos.txt"
	419	static MDOUBLE _probCutOffPrintEvent; // the cutOff for perPosperBranch (so that file is not too big) (0.05)
	420
	421
	422	//################################################## simulate PostExp (to test the accuracy of the stochastic mapping)
	423	static simulationType _simulationType; // one of the simulationType enumerators declared at the top of this class
	424	static bool _isMPratio;
	425	static int _numberOfPositions2simulate;
	426	static int _numberOfIterations2simulate;
	427	static int _numberOfIterationsForPrintResults; // if =3, each 3 simulation iterations, results are updated (thus, temp results are available)
	428	static MDOUBLE _percentileOfNminWithCorr1RequiredForLastIteration;
	429
	430	static bool _modelOptimizationSimPostExp;
	431	static bool _BBLOptimizationSimPostExp;
	432	static MDOUBLE _epsilonOptForPostExpSimFactor; // reduce optimization run-time in simulations
	433	static MDOUBLE _numOfIterationsOptForPostExpSimFactor; // reduce optimization run-time in simulations
	434	static MDOUBLE _loss2gainRatioToSim;
	435	static bool _isInitGainLossByEmpiricalFreqSimulatePostExp; // the sp is initialized with the empirical 0 and 1 freq
	436	static bool _is3states;
	437	static MDOUBLE _3statesGain;
	438	static MDOUBLE _3statesMore;
	439	static MDOUBLE _3statesLess;
	440	static MDOUBLE _3statesLoss;
	441	static MDOUBLE _3states0;
	442	static MDOUBLE _3states1;
	443
	444	//Used as.... enum simulationType {GAMMA, UNI, MP}
	445	static bool _isFlatTreeBeforOpt; // Flatten the tree before model-based estimation
	446	static bool _isbBLEMwithSimpleSpSimulatePostExp;
	447	static MDOUBLE _noiseLevelInGammaSimulation;
	448	static bool _initParamsAtRandPointsInSimPostExp; // gain, loss rates are sampled uniform distribution
	449	static bool _isMatrixGainLossFromRatioInSimulations; //
	450
	451	static bool _initRootFreqAtRandPointsInSimPostExpEachPos; // not required
	452	static bool _isTheataFromObservedFreq; // The theta is taken from observed freq +random perturbation
	453	static bool _isRootFreqEQstationaryInSimulations;
	454	static bool _isFlatSpBeforeOpt; // need to change to T when performing initParamsFromTrueEstimation
	455
	456	//################################################## CoEvolvingSites
	457	static int _numberOfSequences2simulate;
	458	static int _numberOfSequences2simulateForCoEvol; // number of simulations used in the co-evolving computations val: >1000 - accurate enough
	459	static bool _useTheSameSpForSim;
	460	static bool _isReversibleSim;
	461	static distributionType _rateDistributionTypeSim;
	462	static bool _gainEQlossSim;
	463	static bool _writeSeqSim;
	464
	465	//################################################## Misc.
	466	static MDOUBLE _maxRateForML;
	467	static MDOUBLE _minBranchLength;
	468	static MDOUBLE _maxBranchLength;
	469
	470	static treeSearchAlgType _treeSearchAlg; // To construct tree from distanceTable (JC or others)
	471	static Vdouble* _weights; // positions are weighted (not in use)
	472	static bool _isSequenceUniqPattern;
	473	static bool _isRemovePositionsWithHighPercentOfMissingData;
	474	static MDOUBLE _fractionOfMissingDataToRemove;
	475
	476	static bool _isOnlyComputeLikelihood;
	477	static bool _isAnaliticComputeJumps;
	478	static bool _isNormalizeQ;
	479	static bool _isNormalizeQinSpVVec;
	480	static bool _isNormalizeQandTreeafterOpt;
	481	static bool _isFlatUserParameters;
	482	static bool _isAlphaEqBetaManipulation; // Turn GeneralGamma into Gamma -> Alpha=Beta
	483	static bool _calculeBranchLegthDiffFactorFromInputTrees; // input 2 trees - compute logL diff per branch length
	484	static bool _intersectTreeAndSeq; // input tree and seq (not the same taxa) - intersect, write seq and tree
	485
	486	static bool _isOnlyParsimony;
	487	static bool _calculeMaxParsimonyChange;
	488	static bool _calculeMaxParsimonyChangeSeveralGainLossRatios;
	489	static string _costMatrixfile;
	490	static costMatrixType _costMatrixType;
	491	static MDOUBLE _costMatrixGainLossRatio;
	492
	493	private:
	494	static ostream* _outPtr; // stream returned (dereferenced) by out()
	495	//static ofstream _out_f;
	496
	497	};
	498	#endif

+61

-0

programs/gainLoss/gainLossProject.cpp less more

	0	/*
	1	Copyright (C) 2011 Tal Pupko TalP@tauex.tau.ac.il.
	2
	3	This program is free software: you can redistribute it and/or modify
	4	it under the terms of the GNU General Public License as published by
	5	the Free Software Foundation, either version 3 of the License, or
	6	(at your option) any later version.
	7
	8	This program is distributed in the hope that it will be useful,
	9	but WITHOUT ANY WARRANTY; without even the implied warranty of
	10	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	11	GNU General Public License for more details.
	12
	13	You should have received a copy of the GNU General Public License
	14	along with this program. If not, see <http://www.gnu.org/licenses/>.
	15	*/
	16	#include "gainLoss.h"
	17	#include "computePosteriorExpectationOfChange.h"
	18	#include "gammaDistributionFixedCategories.h"
	19	#include "mixtureDistribution.h"
	20	#include "gainLossOptions.h"
	21	#include "Parameters.h"
	22
	23	using namespace std;
	24	int mainRunOptimize();
	25
	26	int main(int argc, char **argv){
	27
	28	int pp = getpid();
	29	time_t t1,t2;
	30	time(&t1);
	31	if (argc == 1) {
	32	printHelp();// here the -h option will be printed
	33	exit(0);
	34	}
	35	long seed = static_cast<long>(t1) * pp;
	36	talRandom::setSeed(seed); // set 1 for debug
	37	string paramStr = argv[1];
	38	gainLossOptions::initOptions(paramStr);
	39
	40	myLog::setLog(gainLossOptions::_logFile, gainLossOptions::_logValue);
	41	LOG(4,<<"# Process_id= "<<getpid()<<endl);
	42	LOG(6,<<"# the seed = " <<seed<<endl);
	43	Parameters::dump(cout);
	44	//enables other options...
	45	//string mainType = Parameters::getString("mainType");
	46	//if (mainType == "mainRunOptimize")
	47	mainRunOptimize();
	48
	49	time(&t2);
	50	LOGnOUT(4,<<endl<<"TOTAL RUNNING TIME = "<<(t2-t1)/60.0<<" minutes"<<endl);
	51	return 0;
	52	}
	53
	54	/********************************************************************************************
	55	*********************************************************************************************/
	56	int mainRunOptimize(){
	57	gainLoss gl;
	58	gl.run();
	59	return 0;
	60	}

+1043

-0

programs/gainLoss/gainLossUtils.cpp less more

	0	/*
	1	Copyright (C) 2011 Tal Pupko TalP@tauex.tau.ac.il.
	2
	3	This program is free software: you can redistribute it and/or modify
	4	it under the terms of the GNU General Public License as published by
	5	the Free Software Foundation, either version 3 of the License, or
	6	(at your option) any later version.
	7
	8	This program is distributed in the hope that it will be useful,
	9	but WITHOUT ANY WARRANTY; without even the implied warranty of
	10	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	11	GNU General Public License for more details.
	12
	13	You should have received a copy of the GNU General Public License
	14	along with this program. If not, see <http://www.gnu.org/licenses/>.
	15	*/
	16	#include "gainLossUtils.h"
	17	#include "gainLossOptions.h"
	18	#include "gainLossModel.h"
	19	#include "gammaDistributionPlusInvariant.h"
	20	#include "Parameters.h"
	21	#include <cmath>
	22
	23	/********************************************************************************************
	24	*********************************************************************************************/
	25	void printProgramInfo(){
	26	LOGnOUT(4,<<"+=============================================================+"<<endl);
	27	LOGnOUT(4,<<"+ The gainLoss project: "<<endl);
	28	LOGnOUT(4,<<"+ Analysis of Phyletic Patterns in a Likelihood Framework "<<endl);
	29	LOGnOUT(4,<<"+ "<<PROG_INFO<<" "<<endl);
	30	LOGnOUT(4,<<"+ Ofir Cohen - ofircohe@tau.ac.il "<<endl);
	31	LOGnOUT(4,<<"+=============================================================+"<<endl);
	32	}
	33	/********************************************************************************************
	34	*********************************************************************************************/
	35
	36	void printTree (tree &tr, string treeFile){
	37	ofstream treeINodesStream(treeFile.c_str());
	38	printTree(tr,treeINodesStream); //@ gainLoss - the branch lengths are lost
	39	}
	40	void printTree (tree &tr){
	41	string treeINodes_st = gainLossOptions::_outDir + "//" + "TheTree.INodes.ph";
	42	ofstream treeINodesStream(treeINodes_st.c_str());
	43	printTree(tr,treeINodesStream); //Note: @ gainLoss method - the branch lengths are lost
	44
	45	//string tree_st = gainLossOptions::_outDir + "//" + "TheTree.ph";
	46	//ofstream treeStream(tree_st.c_str());
	47	ofstream treeStream(gainLossOptions::_treeOutFile.c_str());
	48	tr.output(treeStream);
	49	treeStream.close();
	50	}
	51
	52	void printTree (tree &tr,ostream &out){
	53	vector<tree::nodeP> vAllNodes;
	54	tr.getAllNodes(vAllNodes,tr.getRoot());
	55	Vstring Vnames(vAllNodes.size());
	56	for (int i = 0; i<vAllNodes.size();++i)
	57	Vnames[vAllNodes[i]->id()] = vAllNodes[i]->name();
	58	printTreeWithValuesAsBP(out,tr,Vnames);
	59	out<<endl;
	60	}
	61
	62	/********************************************************************************************
	63	*********************************************************************************************/
	64	void printTreeWithValuesAsBP(ostream &out, tree &tr, Vstring values, VVVdouble *probs, bool printGains) {
	65	printTreeWithValuesAsBP(out,tr.getRoot(), values,probs);
	66	out<<"["<<values[tr.getRoot()->id()]<<"];";
	67	}
	68	void printTreeWithValuesAsBP(ostream &out, const tree::nodeP &myNode, Vstring values, VVVdouble *probs, bool printGains) {
	69	if (myNode->isLeaf()) {
	70	out<< myNode->name();
	71	if(probs)
	72	if (printGains)
	73	out<<"_P_"<<(*probs)[myNode->id()][0][1];
	74	else //print losses
	75	out<<"_P_"<<(*probs)[myNode->id()][1][0];
	76	out<< ":"<<myNode->dis2father();
	77	return;
	78	} else {
	79	out <<"(";
	80	for (int i=0;i<myNode->getNumberOfSons();++i) {
	81	if (i>0) out <<",";
	82	printTreeWithValuesAsBP(out, myNode->getSon(i), values,probs);
	83	}
	84	out <<")";
	85	if (myNode->isRoot()==false) {
	86	out<< myNode->name();
	87	if(probs)
	88	if (printGains)
	89	out<<"_P_"<<(*probs)[myNode->id()][0][1];
	90	else //print losses
	91	out<<"_P_"<<(*probs)[myNode->id()][1][0];
	92	out<< ":"<<myNode->dis2father();
	93	// out << "["<<values[myNode->id()]<<"]";
	94	}
	95	}
	96	}
	97
	98	/********************************************************************************************
	99	used For AncestralRec
	100	*********************************************************************************************/
	101	void printTreeStatesAsBPValues(ostream &out, Vint &states, tree &tr,
	102	VVVdouble *probs,bool printGains) {
	103	printTreeStatesAsBPValues(out,states, tr.getRoot(), probs);
	104	out<<"["<<(tr.getRoot())->name()<<"-"<<states[(tr.getRoot())->id()]<<"];";
	105	}
	106
	107	void printTreeStatesAsBPValues(ostream &out, Vint &states, const tree::nodeP &myNode, VVVdouble *probs,bool printGains) {
	108	if (myNode->isLeaf()) {
	109	out << myNode->name()<<"-"<<states[myNode->id()]<< ":"<<myNode->dis2father();
	110	return;
	111	} else {
	112	out <<"(";
	113	for (int i=0;i<myNode->getNumberOfSons();++i) {
	114	if (i>0) out <<",";
	115	printTreeStatesAsBPValues(out,states,myNode->getSon(i),probs);
	116	}
	117	out <<")";
	118	if (myNode->isRoot()==false) {
	119	out.precision(3);
	120	if (probs){
	121	if (printGains)
	122	out<<(probs)[myNode->id()][0][2]<<"//"<<(probs)[myNode->id()][1][2];
	123	else //print losses
	124	out<<(probs)[myNode->id()][2][0]<<"//"<<(probs)[myNode->id()][2][1];
	125	}
	126	out << "["<<myNode->name()<<"-"<<states[myNode->id()]<<"]";
	127	out<<":"<<myNode->dis2father();
	128	}
	129	}
	130	}
	131
	132	/********************************************************************************************
	133	used For AncestralRec - Double (posterior size)
	134	*********************************************************************************************/
	135	void printTreeStatesAsBPValues(ostream &out, Vdouble &states, tree &tr,
	136	VVVdouble *probs, bool printGains)
	137	{
	138	printTreeStatesAsBPValues(out,states, tr.getRoot(), probs);
	139	out<<"["<<(tr.getRoot())->name()<<"-"<<states[(tr.getRoot())->id()]<<"];";
	140	}
	141
	142	void printTreeStatesAsBPValues(ostream &out, Vdouble &states, const tree::nodeP &myNode,
	143	VVVdouble *probs,bool printGains)
	144	{
	145	if (myNode->isLeaf()) {
	146	out << myNode->name()<<"-"<<states[myNode->id()]<< ":"<<myNode->dis2father();
	147	return;
	148	} else {
	149	out <<"(";
	150	for (int i=0;i<myNode->getNumberOfSons();++i) {
	151	if (i>0) out <<",";
	152	printTreeStatesAsBPValues(out,states,myNode->getSon(i),probs);
	153	}
	154	out <<")";
	155	if (myNode->isRoot()==false) {
	156	out.precision(3);
	157	if (probs){
	158	if (printGains)
	159	out<<(probs)[myNode->id()][0][2]<<"//"<<(probs)[myNode->id()][1][2];
	160	else //print losses
	161	out<<(probs)[myNode->id()][2][0]<<"//"<<(probs)[myNode->id()][2][1];
	162	}
	163	out << "["<<myNode->name()<<"-"<<states[myNode->id()]<<"]";
	164	out<<":"<<myNode->dis2father();
	165	}
	166	}
	167	}
	168
	169	/********************************************************************************************
	170	*********************************************************************************************/
	171	MDOUBLE factorial (MDOUBLE num){
	172	if (num==1)
	173	return 1.0;
	174	return factorial(num-1)*num;
	175	}
	176
	177
	178	/********************************************************************************************
	179	*********************************************************************************************/
	180	MDOUBLE getRateAlpha(distribution* dist)
	181	{
	182	MDOUBLE res;
	183	//switch (gainLossOptions::_rateDistributionType)
	184	//{
	185	//case (gainLossOptions::GAMMA_PLUS_INV):
	186	// res = static_cast<gammaDistributionPlusInvariant*>(dist)->getAlpha();
	187	// break;
	188	//case (gainLossOptions::GENERAL_GAMMA_PLUS_INV):
	189	// res = static_cast<generalGammaDistributionPlusInvariant*>(dist)->getAlpha();
	190	// break;
	191	//case (gainLossOptions::GAMMA_FIXED_CATEGORIES):
	192	// res = static_cast<gammaDistributionFixedCategories*>(dist)->getAlpha();
	193	// break;
	194	//case (gainLossOptions::GENERAL_GAMMA_FIXED_CATEGORIES):
	195	// res = static_cast<generalGammaDistributionFixedCategories*>(dist)->getAlpha();
	196	// break;
	197	//case (gainLossOptions::GENERAL_GAMMA):
	198	// res = static_cast<generalGammaDistribution*>(dist)->getAlpha();
	199	// break;
	200	//case (gainLossOptions::GAMMA):
	201	// res = static_cast<gammaDistribution*>(dist)->getAlpha();
	202	// break;
	203	//default:
	204	// errorMsg::reportError("unknown type in gainLossOptions::getDistributionType");
	205	//}
	206	if(dynamic_cast<gammaDistributionPlusInvariant*>(dist)){
	207	res = static_cast<gammaDistributionPlusInvariant*>(dist)->getAlpha();
	208	}
	209	else if(dynamic_cast<generalGammaDistributionPlusInvariant*>(dist)){
	210	res = static_cast<generalGammaDistributionPlusInvariant*>(dist)->getAlpha();
	211	}
	212	else if (dynamic_cast<gammaDistributionFixedCategories*>(dist)){
	213	res = static_cast<gammaDistributionFixedCategories*>(dist)->getAlpha();
	214	}
	215	else if (dynamic_cast<generalGammaDistributionFixedCategories*>(dist)){
	216	res = static_cast<generalGammaDistributionFixedCategories*>(dist)->getAlpha();
	217	}
	218	else if (dynamic_cast<generalGammaDistribution*>(dist)){
	219	res = static_cast<generalGammaDistribution*>(dist)->getAlpha();
	220	}
	221	else if (dynamic_cast<gammaDistribution*>(dist)){
	222	res = static_cast<gammaDistribution*>(dist)->getAlpha();
	223	}
	224	else{
	225	LOGnOUT(4,<<"unknown type in gainLossOptions::getDistributionType, zero is filled for Alpha\n");
	226	res = 0;
	227	}
	228	return res;
	229	}
	230	/********************************************************************************************
	231	*********************************************************************************************/
	232	void setRateAlpha(distribution* dist, MDOUBLE paramAlpha)
	233	{
	234	//switch (gainLossOptions::_rateDistributionType)
	235	//{
	236	//case (gainLossOptions::GENERAL_GAMMA_PLUS_INV):
	237	// static_cast<generalGammaDistributionPlusInvariant*>(dist)->setAlpha(paramAlpha);
	238	// break;
	239	//case (gainLossOptions::GAMMA_PLUS_INV):
	240	// static_cast<gammaDistributionPlusInvariant*>(dist)->setAlpha(paramAlpha);
	241	// break;
	242	//case (gainLossOptions::GAMMA_FIXED_CATEGORIES):
	243	// static_cast<gammaDistributionFixedCategories*>(dist)->setAlpha(paramAlpha);
	244	// break;
	245	//case (gainLossOptions::GENERAL_GAMMA_FIXED_CATEGORIES):
	246	// static_cast<generalGammaDistributionFixedCategories*>(dist)->setAlpha(paramAlpha);
	247	// break;
	248	//case (gainLossOptions::GENERAL_GAMMA):
	249	// static_cast<generalGammaDistribution*>(dist)->setAlpha(paramAlpha);
	250	// break;
	251	//case (gainLossOptions::GAMMA):
	252	// static_cast<gammaDistribution*>(dist)->setAlpha(paramAlpha);
	253	// break;
	254	//default:
	255	// errorMsg::reportError("unknown type in distributionType");
	256	//}
	257	if (dynamic_cast<gammaDistributionPlusInvariant*>(dist)){
	258	static_cast<gammaDistributionPlusInvariant*>(dist)->setAlpha(paramAlpha);
	259	}
	260	else if (dynamic_cast<generalGammaDistributionPlusInvariant*>(dist)){
	261	static_cast<generalGammaDistributionPlusInvariant*>(dist)->setAlpha(paramAlpha);
	262	}
	263	else if (dynamic_cast<generalGammaDistributionFixedCategories*>(dist)){
	264	static_cast<generalGammaDistributionFixedCategories*>(dist)->setAlpha(paramAlpha);
	265	}
	266	else if (dynamic_cast<gammaDistributionFixedCategories*>(dist)){
	267	static_cast<gammaDistributionFixedCategories*>(dist)->setAlpha(paramAlpha);
	268	}
	269	else if (dynamic_cast<gammaDistribution*>(dist)){
	270	static_cast<gammaDistribution*>(dist)->setAlpha(paramAlpha);
	271	}
	272	else if (dynamic_cast<generalGammaDistribution*>(dist)){
	273	static_cast<generalGammaDistribution*>(dist)->setAlpha(paramAlpha);
	274	}
	275	else{
	276	errorMsg::reportError("unknown type in distributionType");
	277	}
	278
	279	}
	280	/********************************************************************************************
	281	*********************************************************************************************/
	282	MDOUBLE getRateBeta(distribution* dist)
	283	{
	284	MDOUBLE res;
	285	//switch (gainLossOptions::_rateDistributionType)
	286	//{
	287	//case (gainLossOptions::GENERAL_GAMMA_PLUS_INV):
	288	// res = static_cast<generalGammaDistributionPlusInvariant*>(dist)->getBeta();
	289	// break;
	290	//case (gainLossOptions::GENERAL_GAMMA_FIXED_CATEGORIES):
	291	// res = static_cast<generalGammaDistributionFixedCategories*>(dist)->getBeta();
	292	// break;
	293	//case (gainLossOptions::GENERAL_GAMMA):
	294	// res = static_cast<generalGammaDistribution*>(dist)->getBeta();
	295	// break;
	296	//default:
	297	// errorMsg::reportError("unknown type in gainLossOptions::getDistributionType");
	298	//}
	299	if(dynamic_cast<generalGammaDistributionPlusInvariant*>(dist)){
	300	res = static_cast<generalGammaDistributionPlusInvariant*>(dist)->getBeta();
	301	}
	302	else if(dynamic_cast<generalGammaDistributionFixedCategories*>(dist)){
	303	res = static_cast<generalGammaDistributionFixedCategories*>(dist)->getBeta();
	304	}
	305	else if (dynamic_cast<generalGammaDistribution*>(dist)){
	306	res = static_cast<generalGammaDistribution*>(dist)->getBeta();
	307	}
	308	else{
	309	errorMsg::reportError("unknown type in gainLossOptions::getDistributionType");
	310	}
	311	return res;
	312	}
	313	/********************************************************************************************
	314	*********************************************************************************************/
	315	void setRateBeta(distribution* dist, MDOUBLE paramBeta)
	316	{
	317	//switch (gainLossOptions::_rateDistributionType)
	318	//{
	319	//case (gainLossOptions::GENERAL_GAMMA_PLUS_INV):
	320	// static_cast<generalGammaDistributionPlusInvariant*>(dist)->setBeta(paramBeta);
	321	// break;
	322	//case (gainLossOptions::GENERAL_GAMMA_FIXED_CATEGORIES):
	323	// static_cast<generalGammaDistributionFixedCategories*>(dist)->setBeta(paramBeta);
	324	// break;
	325	//case (gainLossOptions::GENERAL_GAMMA):
	326	// static_cast<generalGammaDistribution*>(dist)->setBeta(paramBeta);
	327	// break;
	328	//default:
	329	// errorMsg::reportError("unknown type in distributionType");
	330	//}
	331
	332	if (dynamic_cast<generalGammaDistributionPlusInvariant*>(dist)){
	333	static_cast<generalGammaDistributionPlusInvariant*>(dist)->setBeta(paramBeta);
	334	}
	335	else if (dynamic_cast<generalGammaDistributionFixedCategories*>(dist)){
	336	static_cast<generalGammaDistributionFixedCategories*>(dist)->setBeta(paramBeta);
	337	}
	338	else if (dynamic_cast<generalGammaDistribution*>(dist)){
	339	static_cast<generalGammaDistribution*>(dist)->setBeta(paramBeta);
	340	}
	341	else{
	342	errorMsg::reportError("unknown type in distributionType");
	343	}
	344
	345	}
	346	/********************************************************************************************
	347	*********************************************************************************************/
	348	bool isAlphaOptimization(distribution* dist)
	349	{
	350	if ((dynamic_cast<gammaDistribution*>(dist)) \|\|
	351	(dynamic_cast<generalGammaDistribution*>(dist)) \|\|
	352	(dynamic_cast<gammaDistributionFixedCategories*>(dist)) \|\|
	353	(dynamic_cast<gammaDistributionPlusInvariant*>(dist)) \|\|
	354	(dynamic_cast<generalGammaDistributionPlusInvariant*>(dist)) \|\|
	355	(dynamic_cast<generalGammaDistributionFixedCategories*>(dist)) )
	356	return true;
	357	else
	358	return false;
	359	}
	360	/********************************************************************************************
	361	*********************************************************************************************/
	362	bool isBetaOptimization(distribution* dist)
	363	{
	364	if( ((dynamic_cast<generalGammaDistribution*>(dist)) \|\|
	365	(dynamic_cast<generalGammaDistributionPlusInvariant*>(dist)) \|\|
	366	(dynamic_cast<generalGammaDistributionFixedCategories*>(dist)) )
	367	&&
	368	!( (dynamic_cast<gammaDistributionFixedCategories*>(dist)) \|\|
	369	(dynamic_cast<gammaDistribution*>(dist)) \|\|
	370	(dynamic_cast<gammaDistributionPlusInvariant*>(dist)) ) )
	371	return true;
	372	else
	373	return false;
	374	}
	375	/********************************************************************************************
	376	*********************************************************************************************/
	377	bool isMixOptimization(distribution* dist)
	378	{
	379	if (dynamic_cast<mixtureDistribution*>(dist) )
	380	return true;
	381	else
	382	return false;
	383	}
	384	/********************************************************************************************
	385	*********************************************************************************************/
	386	bool isInvariantOptimization(distribution* dist, bool onlyForPrintVal)
	387	{
	388	bool isInvariantDist = false;
	389	if (! Parameters::getInt("_isOptimizeInvariantCategoryProb") && !onlyForPrintVal )
	390	return false;
	391
	392	if ( (dynamic_cast<generalGammaDistributionPlusInvariant*>(dist)) \|\|
	393	(dynamic_cast<gammaDistributionPlusInvariant*>(dist)) )
	394	isInvariantDist =true;
	395
	396	return isInvariantDist;
	397	}
	398	/********************************************************************************************
	399	*********************************************************************************************/
	400	bool isThetaOptimization()
	401	{
	402	if (gainLossOptions::_characterFreqEval==gainLossOptions::optimizeOverTree && !gainLossOptions::_isRootFreqEQstationary)
	403	return true;
	404	else
	405	return false;
	406	}
	407
	408	/********************************************************************************************
	409	*********************************************************************************************/
	410	void printHelp(){
	411	cout <<"+-------------------------------------------+"<<endl;
	412	cout<<"+ The gainLoss project: "<<endl;
	413	cout<<"+ Analysis of Phyletic Patterns in a Likelihood Framework "<<endl;
	414	cout<<"+ "<<PROG_INFO<<" "<<endl;
	415	cout<<"+ Ofir Cohen - ofircohe@tau.ac.il "<<endl;
	416	cout <<"use a parameter file with these options: "<<endl;
	417	cout <<"+-------------------------------------------+"<<endl;
	418	cout <<"_seqFile "<<endl;
	419	cout <<"\|-------------------------------------------\|"<<endl;
	420	cout <<"_treeFile "<<endl;
	421	cout <<"\|------------------------------------------\|"<<endl;
	422	cout <<"_rootAt "<<endl;
	423	cout <<"_logFile "<<endl;
	424	cout <<"_logValue "<<endl;
	425	cout <<"_referenceSeq "<<endl;
	426	cout <<"_outDir "<<endl;
	427	cout <<"_outFile "<<endl;
	428	cout <<"_treeOutFile "<<endl;
	429	cout <<"_numberOfGainCategories "<<endl;
	430	cout <<"_numberOfLossCategories "<<endl;
	431	cout <<"_numberOfRateCategories "<<endl;
	432	cout <<"_maxNumOfIterationsModel "<<endl;
	433	cout <<"_epsilonOptimizationModel "<<endl;
	434	cout <<"_maxNumOfIterationsBBL "<<endl;
	435	cout <<"_epsilonOptimizationBBL "<<endl;
	436	cout <<"_epsilonOptimization "<<endl;
	437	cout <<"\|------------------------------------------\|"<<endl;
	438	cout <<"_gainLossDist "<<endl;
	439	cout <<"_calculateRate4site "<<endl;
	440	cout <<"_calculeGainLoss4site "<<endl;
	441	cout <<"_printTree "<<endl;
	442	cout <<"_printPij_t "<<endl;
	443	cout <<"_printLofPos "<<endl;
	444	cout <<"_performOptimizations "<<endl;
	445	cout <<"_isHGT_normal_Pij "<<endl;
	446	cout <<"_isReversible "<<endl;
	447	cout <<"...(a very partial list) "<<endl;
	448	cout <<"+------------------------------------------+"<<endl;
	449
	450	}
	451
	452	/********************************************************************************************
	453	*********************************************************************************************/
	454	void updateGainAlpha(MDOUBLE param,
	455	vector<vector<stochasticProcess*> >& spVVec,
	456	distribution * gainDist, distribution * lossDist, bool isNormalizeQ)
	457	{
	458	bool isReversible = spVVec[0][0]->isReversible();
	459	if (dynamic_cast<generalGammaDistributionPlusInvariant*>(gainDist))
	460	static_cast<generalGammaDistributionPlusInvariant*>(gainDist)->setAlpha(param);
	461	else
	462	static_cast<generalGammaDistribution*>(gainDist)->setAlpha(param);
	463
	464	int numOfSPs = gainDist->categories()*lossDist->categories();
	465	for (int i=0; i < numOfSPs; ++i) {
	466	int gainIndex =fromIndex2gainIndex(i,gainDist->categories(),lossDist->categories());
	467	int lossIndex =fromIndex2lossIndex(i,gainDist->categories(),lossDist->categories());
	468	static_cast<gainLossModel*>(spVVec[gainIndex][lossIndex]->getPijAccelerator()->getReplacementModel())->setMu1(gainDist->rates(gainIndex),isReversible);
	469	}
	470	if(gainLossOptions::_isNormalizeQinSpVVec && isNormalizeQ)
	471	normalizeQ(spVVec, gainDist, lossDist);
	472	}
	473	/********************************************************************************************
	474	*********************************************************************************************/
	475	void updateGainBeta(MDOUBLE param,
	476	vector<vector<stochasticProcess*> >& spVVec,
	477	distribution * gainDist, distribution * lossDist, bool isNormalizeQ)
	478	{
	479	bool isReversible = spVVec[0][0]->isReversible();
	480	MDOUBLE normFactor;
	481
	482	if (dynamic_cast<generalGammaDistributionPlusInvariant*>(gainDist))
	483	static_cast<generalGammaDistributionPlusInvariant*>(gainDist)->setBeta(param);
	484	else
	485	static_cast<generalGammaDistribution*>(gainDist)->setBeta(param);
	486
	487	int numOfSPs = gainDist->categories()*lossDist->categories();
	488	for (int i=0; i < numOfSPs; ++i) {
	489	int gainIndex =fromIndex2gainIndex(i,gainDist->categories(),lossDist->categories());
	490	int lossIndex =fromIndex2lossIndex(i,gainDist->categories(),lossDist->categories());
	491	static_cast<gainLossModel*>(spVVec[gainIndex][lossIndex]->getPijAccelerator()->getReplacementModel())->setMu1(gainDist->rates(gainIndex),isReversible);
	492	}
	493	if(gainLossOptions::_isNormalizeQinSpVVec && isNormalizeQ)
	494	normFactor = normalizeQ(spVVec, gainDist, lossDist);
	495
	496	}
	497	/********************************************************************************************
	498	*********************************************************************************************/
	499	void updateGainProbInvariant(MDOUBLE param, distribution* gainDist)
	500	{
	501	static_cast<generalGammaDistributionPlusInvariant*>(gainDist)->setInvProb(param);
	502	}
	503
	504
	505	/********************************************************************************************
	506	*********************************************************************************************/
	507	void updateLossAlpha(MDOUBLE param,
	508	vector<vector<stochasticProcess*> >& spVVec,
	509	distribution * gainDist, distribution * lossDist, bool isNormalizeQ)
	510	{
	511
	512	if (dynamic_cast<generalGammaDistributionPlusInvariant*>(lossDist))
	513	static_cast<generalGammaDistributionPlusInvariant*>(lossDist)->setAlpha(param);
	514	else
	515	static_cast<generalGammaDistribution*>(lossDist)->setAlpha(param);
	516	int numOfSPs = gainDist->categories()*lossDist->categories();
	517	for (int i=0; i < numOfSPs; ++i) {
	518	int gainIndex =fromIndex2gainIndex(i,gainDist->categories(),lossDist->categories());
	519	int lossIndex =fromIndex2lossIndex(i,gainDist->categories(),lossDist->categories());
	520	static_cast<gainLossModelNonReversible*>(spVVec[gainIndex][lossIndex]->getPijAccelerator()->getReplacementModel())->setMu2(lossDist->rates(lossIndex));
	521	}
	522	if(gainLossOptions::_isNormalizeQinSpVVec && isNormalizeQ)
	523	normalizeQ(spVVec, gainDist, lossDist);
	524	}
	525	/********************************************************************************************
	526	*********************************************************************************************/
	527	void updateLossBeta(MDOUBLE param,
	528	vector<vector<stochasticProcess*> >& spVVec,
	529	distribution * gainDist, distribution * lossDist, bool isNormalizeQ)
	530	{
	531
	532	if (dynamic_cast<generalGammaDistributionPlusInvariant*>(gainDist))
	533	static_cast<generalGammaDistributionPlusInvariant*>(lossDist)->setBeta(param);
	534	else
	535	static_cast<generalGammaDistribution*>(lossDist)->setBeta(param);
	536	int numOfSPs = gainDist->categories()*lossDist->categories();
	537	for (int i=0; i < numOfSPs; ++i) {
	538	int gainIndex =fromIndex2gainIndex(i,gainDist->categories(),lossDist->categories());
	539	int lossIndex =fromIndex2lossIndex(i,gainDist->categories(),lossDist->categories());
	540	static_cast<gainLossModelNonReversible*>(spVVec[gainIndex][lossIndex]->getPijAccelerator()->getReplacementModel())->setMu2(lossDist->rates(lossIndex));
	541	}
	542	if(gainLossOptions::_isNormalizeQinSpVVec && isNormalizeQ)
	543	normalizeQ(spVVec, gainDist, lossDist);
	544	}
	545	/********************************************************************************************
	546	*********************************************************************************************/
	547	void updateLossProbInvariant(MDOUBLE param, distribution* lossDist)
	548	{
	549	static_cast<generalGammaDistributionPlusInvariant*>(lossDist)->setInvProb(param);
	550	}
	551
	552
	553	/********************************************************************************************
	554	*********************************************************************************************/
	555	void updateRateAlpha(MDOUBLE param,
	556	vector<vector<stochasticProcess*> >& spVVec,
	557	distribution * gainDist, distribution * lossDist, bool isNormalizeQ)
	558	{
	559	int numOfSPs = gainDist->categories()*lossDist->categories();
	560	for (int i=0; i < numOfSPs; ++i) {
	561	int gainIndex =fromIndex2gainIndex(i,gainDist->categories(),lossDist->categories());
	562	int lossIndex =fromIndex2lossIndex(i,gainDist->categories(),lossDist->categories());
	563	setRateAlpha(spVVec[gainIndex][lossIndex]->distr(), param);
	564	//static_cast<gammaDistribution*>(spVVec[gainIndex][lossIndex]->distr())->setAlpha(param);
	565	}
	566	if(gainLossOptions::_isNormalizeQinSpVVec && isNormalizeQ)
	567	normalizeQ(spVVec, gainDist, lossDist);
	568	}
	569	/********************************************************************************************
	570	*********************************************************************************************/
	571	void updateRateProbInvariant(MDOUBLE param,
	572	vector<vector<stochasticProcess*> >& spVVec,
	573	distribution * gainDist, distribution * lossDist, bool isNormalizeQ)
	574	{
	575	int numOfSPs = gainDist->categories()*lossDist->categories();
	576	for (int i=0; i < numOfSPs; ++i) {
	577	int gainIndex =fromIndex2gainIndex(i,gainDist->categories(),lossDist->categories());
	578	int lossIndex =fromIndex2lossIndex(i,gainDist->categories(),lossDist->categories());
	579	static_cast<generalGammaDistributionPlusInvariant*>(spVVec[gainIndex][lossIndex]->distr())->setInvProb(param);
	580	}
	581	if(gainLossOptions::_isNormalizeQinSpVVec && isNormalizeQ)
	582	normalizeQ(spVVec, gainDist, lossDist);
	583	}
	584	/********************************************************************************************
	585	*********************************************************************************************/
	586	void updateTheta(MDOUBLE param,
	587	vector<vector<stochasticProcess*> >& spVVec,
	588	distribution * gainDist, distribution * lossDist, bool isNormalizeQ)
	589	{
	590	int numOfSPs = gainDist->categories()*lossDist->categories();
	591	for (int i=0; i < numOfSPs; ++i) {
	592	int gainIndex =fromIndex2gainIndex(i,gainDist->categories(),lossDist->categories());
	593	int lossIndex =fromIndex2lossIndex(i,gainDist->categories(),lossDist->categories());
	594	(static_cast<gainLossModel*>(spVVec[gainIndex][lossIndex]->getPijAccelerator()->getReplacementModel()))->setTheta(param);
	595	}
	596	if(gainLossOptions::_isNormalizeQinSpVVec && isNormalizeQ)
	597	normalizeQ(spVVec, gainDist, lossDist);
	598	}
	599
	600	/********************************************************************************************
	601	*********************************************************************************************/
	602	void cloneSpVVec(vector<vector<stochasticProcess> >& spVVec, vector<vector<stochasticProcess> >& neWspVVec){
	603
	604	neWspVVec.resize(spVVec.size());
	605	for (int gainCategor=0; gainCategor<spVVec.size(); gainCategor++){
	606	neWspVVec[gainCategor].resize(spVVec[0].size());
	607	for (int lossCategor=0; lossCategor<spVVec[0].size(); lossCategor++){
	608	neWspVVec[gainCategor][lossCategor] = spVVec[gainCategor][lossCategor]->clone();
	609	}
	610	}
	611	}
	612
	613	/********************************************************************************************
	614	*********************************************************************************************/
	615	void deleteSpVVec(vector<vector<stochasticProcess> > spVVec_p){
	616
	617	if(spVVec_p){
	618	for (int gainCategor=0; gainCategor<spVVec_p->size(); gainCategor++){
	619	for (int lossCategor=0; lossCategor<(*spVVec_p)[0].size(); lossCategor++){
	620	delete (*spVVec_p)[gainCategor][lossCategor];
	621	}
	622	}
	623	}
	624	}
	625
	626
	627	/********************************************************************************************
	628	*********************************************************************************************/
	629	void clearVVVV(VVVVdouble& vetor){
	630	for (int i=0;i<vetor.size();++i){
	631	if(vetor.size()==0)
	632	break;
	633	for (int j=0;j<vetor[i].size();++j){
	634	if(vetor[i].size()==0)
	635	break;
	636	for (int k=0;j<vetor[i][j].size();++k){
	637	if(vetor[i][j].size()==0)
	638	break;
	639	if(vetor[i][j][k].size()==0)
	640	break;
	641	vetor[i][j][k].clear();
	642	}
	643	vetor[i][j].clear();
	644	}
	645	vetor[i].clear();
	646	}
	647	vetor.clear();
	648	}
	649
	650	/********************************************************************************************
	651	*********************************************************************************************/
	652	void clearVVV(VVVdouble& vetor){
	653	for (int i=0;i<vetor.size();++i){
	654	if(vetor.size()==0)
	655	break;
	656	for (int j=0;j<vetor[i].size();++j){
	657	if(vetor[i].size()==0)
	658	break;
	659	vetor[i][j].clear();
	660	}
	661	vetor[i].clear();
	662	}
	663	vetor.clear();
	664	}
	665
	666
	667	/********************************************************************************************
	668	*********************************************************************************************/
	669	void resizeVVVV(int dim1, int dim2, int dim3, int dim4, VVVVdouble& vetor){
	670
	671	vetor.resize(dim1);
	672	for (int posNum=0;posNum<vetor.size();++posNum){
	673	vetor[posNum].resize(dim2);
	674	for (int n=0;n<vetor[posNum].size();++n){
	675	resizeMatrix(vetor[posNum][n],dim3,dim4);
	676	}
	677	}
	678	}
	679	/********************************************************************************************
	680	*********************************************************************************************/
	681	void resizeVVV(int dim1, int dim2, int dim3, VVVdouble& vetor){
	682	vetor.resize(dim1);
	683	for (int n=0;n<vetor.size();++n){
	684	resizeMatrix(vetor[n],dim2,dim3);
	685	}
	686	}
	687
	688	///********************************************************************************************
	689	//*********************************************************************************************/
	690	//MDOUBLE getDistance2ROOT(const tree::nodeP &myNode){
	691	// if(myNode->isRoot())
	692	// return 0.0;
	693	// else
	694	// return ( myNode->dis2father() + getDistance2ROOT(myNode->father()) );
	695	//}
	696	///********************************************************************************************
	697	//getMinimalDistance2OTU()
	698	//This implementation is only for binary trees.
	699	//Can easily be generalized to arbitrary number of sons.
	700	//*********************************************************************************************/
	701	//MDOUBLE getMinimalDistance2OTU(const tree::nodeP &myNode){
	702	// if(myNode->isLeaf())
	703	// return 0.0;
	704	// else{
	705	// if(myNode->getNumberOfSons()>2)
	706	// LOGnOUT(3,<<" ERROR: getMinimalDistance2OTU is only for binary trees, and this node "
	707	// <<myNode->name()<<" is with "<<myNode->getNumberOfSons()<<"sons.\n The return value is only for first 2 sons\n");
	708	//
	709	// return ( min(
	710	// myNode->getSon(0)->dis2father() + getMinimalDistance2OTU(myNode->getSon(0)),
	711	// myNode->getSon(1)->dis2father() + getMinimalDistance2OTU(myNode->getSon(1))
	712	// ) );
	713	//
	714	// }
	715	//}
	716
	717
	718
	719	/********************************************************************************************
	720	*********************************************************************************************/
	721	void fillVnames(Vstring& Vnames,const tree& tr){
	722	vector<tree::nodeP> vAllNodes;
	723	tr.getAllNodes(vAllNodes,tr.getRoot());
	724	Vnames.resize(vAllNodes.size());
	725	for (int i = 0; i<vAllNodes.size();++i)
	726	Vnames[vAllNodes[i]->id()] = vAllNodes[i]->name();
	727	}
	728	/********************************************************************************************
	729	*********************************************************************************************/
	730	void P11forgain(ostream& out) {
	731	string P11forgain = gainLossOptions::_outDir + "//" + "P11forgain.txt";
	732	ofstream P11forgainStream(P11forgain.c_str());
	733	P11forgainStream.precision(PRECISION);
	734
	735	MDOUBLE loss = 0.0;
	736	MDOUBLE dist = 0.3;
	737	MDOUBLE increment = 0.1;
	738	P11forgainStream <<"gain"<<"\t"<<"loss"<<"\t"<<"dist"<<"\t"<<"P11"<<endl;
	739	for (int ind = 1; ind<10000; ind++){
	740	MDOUBLE gain = ind*increment;
	741	MDOUBLE eigenvalue = -(gain + loss);
	742	MDOUBLE P11 = gain/(-eigenvalue) + exp(eigenvaluedist)(1 - gain/(-eigenvalue));
	743	P11forgainStream <<gain<<"\t"<<loss<<"\t"<<dist<<"\t"<<P11<<endl;
	744	}
	745	}
	746
	747
	748	/********************************************************************************************
	749	normalize the Q matrix so average rate of substitution = 1
	750	*********************************************************************************************/
	751	MDOUBLE normalizeQ(vector<vector<stochasticProcess> >& spVVec, distribution gainDist, distribution * lossDist){
	752	MDOUBLE sumPijQij=0.0;
	753	MDOUBLE scale;
	754
	755	//int numOfSPs = gainDist->categories()*lossDist->categories();
	756	//for (int i=0; i < numOfSPs; ++i) {
	757	// int gainIndex =fromIndex2gainIndex(i,gainDist->categories(),lossDist->categories());
	758	// int lossIndex =fromIndex2lossIndex(i,gainDist->categories(),lossDist->categories());
	759	// sumPijQij+=gainDist->ratesProb(gainIndex)*lossDist->ratesProb(lossIndex)
	760	// (static_cast<gainLossModel>(spVVec[gainIndex][lossIndex]->getPijAccelerator()->getReplacementModel()))->sumPijQij();
	761	//}
	762	//if (sumPijQij ==0){
	763	// errorMsg::reportError("Error in normalizeMatrices - sumPijQij=0");
	764	//}
	765	sumPijQij = sumPijQijVec(spVVec, gainDist, lossDist);
	766	scale = (1.0 / sumPijQij);
	767	normVec(scale, spVVec, gainDist, lossDist);
	768	//for (int i=0; i < numOfSPs; ++i) {
	769	// int gainIndex =fromIndex2gainIndex(i,gainDist->categories(),lossDist->categories());
	770	// int lossIndex =fromIndex2lossIndex(i,gainDist->categories(),lossDist->categories());
	771	// (static_cast<gainLossModel*>(spVVec[gainIndex][lossIndex]->getPijAccelerator()->getReplacementModel()))->norm(scale);
	772	//}
	773	////MDOUBLE AlphaGainLossRatio = getRateAlpha(gainDist)/getRateAlpha(lossDist);
	774	//MDOUBLE newGainBeta = getRateBeta(gainDist)/scale;
	775	//updateGainBeta(newGainBeta,spVVec,gainDist,lossDist,false); // BUG fixed. If only Q matrices are corrected -> problem
	776	//MDOUBLE newLossBeta = getRateBeta(lossDist)/scale;
	777	//updateLossBeta(newLossBeta,spVVec,gainDist,lossDist,false);
	778	return sumPijQij;
	779	}
	780
	781
	782	/********************************************************************************************
	783	compute the sum of Pij*Qij over all gain/loss category pairs, each weighted by its category probabilities (used to normalize Q)
	784	*********************************************************************************************/
	785	MDOUBLE sumPijQijVec(vector<vector<stochasticProcess> >& spVVec, distribution gainDist, distribution * lossDist){
	786	MDOUBLE sumPijQij=0.0;
	787	MDOUBLE scale;
	788
	789	int numOfSPs = gainDist->categories()*lossDist->categories();
	790	for (int i=0; i < numOfSPs; ++i) {
	791	int gainIndex =fromIndex2gainIndex(i,gainDist->categories(),lossDist->categories());
	792	int lossIndex =fromIndex2lossIndex(i,gainDist->categories(),lossDist->categories());
	793	sumPijQij+=gainDist->ratesProb(gainIndex)*lossDist->ratesProb(lossIndex)
	794	(static_cast<gainLossModel>(spVVec[gainIndex][lossIndex]->getPijAccelerator()->getReplacementModel()))->sumPijQij();
	795	}
	796	if (sumPijQij ==0){
	797	errorMsg::reportError("Error in normalizeMatrices - sumPijQij=0");
	798	}
	799	return sumPijQij;
	800	}
	801
	802
	803	/********************************************************************************************
	804	normalize the Q matrix so average rate of substitution = 1
	805	*********************************************************************************************/
	806	void normVec(const MDOUBLE scale, vector<vector<stochasticProcess> >& spVVec, distribution gainDist, distribution * lossDist){
	807
	808	int numOfSPs = gainDist->categories()*lossDist->categories();
	809	for (int i=0; i < numOfSPs; ++i) {
	810	int gainIndex =fromIndex2gainIndex(i,gainDist->categories(),lossDist->categories());
	811	int lossIndex =fromIndex2lossIndex(i,gainDist->categories(),lossDist->categories());
	812	(static_cast<gainLossModel*>(spVVec[gainIndex][lossIndex]->getPijAccelerator()->getReplacementModel()))->norm(scale);
	813	}
	814
	815	MDOUBLE newGainBeta = getRateBeta(gainDist)/scale;
	816	updateGainBeta(newGainBeta,spVVec,gainDist,lossDist,false); // BUG fixed. If only Q matrices are corrected -> problem
	817	MDOUBLE newLossBeta = getRateBeta(lossDist)/scale;
	818	updateLossBeta(newLossBeta,spVVec,gainDist,lossDist,false);
	819	}
	820
	821	/********************************************************************************************/
	822	MDOUBLE normalizeQ(stochasticProcess* sp){
	823	MDOUBLE sumPijQij=(static_cast<gainLossModel*>(sp->getPijAccelerator()->getReplacementModel()))->sumPijQij();
	824	(static_cast<gainLossModel*>(sp->getPijAccelerator()->getReplacementModel()))->norm( 1.0/sumPijQij );
	825	return sumPijQij;
	826	}
	827
	828
	829	/********************************************************************************************
	830	*********************************************************************************************/
	831	MDOUBLE computeExpectationOfStationaryFrequency(distribution* gainDist, distribution* lossDist){
	832	MDOUBLE estimatedStationaryFreq=0;
	833	//if(gainDist->categories() == lossDist->categories()){
	834	for(int i=0; i<gainDist->categories(); ++i){
	835	for(int j=0; j<lossDist->categories(); ++j){
	836	//if(gainDist->ratesProb(i) == lossDist->ratesProb(i)){
	837	estimatedStationaryFreq += (gainDist->rates(i)/(gainDist->rates(i)+lossDist->rates(j)))* gainDist->ratesProb(i)*lossDist->ratesProb(j);
	838	//}
	839	//else{
	840	// LOGnOUT(4,<<" WARN: computeExpectationOfStationaryFrequency did not compute Theta" <<endl);
	841	//}
	842	}
	843	}
	844	//}
	845	//else{
	846	// LOGnOUT(4,<<" WARN: computeExpectationOfStationaryFrequency did not compute Theta" <<endl);
	847	//}
	848	if(estimatedStationaryFreq<0 \|\| estimatedStationaryFreq>1){
	849	LOGnOUT(4,<<" ERROR: computeExpectationOfStationaryFrequency <0 or >1" <<estimatedStationaryFreq<<endl);
	850	return 0;
	851	}
	852	return estimatedStationaryFreq;
	853	}
	854
	855
	856	/********************************************************************************************
	857	E[gain/loss], the expectation of the ratio (not E[gain]/E[loss])
	858	Each gain/loss ratio is weighted by the rateCategories probability
	859	(the product of the gain-category and loss-category probabilities)
	860	*********************************************************************************************/
	861	MDOUBLE computeExpectationOfGainLossRatio(distribution* gainDist, distribution* lossDist){
	862
	863	MDOUBLE compGainLossRatio=0;
	864
	865	for(int i=0; i<gainDist->categories(); ++i){
	866	for(int j=0; j<lossDist->categories(); ++j){
	867	compGainLossRatio += gainDist->rates(i)/lossDist->rates(j) gainDist->ratesProb(i)lossDist->ratesProb(j);
	868	}
	869	}
	870	if(compGainLossRatio<0 ){
	871	LOGnOUT(4,<<" ERROR: compGainLossRatio <0 " <<compGainLossRatio<<endl);
	872	return 0;
	873	}
	874	return compGainLossRatio;
	875	}
	876
	877
	878	/********************************************************************************************
	879	E[gain]/E[loss], the ratio of the expectations (not E[gain/loss])
	880	Each expectation is taken by rateExpectation() over that distribution's
	881	own rate categories
	882	*********************************************************************************************/
	883	MDOUBLE computeExpOfGainByExpOfLossRatio(distribution* gainDist, distribution* lossDist){
	884
	885	MDOUBLE compGainLossRatio = 1;
	886	MDOUBLE ExpGain=0;
	887	MDOUBLE ExpLoss=0;
	888
	889	//for(int i=0; i<gainDist->categories(); ++i){
	890	// ExpGain += gainDist->rates(i) *gainDist->ratesProb(i);
	891	//}
	892	ExpGain = rateExpectation(gainDist);
	893	//for(int j=0; j<lossDist->categories(); ++j){
	894	// ExpLoss += lossDist->rates(j) *lossDist->ratesProb(j);
	895	//}
	896	ExpLoss = rateExpectation(lossDist);
	897	compGainLossRatio = ExpGain/ExpLoss;
	898	if(compGainLossRatio<0 ){
	899	LOGnOUT(4,<<" ERROR: compGainLossRatio <0 " <<compGainLossRatio<<endl);
	900	return 0;
	901	}
	902	return compGainLossRatio;
	903	}
	904
	905	/********************************************************************************************
	906	*********************************************************************************************/
	907	MDOUBLE rateExpectation(distribution* dist){
	908	MDOUBLE ExpRate=0;
	909	bool isWithInvariant = isInvariantOptimization(dist);
	910	if(isWithInvariant){
	911	for(int i=0; i<dist->categories(); ++i){
	912	ExpRate += dist->rates(i) *dist->ratesProb(i);
	913	}
	914	}else{
	915	ExpRate = getRateAlpha(dist)/getRateBeta(dist);
	916	}
	917	return ExpRate;
	918	}
	919
	920
	921
	922	/********************************************************************************************
	923	Mixture
	924	*********************************************************************************************/
	925	void printMixtureParams(stochasticProcess* sp)
	926	{
	927	mixtureDistribution * pMixture = static_cast<mixtureDistribution*>(sp->distr());
	928	for (int k = 0; k < pMixture->getComponentsNum(); ++k)
	929	{
	930	LOGnOUT(4, << "comp="<<k<<" Alp/Beta= "<<pMixture->getAlpha(k)/pMixture->getBeta(k)<<" alpha= "<<pMixture->getAlpha(k) << " beta= " <<pMixture->getBeta(k)<<" Prob= "<<pMixture->getComponentProb(k)<<endl);
	931	}
	932	}
	933
	934	/********************************************************************************************
	935	*********************************************************************************************/
	936	stochasticProcess* startStochasticProcessSimpleGamma(MDOUBLE init_gain, MDOUBLE init_loss, Vdouble& freq, int numberOfRateCategories)
	937	{
	938	LOGnOUT(4,<<"startStochasticProcess SimpleGamma of "<<numberOfRateCategories<<" categories... \nwith gain="
	939	<<init_gain<<" loss="<<init_loss<<" root(1)="<<freq[1]<<endl);
	940	stochasticProcess *spSimple;
	941	replacementModel* glm;
	942	if(!gainLossOptions::_isReversible){
	943	glm = new gainLossModelNonReversible(init_gain,init_loss,freq,gainLossOptions::_isRootFreqEQstationary,gainLossOptions::_isHGT_normal_Pij,gainLossOptions::_isHGT_with_Q);
	944	}
	945	else{
	946	glm = new gainLossModel(init_gain, freq,gainLossOptions::_isRootFreqEQstationary, true,gainLossOptions::_isHGT_normal_Pij,gainLossOptions::_isHGT_with_Q);
	947	}
	948	trivialAccelerator* pijAcc = new trivialAccelerator(glm);
	949	MDOUBLE initAlphaRate = gainLossOptions::_userAlphaRate;
	950	distribution* rateDist = new gammaDistribution(initAlphaRate,numberOfRateCategories);
	951	spSimple = new stochasticProcess(rateDist,pijAcc,gainLossOptions::_isReversible);
	952	if (rateDist) delete rateDist; //at r4s after the sp object is created all other objects dynamically constructed are deleted
	953	if (pijAcc) delete pijAcc;
	954	if (glm) delete glm;
	955	if(gainLossOptions::_isNormalizeQ)
	956	normalizeQ(spSimple);
	957	return spSimple;
	958	}
	959
	960	/********************************************************************************************
	961	Assume the first site number =1 in selectedSites file
	962	*********************************************************************************************/
	963	void readIntegersFromFileIntoVector(Vint& intVector, const int maxAllowed, const int minAllowed, string* inFile,Vint* evolvingSites){
	964	if(inFile){
	965	ifstream myReadFile;
	966	myReadFile.open(inFile->c_str());
	967
	968	bool isSiteLegal = true;
	969	vector<int>::iterator vec_iter;
	970	if (myReadFile.is_open()) {
	971	while (!myReadFile.eof()) {
	972	int site = -1; // thus, only if read a new site
	973	myReadFile>>site;
	974	--site; // since count starts from 1 denoted sites
	975	isSiteLegal = true;
	976	if(evolvingSites){
	977	vec_iter = evolvingSites->begin();
	978	while(vec_iter < evolvingSites->end() && *vec_iter != site)
	979	vec_iter++;
	980	if (vec_iter==evolvingSites->end() )
	981	isSiteLegal = false;
	982	}
	983	if(isSiteLegal && site <= maxAllowed && site>=minAllowed){
	984	intVector.push_back(site);
	985	}
	986	else
	987	LOGnOUT(4,<<" WARN selectedSitesForCorrelation - "<<site<<" is not in seq length or not legal (not among evolvingSites). Thus not included"<<maxAllowed<<endl);
	988	}
	989	}
	990	else
	991	LOGnOUT(4,<<" Error selectedSitesForCorrelation file "<<inFile<<" can't be opened"<<endl);
	992	myReadFile.close();
	993	}
	994	else{
	995	for(int site=minAllowed; site<maxAllowed; ++site){
	996	intVector.push_back(site);
	997	}
	998	}
	999	}
	1000
	1001
	1002	/********************************************************************************************
	1003	Flat Tree BeforeOpt
	1004	*********************************************************************************************/
	1005	void FlatTree(tree& trForSM , MDOUBLE defaultBranchLength){
	1006	LOGnOUT(4,<<"\nNote: FlatTreeBeforeOpt.. with defaultBranchLength="<<defaultBranchLength<<endl);
	1007	treeIterDownTopConst tIt(trForSM);
	1008	for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
	1009	mynode->setDisToFather(defaultBranchLength);
	1010	}
	1011	}
	1012
	1013
	1014	/********************************************************************************************
	1015	fill map_PosXY
	1016	*********************************************************************************************/
	1017	void computeRateValPerPos(VVVVdouble& expChanges_PosNodeXY, VVVdouble& map_PosXY){
	1018	int numOfPositions = expChanges_PosNodeXY.size();
	1019	int numOfBranches = expChanges_PosNodeXY[0].size();
	1020	int AlphSize = expChanges_PosNodeXY[0][0].size(); // =2
	1021	resizeVVV(numOfPositions,AlphSize,AlphSize,map_PosXY);
	1022
	1023	for (int pos = 0; pos <numOfPositions; ++pos){
	1024	for(int b=0;b<numOfBranches;++b){
	1025	for(int j=0;j<AlphSize;++j){
	1026	for(int k=0;k<AlphSize;++k){
	1027	if(gainLossOptions::_isNminBasedOnCountBranchesOverCutOff && expChanges_PosNodeXY[pos][b][j][k]>gainLossOptions::_probCutOffCounts)
	1028	map_PosXY[pos][j][k] += 1;
	1029	else if(!gainLossOptions::_isNminBasedOnCountBranchesOverCutOff)
	1030	map_PosXY[pos][j][k] += expChanges_PosNodeXY[pos][b][j][k];
	1031	}
	1032	}
	1033	}
	1034
	1035	}
	1036
	1037	}
	1038	/********************************************************************************************
	1039	*********************************************************************************************/
	1040	MDOUBLE computeNminRforCorrelWithGainAndLoss(MDOUBLE gainVal, MDOUBLE lossVal){
	1041	return (gainVal+lossVal)/2.0;
	1042	}

+156

-0

programs/gainLoss/gainLossUtils.h less more

	0	/*
	1	Copyright (C) 2011 Tal Pupko TalP@tauex.tau.ac.il.
	2
	3	This program is free software: you can redistribute it and/or modify
	4	it under the terms of the GNU General Public License as published by
	5	the Free Software Foundation, either version 3 of the License, or
	6	(at your option) any later version.
	7
	8	This program is distributed in the hope that it will be useful,
	9	but WITHOUT ANY WARRANTY; without even the implied warranty of
	10	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	11	GNU General Public License for more details.
	12
	13	You should have received a copy of the GNU General Public License
	14	along with this program. If not, see <http://www.gnu.org/licenses/>.
	15	*/
	16
	17
	18	#ifndef ___GAINLOSS_UTILS__
	19	#define ___GAINLOSS_UTILS__
	20
	21	#include "definitions.h"
	22	#include "gainLossAlphabet.h"
	23	#include "gammaDistribution.h"
	24	#include "gammaDistributionFixedCategories.h"
	25	#include "GamMixtureOptimizer.h"
	26	#include "generalGammaDistributionPlusInvariant.h"
	27	#include "logFile.h"
	28	#include "matrixUtils.h"
	29	#include "mixtureDistribution.h"
	30	#include "someUtil.h"
	31	#include "tree.h"
	32	#include "treeIt.h"
	33	#include "evaluateCharacterFreq.h"
	34	#include "trivialAccelerator.h"
	35	#include <math.h>
	36
	37	const string PROG_INFO = static_cast<string>("Version: gainLoss.VR01.266 - last updated 14.10.2013");
	38	const MDOUBLE MINIMUM_PROB_PARAM = static_cast<MDOUBLE>(0.001);
	39	const MDOUBLE MAXIMUM_PROB_PARAM = static_cast<MDOUBLE>(0.999);
	40	const MDOUBLE MINIMUM_FREQ_PARAM = static_cast<MDOUBLE>(0.001); //0.05
	41	const MDOUBLE MAXIMUM_FREQ_PARAM = static_cast<MDOUBLE>(0.999); //0.95
	42	const MDOUBLE MINIMUM_GAIN_PARAM = static_cast<MDOUBLE>(0.0); //0.01
	43	const MDOUBLE MAXIMUM_GAIN_PARAM = static_cast<MDOUBLE>(5.0);
	44	const MDOUBLE MINIMUM_LOSS_PARAM = static_cast<MDOUBLE>(0.01);
	45	const MDOUBLE MAXIMUM_LOSS_PARAM = static_cast<MDOUBLE>(10.0);
	46
	47	const MDOUBLE MINMUM_GAIN_LOSS_RATIO_PARAM = static_cast<MDOUBLE>(0.01);
	48	const MDOUBLE MAXIMUM_GAIN_LOSS_RATIO_PARAM = static_cast<MDOUBLE>(100.0);
	49
	50	const int PRECISION = static_cast<int>(4); // Used for print-outs
	51	const int LOW_PRECISION = static_cast<int>(2); // Used for print-outs, AncestralRec
	52
	53	void printTree (tree &tr, string treeFile);
	54	void printTree (tree &tr,ostream &out);
	55	void printTree (tree &tr);
	56
	57	void printTreeWithValuesAsBP(ostream &out, tree &tr, Vstring values, VVVdouble *probs=NULL ,bool printGains=true) ;
	58	void printTreeWithValuesAsBP(ostream &out, const tree::nodeP &myNode, Vstring values, VVVdouble *probs=NULL ,bool printGains=true) ;
	59
	60	void printTreeStatesAsBPValues(ostream &out, Vint &states, tree &tr, VVVdouble *probs=NULL ,bool printGains=true) ;
	61	void printTreeStatesAsBPValues(ostream &out, Vint &states, const tree::nodeP &myNode, VVVdouble *probs=NULL ,bool printGains=true) ;
	62
	63	void printTreeStatesAsBPValues(ostream &out, Vdouble &states, tree &tr,
	64	VVVdouble *probs=NULL ,bool printGains=true) ;
	65	void printTreeStatesAsBPValues(ostream &out, Vdouble &states, const tree::nodeP &myNode,
	66	VVVdouble *probs=NULL ,bool printGains=true) ;
	67
	68	// --->> into somaUtils
	69	//int fromIndex2gainIndex(const int i, const int gainCategories, const int lossCategories);
	70	//int fromIndex2lossIndex(const int i, const int gainCategories, const int lossCategories);
	71
	72	MDOUBLE factorial (MDOUBLE num);
	73
	74	void printHelp();
	75	void printProgramInfo();
	76
	77	bool isAlphaOptimization(distribution* dist);
	78	bool isBetaOptimization(distribution* dist);
	79	bool isMixOptimization(distribution* dist);
	80	bool isInvariantOptimization(distribution* dist, bool onlyForPrintVal=false);
	81	bool isThetaOptimization();
	82
	83	MDOUBLE getRateAlpha(distribution* dist);
	84	MDOUBLE getRateBeta(distribution* dist);
	85	//MDOUBLE getInvProbability(distribution* dist);
	86
	87	void setRateAlpha(distribution* dist, MDOUBLE paramAlpha);
	88	void setRateBeta(distribution* dist, MDOUBLE paramBeta);
	89
	90	void updateGainAlpha(MDOUBLE param,
	91	vector<vector<stochasticProcess*> >& spVVec,
	92	distribution * gainDist, distribution * lossDist, bool isNormalizeQ=true);
	93	void updateGainBeta(MDOUBLE param,
	94	vector<vector<stochasticProcess*> >& spVVec,
	95	distribution * gainDist, distribution * lossDist, bool isNormalizeQ=true);
	96	void updateGainProbInvariant(MDOUBLE param, distribution* gainDist);
	97
	98	void updateLossAlpha(MDOUBLE param,
	99	vector<vector<stochasticProcess*> >& spVVec,
	100	distribution * gainDist, distribution * lossDist, bool isNormalizeQ=true);
	101	void updateLossBeta(MDOUBLE param,
	102	vector<vector<stochasticProcess*> >& spVVec,
	103	distribution * gainDist, distribution * lossDist, bool isNormalizeQ=true);
	104	void updateLossProbInvariant(MDOUBLE param, distribution* lossDist);
	105
	106	void updateRateAlpha(MDOUBLE param,
	107	vector<vector<stochasticProcess*> >& spVVec,
	108	distribution * gainDist, distribution * lossDist, bool isNormalizeQ=true);
	109	void updateRateProbInvariant(MDOUBLE param,
	110	vector<vector<stochasticProcess*> >& spVVec,
	111	distribution * gainDist, distribution * lossDist, bool isNormalizeQ=true);
	112	void updateTheta(MDOUBLE param,
	113	vector<vector<stochasticProcess*> >& spVVec,
	114	distribution * gainDist, distribution * lossDist, bool isNormalizeQ=true);
	115
	116	void cloneSpVVec(vector<vector<stochasticProcess> >& spVVec, vector<vector<stochasticProcess> >& neWspVVec);
	117	void deleteSpVVec(vector<vector<stochasticProcess> > spVVec_p);
	118
	119	void clearVVVV(VVVVdouble& vetor);
	120	void clearVVV(VVVdouble& vetor);
	121
	122	void resizeVVVV(int dim1, int dim2, int dim3, int dim4, VVVVdouble& vetor);
	123	void resizeVVV(int dim1, int dim2, int dim3, VVVdouble& vetor);
	124
	125	//MDOUBLE getDistance2ROOT(const tree::nodeP &myNode);
	126	//MDOUBLE getMinimalDistance2OTU(const tree::nodeP &myNode); // Only for binary trees
	127
	128	//void startZeroSequenceContainer(const sequenceContainer &sc, sequenceContainer &scZero, gainLossAlphabet &alph);
	129	void fillVnames(Vstring& Vnames,const tree& tr);
	130	void P11forgain(ostream& out=cout) ;
	131
	132	MDOUBLE normalizeQ(vector<vector<stochasticProcess> >& spVVec, distribution gainDist, distribution * lossDist);
	133	MDOUBLE sumPijQijVec(vector<vector<stochasticProcess> >& spVVec, distribution gainDist, distribution * lossDist);
	134	void normVec(const MDOUBLE scale, vector<vector<stochasticProcess> >& spVVec, distribution gainDist, distribution * lossDist);
	135	MDOUBLE normalizeQ(stochasticProcess* sp);
	136
	137	MDOUBLE computeExpectationOfStationaryFrequency(distribution* gainDist, distribution* lossDist);
	138	MDOUBLE computeExpectationOfGainLossRatio(distribution* gainDist, distribution* lossDist);
	139	MDOUBLE computeExpOfGainByExpOfLossRatio(distribution* gainDist, distribution* lossDist);
	140	MDOUBLE rateExpectation(distribution* dist);
	141
	142	void printMixtureParams(stochasticProcess* sp);
	143	stochasticProcess* startStochasticProcessSimpleGamma(MDOUBLE init_gain, MDOUBLE init_loss, Vdouble& freq, int numberOfRateCategories=4);
	144
	145	void readIntegersFromFileIntoVector(Vint& intVector, const int maxAllowed, const int minAllowed, string* inFile=NULL,Vint* evolvingSites=NULL);
	146
	147	void FlatTree(tree& trForSM , MDOUBLE defaultBranchLength=0.3);
	148
	149	void computeRateValPerPos(VVVVdouble& expChanges_PosNodeXY, VVVdouble& map_PosXY);
	150
	151	MDOUBLE computeNminRforCorrelWithGainAndLoss(MDOUBLE gainVal, MDOUBLE lossVal);
	152
	153
	154	#endif
	155

+360

-0

programs/gainLoss/junk.txt less more

	0	#include "computeJumps.h"
	1	//runComputation: Use Suchard equations to compute expectation - good only for {0,1}
	2	void runComputation(const MDOUBLE Lambda1, const MDOUBLE Lambda2);
	3
	4
	5	//runComputation: Use suchard equations to compute expectation - good only for {0,1}
	6	void simulateJumps::runComputation(const MDOUBLE Lambda1, const MDOUBLE Lambda2)
	7	{
	8	computeJumps computeJumpsObj(Lambda1,Lambda2);
	9
	10	//MDOUBLE prob01 = 0.0039;
	11	//MDOUBLE prob02 = 0.9961;
	12	//MDOUBLE branchLength = 0.1;
	13	//MDOUBLE gainExp = computeJumpsObj.gainExp(branchLength,prob01,prob02);
	14	//LOGnOUT(4,<< "gainExp with branchLength="<<branchLength<<" prob01="<<prob01<<" prob02="<<prob02<<" -> "<< gainExp <<endl);
	15
	16	}
	17
	18
	19
	20	Command line:
	21	"D:\My Documents\TAU.BU\HGT\gainLossCode\gainLoss\test\params.txt"
	22	-s "D:\My Documents\pupkoSVN\trunk\programs\gainLoss\test\seqsABC.txt" -dl
	23	-s "D:\My Documents\TAU.BU\HGT\seqs_COG_fullName_noGaps.55.fa" -t "D:\My Documents\TAU.BU\HGT\seqs_COG_fullName.55.ph" -dl -di
	24	-s "D:\My Documents\TAU.BU\HGT\seqs_COG_fullName_noGaps.55.fa" -t "D:\My Documents\TAU.BU\HGT\seqs_COG_fullName.55.ph" -dl -di -xh
	25	-s "D:\My Documents\TAU.BU\HGT\seqs_COG_fullName_noGaps.55.fa" -t "D:\My Documents\TAU.BU\HGT\seqs_COG_fullName.55.ph"
	26	-s "D:\My Documents\pupkoSVN\trunk\programs\gainLoss\test\seqsABC.txt" -t "D:\My Documents\pupkoSVN\trunk\programs\gainLoss\test\treeABC.ph" -bg -ct
	27	-s "D:\My Documents\TAU.BU\HGT\seqs_COG_fullName_noGaps.fa" -t "D:\My Documents\TAU.BU\HGT\seqs_COG_fullName.ph" -bn -ct -v 6 -e 0.05 -dl
	28	-s "D:\My Documents\TAU.BU\HGT\seqs_COG_fullName.fa" -t "D:\My Documents\TAU.BU\HGT\seqs_COG_fullName.ph"
	29	-s "D:\My Documents\TAU.BU\HGT\seqs_COG_abrrev.fa" -t "D:\My Documents\TAU.BU\HGT\seqs_COG_abrrev.ph" -l "D:\My Documents\TAU.BU\HGT\COG_BBL_prints_release\log.txt" -o "D:\My Documents\TAU.BU\HGT\COG_BBL_prints_release\gainLoss.res" -y "D:\My Documents\TAU.BU\HGT\COG_BBL_prints_release\gainLossOrig.res"
	30	-s "D:\My Documents\TAU.BU\HGT\seqs_COG_abrrev.fa" -t "D:\My Documents\TAU.BU\HGT\seqs_COG_abrrev.ph" -l "D:\My Documents\TAU.BU\HGT\COG_BBL_prints_debug\log.txt" -o "D:\My Documents\TAU.BU\HGT\COG_BBL_prints_debug\gainLoss.res" -y "D:\My Documents\TAU.BU\HGT\COG_BBL_prints_debug\gainLossOrig.res"
	31	-s "D:\My Documents\TAU.BU\HGT\seqs_COG_abrrev.fa" -t "D:\My Documents\TAU.BU\HGT\seqs_COG_abrrev.ph" -l "D:\My Documents\TAU.BU\HGT\COG_gamma_mlAndAlphaBBL.randSeedsAndBBL_Run06\log.txt" -o "D:\My Documents\TAU.BU\HGT\COG_gamma_mlAndAlphaBBL.randSeedsAndBBL_Run06\gainLoss.res" -y "D:\My Documents\TAU.BU\HGT\COG_gamma_mlAndAlphaBBL.randSeedsAndBBL_Run06\gainLossOrig.res"
	32	-s "D:\My Documents\TAU.BU\HGT\seqs_COG_abrrev.fa" -t "D:\My Documents\TAU.BU\HGT\seqs_COG_abrrev.ph" -l "D:\My Documents\TAU.BU\HGT\COG_gamma_mlAndAlphaBBL.Baysian_Run01\log.txt" -o "D:\My Documents\TAU.BU\HGT\COG_gamma_mlAndAlphaBBL.Baysian_Run01\gainLoss.res" -y "D:\My Documents\TAU.BU\HGT\COG_gamma_mlAndAlphaBBL.Baysian_Run01\gainLossOrig.res"
	33	-s "D:\My Documents\TAU.BU\HGT\seqs_COG_abrrev.fa" -t "D:\My Documents\TAU.BU\HGT\seqs_COG_abrrev.ph" -l "D:\My Documents\TAU.BU\HGT\COG_BBL_Model_sep\log.txt" -o "D:\My Documents\TAU.BU\HGT\COG_BBL_Model_sep\gainLoss.res" -y "D:\My Documents\TAU.BU\HGT\COG_BBL_Model_sep\gainLossOrig.res"
	34	-s "D:\My Documents\pupkoSVN\trunk\programs\gainLoss\test\seqsABC.txt" -t "D:\My Documents\pupkoSVN\programs\gainLoss\test\treeABC.ph"
	35
	36	-s "D:\My Documents\pupkoSVN\trunk\programs\gainLoss\test\seqsABC.txt" -t "D:\My Documents\pupkoSVN\trunk\programs\gainLoss\test\treeABC.ph" -o "D:\My Documents\pupkoSVN\trunk\programs\gainLoss\test\output.txt"
	37	-n -s "D:\My Documents\TAU.BU\HGT\seqs_COG_abrrev.fa" -t "D:\My Documents\TAU.BU\HGT\seqs_COG_abrrev.ph" -o "D:\My Documents\pupkoSVN\trunk\programs\gainLoss\test\outputALL.txt"
	38	-s "D:\My Documents\pupkoSVN\trunk\programs\gainLoss\test\seqsABC.txt" -t "D:\My Documents\pupkoSVN\trunk\programs\gainLoss\test\treeABC.ph" -d -n
	39
	40	Command line: rate4site
	41	-s "D:\My Documents\Temp\seqs.fa" -t "D:\My Documents\Temp\tree.ph" -o "D:\My Documents\Temp\r4s.out" -Mj
	42	-s "D:\My Documents\TAU.BU\DrugResistance\apv\CoMapLatest\Sequences\apv.coded.partial.fa" -t "D:\My Documents\TAU.BU\DrugResistance\apv\CoMapLatest\Phylogeny\apv.coded.tree" -o "D:\My Documents\pupkoSVN\trunk\programs\gainLoss\R4S\r4s.out" -Mj
	43
	44	/********************************************************************************************
	45	*********************************************************************************************/
	46	//void gainLoss::optimizationsManyStartsNoVec(const MDOUBLE epsilonOptimization, const int numIterations){
	47	// stochasticProcess* bestSp = _sp->clone();
	48	// tree bestTr = _tr;
	49	// cout<<"_tr at: "<<&_tr<<endl;
	50	// cout<<"_sp at: "<<_sp<<endl;
	51	// cout<<"bestTr at: "<<&bestTr<<endl;
	52	// cout<<"bestSp at: "<<bestSp<<endl;
	53	// MDOUBLE bestL = VERYSMALL;
	54	// int bestModel = 0;
	55	//
	56	// for(int i=0; i<gainLossOptions::_numberOfRandPointsInOptimization; ++i){
	57	// LOGnOUT(4,<<"-------startOptimization "<<i<<endl);
	58	// gainLossOptimizer glOpt(_tr,_sp,_sc,epsilonOptimization,numIterations,epsilonOptimization,numIterations,epsilonOptimization,numIterations);
	59	// glOpt.getBestL();
	60	// if(glOpt.getBestL()>bestL){
	61	// bestModel = i;
	62	// bestTr = glOpt.getOptTree();
	63	// bestSp = _sp;
	64	// }
	65	// LOGnOUT(4,<<"-------L= "<<glOpt.getBestL()<<endl);
	66	// }
	67	// _sp = bestSp;
	68	// _tr = bestTr;
	69	// cout<<"_tr at: "<<&_tr<<endl;
	70	// cout<<"_sp at: "<<_sp<<endl;
	71	//}
	72
	73
	74	// converting Q into doubleRep format
	75	// printing the input Q matrix
	76	//cout<<"Q[0][0]="<<convert(_Q[0][0])<<" ";
	77	//cout<<"Q[0][1]="<<convert(_Q[0][1])<<" ";
	78	//cout<<"Q[1][0]="<<convert(_Q[1][0])<<" ";
	79	//cout<<"Q[1][1]="<<convert(_Q[1][1])<<" "<<endl;
	80
	81	//debug: print matrix for 2x2
	82	//cout<<"M[0][0]="<<convert(Pt[0][0])<<" ";
	83	//cout<<"M[0][1]="<<convert(Pt[0][1])<<" ";
	84	//cout<<"M[1][0]="<<convert(Pt[1][0])<<" ";
	85	//cout<<"M[1][1]="<<convert(Pt[1][1])<<" "<<endl;
	86
	87	//DEBUG - reverse m1 and m2
	88	//static_cast<gainLossModelNonReversible>((_sp).getPijAccelerator()->getReplacementModel())->setMu1(bestM2);
	89	//static_cast<gainLossModelNonReversible>((_sp).getPijAccelerator()->getReplacementModel())->setMu2(bestM1);
	90	//res = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(_tr,_sc,*_sp,NULL,_isReverible);
	91
	92
	93	/********************************************************************************************
	94	run
	95	*********************************************************************************************/
	96	void gainLoss::run(){
	97	MDOUBLE res = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(_tr,_sc,*_sp,0,_isReverible);
	98	cout<<"The Tree Likelihood AllPosAlphTheSame is "<<res<<endl;
	99	optimizeParameters();
	100	printPij_t();
	101	}
	102
	103	/********************************************************************************************
	104	initialize
	105	*********************************************************************************************/
	106	void gainLoss::initialize(int argc, char* argv[]) {
	107	string inputTree;
	108	_rootAt = "";
	109	_isReverible = false;
	110	for (int ix = 0; ix < argc; ix++) {
	111	char *pchar=argv[ix];
	112	switch (pchar[0]) {
	113	case '-':
	114	switch (pchar[1]) {
	115	case 'h':
	116	cout <<"USAGE: "<<argv[0]<<" [-options] "<<endl <<endl;
	117	cout <<"+----------------------------------------------+"<<endl;
	118	cout <<"\|-t input treeFile \|"<<endl;
	119	cout <<"\|-s input seqsFile (must be same names as treeFile!\|"<<endl;
	120	cout <<"\|-l logFile \|"<<endl;
	121	cout <<"\|-o output file \|"<<endl;
	122	cout <<"\|-r root at (input the name of the node) \|"<<endl;
	123	cout <<"\|-n The replacement model is non reversible \|"<<endl;
	124	cout <<"\|----------------------------------------------\|"<<endl;
	125	cout <<"\|-h or -? or -H help \|"<<endl;
	126	cout <<"\|capital and lowercase letters are ok \|"<<endl;
	127	cout <<"+----------------------------------------------+"<<endl;
	128	cout<<endl; cerr<<" please press 0 to exit "; int d; cin>>d;exit (0);
	129	case 'l':
	130	_logFile=argv[++ix];
	131	break;
	132	case 'o':
	133	_outPutFile=argv[++ix];
	134	break;
	135	case 'r':
	136	_rootAt=argv[++ix];
	137	break;
	138	case 's':
	139	_seqsFile=argv[++ix];
	140	break;
	141	case 't':
	142	inputTree=argv[++ix];
	143	break;
	144	case 'n':
	145	_isReverible=false;
	146	break;
	147	}
	148	}
	149	}
	150	tree t(inputTree);
	151	_tr = t;
	152	if (!(_rootAt =="")){
	153	tree::nodeP myroot = _tr.findNodeByName(_rootAt); //returns NULL if not found
	154	if (myroot){
	155	_tr.rootAt(myroot);
	156	cout<<"tree rooted at "<<myroot->name()<<endl;
	157	cout<<"sons of root are "<<_tr.getRoot()->getSon(0)->name()<<" , "<<_tr.getRoot()->getSon(1)->name()<<" , "<<_tr.getRoot()->getSon(2)->name()<<endl;
	158	return;
	159	}
	160	}
	161	cout<<"default rooting used, root name is "<<_tr.getRoot()->name()<<endl;
	162	cout<<"sons of root are "<<_tr.getRoot()->getSon(0)->name()<<" , "<<_tr.getRoot()->getSon(1)->name()<<endl;
	163	}
	164
	165	/********************************************************************************************
	166	startingBranchLengthsAndAlpha
	167	*********************************************************************************************/
	168	void gainLoss::startingBranchLengthsAndAlpha(){
	169	int maxBBLIterations = 5;
	170	int maxTotalAlphaBBLIterations = 3;
	171	MDOUBLE epsilonForBBL= 0.1;
	172	MDOUBLE epsilonForAlpha= 0.1;
	173	MDOUBLE upperBoundAlpha = 5.0;
	174	MDOUBLE intitalAlpha = upperBoundAlpha * 0.3;
	175
	176	bestAlphaAndBBL bbl1(_tr, _sc, *_sp, NULL, intitalAlpha, upperBoundAlpha, epsilonForAlpha, epsilonForBBL, maxBBLIterations, maxTotalAlphaBBLIterations);
	177	}
	178
	179	/********************************************************************************************
	180	optimizeBranchLengths
	181	*********************************************************************************************/
	182	void gainLoss::optimizeBranchLengths(MDOUBLE epsilonOptimization){
	183	time_t ltime1;
	184	time( &ltime1 );
	185	LOGnOUT(LOGLEVEL,<<" #################### get Starting Branch Lengths #################### "<<endl);
	186	int maxBBLIterations = 5;
	187	int maxTotalAlphaBBLIterations = 2;
	188	MDOUBLE epsilonForBBL= epsilonOptimization;
	189	MDOUBLE epsilonForAlpha= epsilonOptimization;
	190	MDOUBLE upperBoundAlpha = 10.0;
	191	MDOUBLE intitalAlpha = static_cast<gammaDistribution>((_sp).distr())->getAlpha();
	192
	193	if (gainLossOptions::_rateEstimationMethod == gainLossOptions::mlRate) {
	194	if (gainLossOptions::_optimizeBranchLengths == gainLossOptions::noBBL) {
	195	return;
	196	} else if (gainLossOptions::_optimizeBranchLengths == gainLossOptions::mlBBLUniform) {
	197	bblEM bblEM1(_tr, _sc, *_sp, NULL, maxBBLIterations , epsilonForBBL, epsilonForBBL);
	198	} else {
	199	// Here we want to optimize branch lengths with a gamma model,
	200	// but sp is with a inhomogeneous model. Hence, we have to create a local
	201	// copy of a gamma stochastic process.
	202	if (gainLossOptions::_userInputAlpha != 0) intitalAlpha = gainLossOptions::_userInputAlpha;
	203	gammaDistribution localDist(intitalAlpha,gainLossOptions::_numberOfRateCategories);
	204	stochasticProcess localSP(&localDist,_sp->getPijAccelerator());
	205	if (gainLossOptions::_userInputAlpha == 0) {
	206	// in this case we have to optimize both the alpha and the branch lengths
	207	bestAlphaAndBBL bbl1(_tr, _sc, localSP, NULL, intitalAlpha, upperBoundAlpha, epsilonForAlpha, epsilonForBBL, maxBBLIterations, maxTotalAlphaBBLIterations);
	208	} else {
	209	// in this case we know the alpa, and we want to just optimize branch lengths with this alpha
	210	bestAlphaAndBBL bbl(_tr, _sc, localSP, NULL, intitalAlpha, upperBoundAlpha, epsilonForAlpha, epsilonForBBL, maxBBLIterations, maxTotalAlphaBBLIterations);
	211	}
	212	}
	213	} else { // method for inference is Bayesian
	214	if (gainLossOptions::_optimizeBranchLengths == gainLossOptions::noBBL) {
	215	//FIND BEST ALPHA, AND RETURN WITHOUT CHANING THE TREE
	216	if (gainLossOptions::_userInputAlpha == 0){
	217	bestAlphaFixedTree bbl2(_tr, _sc, *_sp, NULL, upperBoundAlpha, epsilonForAlpha);
	218	} else {// in this case we just want to set the alpha to the right one
	219	static_cast<gammaDistribution*>(_sp->distr())->setAlpha(gainLossOptions::_userInputAlpha);
	220	}
	221	} else if (gainLossOptions::_optimizeBranchLengths == gainLossOptions::mlBBLUniform) {
	222	//FIND TREE WITHOUT ALPHA with an homogenoues model. Update
	223	uniDistribution lUni;
	224	const pijAccelerator* lpijAcc = _sp->getPijAccelerator();// note this is just a copy of the pointer.
	225	stochasticProcess lsp(&lUni,lpijAcc);
	226	bestAlphaAndBBL bbl(_tr, _sc, lsp, NULL, intitalAlpha, upperBoundAlpha, epsilonForAlpha, epsilonForBBL, maxBBLIterations, maxTotalAlphaBBLIterations);
	227	//THEN FIND ALPHA WITHOUT OPT TREE
	228	if (gainLossOptions::_userInputAlpha == 0){
	229	bestAlphaFixedTree bbl3(_tr,_sc,*_sp, NULL, upperBoundAlpha, epsilonForAlpha);
	230	} else {
	231	static_cast<gammaDistribution*>(_sp->distr())->setAlpha(gainLossOptions::_userInputAlpha);
	232	}
	233	} else {
	234	//ML OPT WITH GAMMA
	235	if (gainLossOptions::_userInputAlpha == 0){
	236	bestAlphaAndBBL bbl1(_tr, _sc, *_sp, NULL, intitalAlpha, upperBoundAlpha, epsilonForAlpha, epsilonForBBL, maxBBLIterations, maxTotalAlphaBBLIterations);
	237	} else {// alpha is known
	238	static_cast<gammaDistribution*>(_sp->distr())->setAlpha(gainLossOptions::_userInputAlpha);
	239	bestAlphaAndBBL bbl1(_tr, _sc, *_sp, NULL, intitalAlpha, upperBoundAlpha, epsilonForAlpha, epsilonForBBL, maxBBLIterations, maxTotalAlphaBBLIterations);
	240	}
	241	}
	242	}
	243	LOGnOUT(LOGLEVEL,<<" #################### After Branch Lengths And Alpha #################### "<<endl);
	244	time_t ltime2;
	245	time( &ltime2 );
	246	int t = static_cast<long>(ltime2 - ltime1);
	247	//timingsF<<"time for alpha and branch lengths optimization = "<<t<<endl;
	248	}
	249
	250	/********************************************************************************************
	251	bestAlphaFixedTree
	252	*********************************************************************************************/
	253	//MDOUBLE* optimizeGainLossModel::startingBestAlphaFixedTree(tree& tr,sequenceContainer& sc,stochasticProcess& sp){
	254	// MDOUBLE epsilonForAlpha= 0.01;
	255	// MDOUBLE upperBoundAlpha = upperValueOfParam;
	256	// MDOUBLE intitalAlpha = upperBoundAlpha * 0.3;
	257	// Vdouble* weights = 0;
	258	//
	259	// bestAlphaFixedTree bAlpha(tr,sc,sp,weights,upperBoundAlpha,epsilonForAlpha);
	260	//
	261	// MDOUBLE bestAlphaAndL[] = {bAlpha.getBestAlpha(), bAlpha.getBestL()};
	262	// return bestAlphaAndL; //cause a warning "returning address of local variable or temporary"
	263	//
	264	//}
	265
	266
	267	//MDOUBLE currM1 = talRandom::giveRandomNumberBetweenTwoPoints(lowerValueOfParam, upperValueOfParam);
	268	//MDOUBLE currM2 = talRandom::giveRandomNumberBetweenTwoPoints(lowerValueOfParam, upperValueOfParam);
	269	//MDOUBLE currAlpha = talRandom::giveRandomNumberBetweenTwoPoints(lowerValueOfParam, upperValueOfParam);
	270
	271
	272	/********************************************************************************************
	273	*********************************************************************************************/
	274	void printTreeStatesAsBPValues(ostream &out, Vint &states, tree &tr,
	275	VVVdouble *probs,bool printGains) {
	276	printTreeStatesAsBPValues(out,states, tr.getRoot(), probs);
	277	out<<"["<<states[(tr.getRoot())->id()]<<"];";
	278	//out<<"["<<(tr.getRoot())->name()<<"];";
	279	}
	280	void printTreeStatesAsBPValues(ostream &out, Vint &states, const tree::nodeP &myNode,
	281	VVVdouble *probs,bool printGains) {
	282	if (myNode->isLeaf()) {
	283	out << myNode->name()<< ":"<<myNode->dis2father();
	284	return;
	285	} else {
	286	out <<"(";
	287	for (int i=0;i<myNode->getNumberOfSons();++i) {
	288	if (i>0) out <<",";
	289	printTreeStatesAsBPValues(out,states,myNode->getSon(i),probs);
	290	}
	291	out <<")";
	292	if (myNode->isRoot()==false) {
	293	//out<<states[myNode->id()]<<"--";
	294	//out<<myNode->name();
	295	out.precision(3);
	296	if (probs){
	297	if (printGains)
	298	out<<(*probs)[myNode->id()][0][1];
	299	else //print losses
	300	out<<(*probs)[myNode->id()][1][0];
	301	}
	302	out << "["<<myNode->name()<<"]";
	303	out<<":"<<myNode->dis2father();
	304	}
	305	}
	306	}
	307
	308	/********************************************************************************************
	309	*********************************************************************************************/
	310	void computeEB_EXP_siteSpecificGL_zero(Vdouble & GainLossV,
	311	Vdouble & stdV,
	312	Vdouble & lowerBoundV,
	313	Vdouble & upperBoundV,
	314	VVdouble & posteriorsV,
	315	const sequenceContainer& sc,
	316	const vector<vector<stochasticProcess*> >& spVVec,
	317	const tree& tr,
	318	const distribution * gainDist,
	319	const distribution * lossDist,
	320	const distribution * distPrim,
	321	const MDOUBLE alphaConf)
	322	{
	323	LOG(5,<<"Calculating posterior and expectation of posterior values for all sites Under 'Zero' assignment for non-computed value"<<endl);
	324
	325	vector<vector<stochasticProcess*> > spVVecZero;
	326	spVVecZero.resize(gainDist->categories());
	327	for (int gainCategor=0; gainCategor<gainDist->categories(); gainCategor++){
	328	spVVecZero[gainCategor].resize(lossDist->categories());
	329	for (int lossCategor=0; lossCategor<lossDist->categories(); lossCategor++){
	330	spVVecZero[gainCategor][lossCategor] = spVVec[gainCategor][lossCategor]->clone();
	331	if(distPrim == gainDist){
	332	static_cast<gainLossModelNonReversible>((spVVecZero[gainCategor][lossCategor]).getPijAccelerator()->getReplacementModel())->setMu2(0.0) ;
	333	}
	334	else{
	335	static_cast<gainLossModel>((spVVecZero[gainCategor][lossCategor]).getPijAccelerator()->getReplacementModel())->setMu1(0.0,gainLossOptions::_isReversible) ;
	336	}
	337
	338	}
	339	}
	340
	341	int seqLen = sc.seqLen();
	342	GainLossV.resize(seqLen);
	343	stdV.resize(seqLen);
	344	lowerBoundV.resize(seqLen);
	345	upperBoundV.resize(seqLen);
	346	int numOfSPs = gainDist->categories()*lossDist->categories();
	347	resizeMatrix(posteriorsV,seqLen,numOfSPs);
	348	//computePijGam cpg;
	349	//cpg._V.resize(numOfSPs);
	350	//for (int i=0; i < numOfSPs; ++i) {
	351	// int gainIndex =fromIndex2gainIndex(i,gainDist->categories(),lossDist->categories());
	352	// int lossIndex =fromIndex2lossIndex(i,gainDist->categories(),lossDist->categories());
	353	// cpg._V[i].fillPij(tr,*spVVec[gainIndex][lossIndex]);
	354	//}
	355	for (int pos=0; pos < sc.seqLen(); ++pos) {
	356	computeEB_EXP_siteSpecificGL(pos, sc, spVVecZero, tr, gainDist,lossDist,distPrim,posteriorsV[pos], //cpg
	357	GainLossV[pos], stdV[pos], lowerBoundV[pos], upperBoundV[pos], alphaConf);
	358	}
	359	}⏎

+75

-0

programs/gainLoss/likelihoodClasses.suffStat.computeUp.computeDown.txt less more

	0	How the likelihood is computed:
	1	1. likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame
	2	All positions, sp (could be gammaRate categories)
	3	Perform: fillPij with the class computePijGam (filled for all rate categories)
	4	Next:
	5	1.1 likelihoodComputation::getLofPos( with pi and for all cat)
	6	Use the Pij info
	7	Next:
	8	1.1.1 likelihoodComputation::getLofPos( with pi for single cat )
	9	Perform: fillPij with the class computePijGam (filled for all rate categories)
	10	Use computeUpAlg class to fillComputeUp.
	11	Res += suffStatGlobalHomPos.get(et.getRoot()->id(),let) * sp.freq(let);
	12
	13
	14
	15
	16
	17	The likelihood classes:
	18	Based on the equivalent likelihood by:
	19	L(Tree|Data) = Sum_all_x_y P(D,NodeF=x,Node=y)
	20	and
	21	P(D,Node1=x,Node2=y) = Up(Node,y) * Pij(x,y,dist(NodeF,Node)) * Down(Node,y) //Down is actually calculated over the father
	22
	23
	24
	25
	26	___COMPUTE_PIJ_COMPONENT
	27	// holds the prob fillPij for the tree (all nodes)
	28	computePijHomSpec // for specific node
	29	_V[let1][let2]
	30	computePijHom //all nodes, based on the previous
	31	vector<computePijHomSpec> _V; // let, let
	32	_V[nodeId].getPij(let1,let2)
	33	computePijGam // all rateCategories, based on the previous
	34	vector<computePijHom> _V; // each rate category
	35	_V[rateCategor].getPij(nodeId,let1,let2)
	36
	37
	38
	39	___SUFF_STAT_COMPONENT:
	40	// holds the prob results of computeUp and computeDown
	41	suffStatSpecHomPos[letter][prob] // this is for a specific node
	42	suffStatGlobalHomPos[nodeid][letter][prob] // this is for all nodes
	43	suffStatGlobalGamPos[category][nodeid][letter][prob] // this is for all nodes
	44	For fixed root (non-reversible)- also used as suffStatGlobalGamPos[letter@root][nodeid][letter][prob]
	45
	46	suffStatGlobalHom[pos][nodeid][letter][prob] // this is for all positions (and for all nodes).
	47	suffStatGlobalGam[pos][category][nodeid][letter][prob] // this is for all positions (and for all nodes).
	48
	49
	50	___COMPUTE_UP_ALG
	51	// compute partial likelihoods of subtrees (for each node) - filled into suffStats
	52	fillComputeUp(tr,sc,pi, ->suffStatGlobalGam)
	53	calls:
	54	foreach pos
	55	foreach categor
	56	cupAlg.fillComputeUp(tr,sc,pos,pi[categor],ssc[pos][categor]) // go over all tree to fill suffStatGlobalGam[pos][category][nodeid][letter][prob]
	57
	58	___COMPUTE_DOWN_ALG
	59	// compute partial "upward" likelihoods - for each node N, if Up(N) is the N subtree then Down(N)=P(Tree\Subtree_N)
	60	// uses the suffStat computed by the UpAlg
	61	fillComputeDown(tr,sc,pos,pi,->suffStatGlobalHomPos& ssc, using: suffStatGlobalHomPos& cup)
	62	also a version with given sp instead of pi, if it was not pre-computed (use sp.Pij_t(letter, letterInFather, dist))
	63
	64	Note: the "foreach pos,foreach categor" is looped externally.
	65
	66
	67	___BBL_EM_H
	68	Using the following members:
	69	vector<countTableComponentGam> _computeCountsV; // for each node - a table of rate*alph*alph
	70	computePijGam _pij;
	71	suffStatGlobalGam _cup;
	72	suffStatGlobalGamPos _cdown;
	73
	74

+551

-0

programs/gainLoss/optimizeGainLossModel.cpp less more

	0	/*
	1	Copyright (C) 2011 Tal Pupko TalP@tauex.tau.ac.il.
	2
	3	This program is free software: you can redistribute it and/or modify
	4	it under the terms of the GNU General Public License as published by
	5	the Free Software Foundation, either version 3 of the License, or
	6	(at your option) any later version.
	7
	8	This program is distributed in the hope that it will be useful,
	9	but WITHOUT ANY WARRANTY; without even the implied warranty of
	10	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	11	GNU General Public License for more details.
	12
	13	You should have received a copy of the GNU General Public License
	14	along with this program. If not, see <http://www.gnu.org/licenses/>.
	15	*/
	16	#include "optimizeGainLossModel.h"
	17	#include "Parameters.h"
	18
	19	optimizeGainLossModel::optimizeGainLossModel(const tree& tr, stochasticProcess& sp, const sequenceContainer &sc,
	20	const bool isReversible, /const bool evalTheta,/
	21	MDOUBLE epsilonOptimization, const int numIterations,
	22	Vdouble* weights,
	23	unObservableData* unObservableData_p):
	24	_weightsUniqPatterns(weights), _unObservableData_p(unObservableData_p)
	25	{
	26	//_weights = gainLossOptions::_weights; // since - no weights are used over positions
	27
	28	MDOUBLE MINIMUM_ALPHA_PARAM;
	29	if(gainLossOptions::_isAlphaLimit){
	30	MINIMUM_ALPHA_PARAM = 0.1;
	31	}
	32	else{
	33	MINIMUM_ALPHA_PARAM = ::MINIMUM_ALPHA_PARAM;
	34	}
	35	MDOUBLE MINIMUM_GAIN_PARAM;
	36	if(gainLossOptions::_isGainLimit){
	37	MINIMUM_GAIN_PARAM = 0.1;
	38	}
	39	else{
	40	MINIMUM_GAIN_PARAM = ::MINIMUM_GAIN_PARAM;
	41	}
	42	MDOUBLE MAXIMUM_GAIN_PARAM;
	43	if(gainLossOptions::_gainLossRateAreFreq){
	44	MAXIMUM_GAIN_PARAM = 0.9999;
	45	}
	46	else{
	47	MAXIMUM_GAIN_PARAM = ::MAXIMUM_GAIN_PARAM;
	48	}
	49	MDOUBLE MINMUM_GAIN_LOSS_RATIO_PARAM;
	50	MDOUBLE MAXIMUM_GAIN_LOSS_RATIO_PARAM;
	51	if(gainLossOptions::_isOptimizeParamsWithLogMinMax){
	52	MINMUM_GAIN_LOSS_RATIO_PARAM = log10(::MINMUM_GAIN_LOSS_RATIO_PARAM);
	53	MAXIMUM_GAIN_LOSS_RATIO_PARAM = log10(::MAXIMUM_GAIN_LOSS_RATIO_PARAM);
	54	}else{
	55	MINMUM_GAIN_LOSS_RATIO_PARAM = ::MINMUM_GAIN_LOSS_RATIO_PARAM;
	56	MAXIMUM_GAIN_LOSS_RATIO_PARAM = ::MAXIMUM_GAIN_LOSS_RATIO_PARAM;
	57	}
	58
	59	bool isAllowHigherAlpha = true; // for distribution more 'gaussian' and Eq, need higher alpha, else 10.0
	60	MDOUBLE MAXIMUM_ALPHA_PARAM;
	61	if(isAllowHigherAlpha){
	62	MAXIMUM_ALPHA_PARAM = 100;
	63	}
	64	else{
	65	MAXIMUM_ALPHA_PARAM = ::MAXIMUM_ALPHA_PARAM;
	66	}
	67
	68	bool optimizeAlpha = isAlphaOptimization(sp.distr());
	69	bool optimizeBeta = isBetaOptimization(sp.distr());
	70	bool optimizeMixture = isMixOptimization(sp.distr());
	71	bool probInvariant = isInvariantOptimization(sp.distr());
	72	bool evalTheta = isThetaOptimization();
	73
	74	MDOUBLE previousL;
	75	MDOUBLE currBestL=VERYSMALL;
	76	MDOUBLE currM1=0.1;
	77	MDOUBLE currM2=1; // for non-reversible model only
	78	MDOUBLE currAlpha=1;
	79	MDOUBLE currBeta=1;
	80	MDOUBLE currTheta = 0.5;
	81	MDOUBLE currRateProbInvariant = 0.05;
	82	MDOUBLE currGainLossRatio = 1;
	83	MDOUBLE incrementFactorForGain = gainLossOptions::_slopeFactorForGain; // forces slow climb for gain param
	84	MDOUBLE sumPijQij;
	85	// MissingData
	86	//unObservableData* currUnObservableData_p;
	87	//if(gainLossOptions::_accountForMissingData){
	88	// currUnObservableData_p = new unObservableData(sc, &sp, gainLossAlphabet(),gainLossOptions::_minNumOfOnes);
	89	// currUnObservableData_p->setLforMissingData(tr,&sp);
	90	//}
	91	//else{
	92	// currUnObservableData_p = NULL;
	93	//}
	94
	95	// currSeeds
	96	if(gainLossOptions::_initParamsAtRandPointsInOptimization){
	97	currM1 =talRandom::giveRandomNumberBetweenTwoPoints(MINIMUM_GAIN_PARAM, MAXIMUM_GAIN_PARAM);
	98	currM2=talRandom::giveRandomNumberBetweenTwoPoints(MINIMUM_LOSS_PARAM, MAXIMUM_LOSS_PARAM);
	99	currAlpha = talRandom::giveRandomNumberBetweenTwoPoints(MINIMUM_ALPHA_PARAM, MAXIMUM_ALPHA_PARAM);
	100	currBeta =talRandom::giveRandomNumberBetweenTwoPoints(MINIMUM_BETA_PARAM, MAXIMUM_BETA_PARAM);
	101	currTheta =talRandom::giveRandomNumberBetweenTwoPoints(MINIMUM_FREQ_PARAM, MINIMUM_FREQ_PARAM);
	102	currRateProbInvariant =talRandom::giveRandomNumberBetweenTwoPoints(MINIMUM_PROB_PARAM, MAXIMUM_PROB_PARAM);
	103	}
	104
	105	// initialize - best
	106	int numberOfParameters = 1;
	107	_bestL = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(tr,sc,sp,_weightsUniqPatterns,_unObservableData_p); //PerCat
	108	_bestMu1 = static_cast<gainLossModel*>(sp.getPijAccelerator()->getReplacementModel())->getMu1();
	109	if (!isReversible){
	110	_bestMu2 = static_cast<gainLossModelNonReversible*>(sp.getPijAccelerator()->getReplacementModel())->getMu2();
	111	++numberOfParameters;
	112	}
	113	if(optimizeAlpha){
	114	_bestAlpha = getRateAlpha(sp.distr());
	115	++numberOfParameters;
	116	}
	117	if(optimizeBeta){
	118	_bestBeta = getRateBeta(sp.distr());
	119	++numberOfParameters;
	120	}
	121	if(evalTheta)
	122	++numberOfParameters;
	123	_bestTheta = static_cast<gainLossModel*>(sp.getPijAccelerator()->getReplacementModel())->getTheta(); // take eiter way
	124	if(probInvariant){
	125	_bestRateProbInvariant = static_cast<generalGammaDistributionPlusInvariant*>(sp.distr())->getInvProb();
	126	++numberOfParameters;
	127	}
	128	_bestGainLossRatio = _bestMu1/_bestMu2;
	129	MDOUBLE epsilonOptimizationIterFactor = numberOfParameters;
	130	epsilonOptimizationIterFactor = max(3.0,epsilonOptimizationIterFactor);
	131	MDOUBLE epsilonOptimizationIter = epsilonOptimization*epsilonOptimizationIterFactor; // for e=0.1 next iteration only for ~0.5 logL points
	132
	133	// optimize
	134	LOGnOUT(3,<<"### "<<"optimization starting- epsilonOptParam="<<epsilonOptimization<<" epsilonOptIter= "<<epsilonOptimizationIter<<", MaxNumIterations="<<numIterations<<endl);
	135	LOGnOUT(3,<<"start optimization with:"<<endl<<" L= "<<_bestL<<" gainLossRatio= "<<_bestGainLossRatio<<" gain= "<<_bestMu1);
	136	if(!isReversible) LOGnOUT(3,<<" loss= "<<_bestMu2);
	137	if(optimizeAlpha) LOGnOUT(3,<<" Alpha= "<<_bestAlpha);
	138	if(optimizeBeta) LOGnOUT(3,<<" Beta= "<<_bestBeta);
	139	if(optimizeMixture) LOGnOUT(3,<<" ");
	140	if(evalTheta) LOGnOUT(3,<<" Theta= "<<_bestTheta);
	141	if(probInvariant) LOGnOUT(3,<<" RateProbInvariant= "<<_bestRateProbInvariant<<"\n");
	142	if(optimizeMixture) printMixtureParams(&sp);
	143	LOGnOUT(3,<<endl);
	144
	145	int iter;
	146	for (iter=1;iter<=numIterations;iter++){
	147	previousL = _bestL; // breaking out of loop when no (>epsilon) improvement is made by comparing to previousL
	148	LOGnOUT(4,<<"\n---- iter="<<iter<<endl);
	149	// optimization - Freq (Theta)
	150	if (gainLossOptions::_isStartWithTheta && evalTheta && !gainLossOptions::_isRootFreqEQstationary){
	151	currBestL = -brent(MINIMUM_FREQ_PARAM,_bestTheta,MAXIMUM_FREQ_PARAM,C_evalParam(tr,sp,sc,C_evalParam::theta,isReversible,_weightsUniqPatterns,_unObservableData_p),
	152	epsilonOptimization*gainLossOptions::_epsilonOptimizationThetaFactor,&currTheta);
	153	if (currBestL>_bestL) {
	154	static_cast<gainLossModel*>(sp.getPijAccelerator()->getReplacementModel())->setTheta(currTheta);
	155	sumPijQij = normalizeQ(&sp); //TEST
	156	if(_unObservableData_p) _unObservableData_p->setLforMissingData(tr,&sp);
	157	LOGnOUT(4,<<"currBestL= "<<currBestL<<"\tTheta= "<<currTheta<<endl);
	158	_bestTheta=currTheta;
	159	_bestL=currBestL;
	160	}
	161	}
	162	// optimize mixture
	163	if(optimizeMixture){
	164	GamMixtureOptimizer optGamma(&sp, sc, tr, _unObservableData_p);
	165	int maxIterations = 1;
	166	if(gainLossOptions::_gammmaMixtureOptimizerAlg == gainLossOptions::EM)
	167	currBestL = optGamma.findBestParam(GamMixtureOptimizer::EM, maxIterations, epsilonOptimization, gainLossOptions::_weights);
	168	else if(gainLossOptions::_gammmaMixtureOptimizerAlg == gainLossOptions::ONE_DIM)
	169	currBestL = optGamma.findBestParam(GamMixtureOptimizer::ONE_DIM, maxIterations, epsilonOptimization, gainLossOptions::_weights);
	170	else errorMsg::reportError("unknown type in gammmaMixtureOptimizerAlgType");
	171	if (currBestL>_bestL) {
	172	sumPijQij = normalizeQ(&sp); //TEST
	173	if(_unObservableData_p) _unObservableData_p->setLforMissingData(tr,&sp);
	174	LOGnOUT(4,<<"currBestL= "<<currBestL<<"\timprovment in optimize gammaMixture params"<<endl);
	175	_bestL=currBestL;
	176	}
	177	}
	178	// gainLoss ratio
	179	if(gainLossOptions::_isOptimizeGainLossRatioInsteadOfGainAndLossSeperately && !Parameters::getInt("_keepUserGainLossRatio")){
	180	currBestL = -brent(MINMUM_GAIN_LOSS_RATIO_PARAM , _bestGainLossRatio, MAXIMUM_GAIN_LOSS_RATIO_PARAM, C_evalParam(tr,sp,sc,C_evalParam::gainLossRatio,isReversible,_weightsUniqPatterns,_unObservableData_p),epsilonOptimization,&currGainLossRatio);
	181	if(gainLossOptions::_isOptimizeParamsWithLogMinMax) currGainLossRatio = pow(10,currGainLossRatio);
	182	if (currBestL>_bestL) {
	183	_bestMu1=sqrt(currGainLossRatio);
	184	_bestMu2=sqrt(1.0/currGainLossRatio);
	185	static_cast<gainLossModel*>(sp.getPijAccelerator()->getReplacementModel())->setMu1(_bestMu1,isReversible);
	186	static_cast<gainLossModelNonReversible*>(sp.getPijAccelerator()->getReplacementModel())->setMu2(_bestMu2);
	187	sumPijQij = normalizeQ(&sp); //TEST
	188	if(_unObservableData_p) _unObservableData_p->setLforMissingData(tr,&sp);
	189	LOGnOUT(4,<<"currBestL= "<<currBestL<<"\tGainLossRatio= "<<currGainLossRatio<<endl);
	190	_bestGainLossRatio=currGainLossRatio;
	191	_bestL=currBestL;
	192	}
	193	}else if(!Parameters::getInt("_keepUserGainLossRatio")){
	194	// optimization - gain
	195	if(!gainLossOptions::_isSkipGainOptimization){
	196	if(gainLossOptions::_incrementFactorForGain)
	197	currBestL = -brent(MINIMUM_GAIN_PARAM,_bestMu1,min((_bestMu1*incrementFactorForGain),MAXIMUM_GAIN_PARAM),C_evalParam(tr,sp,sc,C_evalParam::gain,isReversible,_weightsUniqPatterns,_unObservableData_p),epsilonOptimization,&currM1);
	198	else if(gainLossOptions::_lossBiggerGainLimit)
	199	currBestL = -brent(MINIMUM_GAIN_PARAM,_bestMu1,min(_bestMu2,MAXIMUM_GAIN_PARAM),C_evalParam(tr,sp,sc,C_evalParam::gain,isReversible,_weightsUniqPatterns,_unObservableData_p),epsilonOptimization,&currM1);
	200	else
	201	currBestL = -brent(MINIMUM_GAIN_PARAM,_bestMu1,MAXIMUM_GAIN_PARAM,C_evalParam(tr,sp,sc,C_evalParam::gain,isReversible,_weightsUniqPatterns,_unObservableData_p),epsilonOptimization,&currM1);
	202	}
	203	if (currBestL>_bestL) {
	204	static_cast<gainLossModel*>(sp.getPijAccelerator()->getReplacementModel())->setMu1(currM1,isReversible);
	205	sumPijQij = normalizeQ(&sp); //TEST
	206	if(_unObservableData_p) _unObservableData_p->setLforMissingData(tr,&sp);
	207	LOGnOUT(4,<<"currBestL= "<<currBestL<<"\tGain= "<<currM1<<endl);
	208	_bestMu1=currM1;
	209	_bestL=currBestL;
	210	}
	211	// optimization - loss
	212	if (!isReversible & !gainLossOptions::_gainLossRateAreFreq){
	213	if(gainLossOptions::_lossBiggerGainLimit) currBestL = -brent(max(_bestMu1,MINIMUM_LOSS_PARAM),_bestMu2,MAXIMUM_LOSS_PARAM,C_evalParam(tr,sp,sc,C_evalParam::loss,isReversible,_weightsUniqPatterns,_unObservableData_p),epsilonOptimization,&currM2);
	214	else currBestL = -brent(MINIMUM_LOSS_PARAM,_bestMu2,MAXIMUM_LOSS_PARAM,C_evalParam(tr,sp,sc,C_evalParam::loss,isReversible,_weightsUniqPatterns,_unObservableData_p),epsilonOptimization,&currM2);
	215	if (currBestL>_bestL) {
	216	static_cast<gainLossModelNonReversible*>(sp.getPijAccelerator()->getReplacementModel())->setMu2(currM2);
	217	sumPijQij = normalizeQ(&sp); //TEST
	218	if(_unObservableData_p) _unObservableData_p->setLforMissingData(tr,&sp);
	219	LOGnOUT(4,<<"currBestL= "<<currBestL<<"\tLoss= "<<currM2<<endl);
	220	_bestMu2=currM2;
	221	_bestL=currBestL;
	222	}
	223	}
	224	}
	225	// optimize Beta
	226	if(optimizeBeta && !gainLossOptions::_isOptimizeGainLossRatioInsteadOfGainAndLossSeperately){
	227	currBestL = -brent(MINIMUM_BETA_PARAM,_bestBeta,MAXIMUM_BETA_PARAM,C_evalParam(tr,sp,sc,C_evalParam::rateBeta,isReversible,_weightsUniqPatterns,_unObservableData_p),epsilonOptimization,&currBeta);
	228	if (currBestL>_bestL) {
	229	setRateBeta(sp.distr(),currBeta);
	230	sumPijQij = normalizeQ(&sp); //TEST
	231	if(_unObservableData_p) _unObservableData_p->setLforMissingData(tr,&sp);
	232	LOGnOUT(4,<<"currBestL= "<<currBestL<<"\tBeta= "<<currBeta<<endl);
	233	_bestBeta=currBeta;
	234	_bestL=currBestL;
	235	}
	236	}
	237	// optimize Alpha - 3 options (all results with same values)
	238	if(optimizeAlpha){
	239	currBestL = -brent(MINIMUM_ALPHA_PARAM,_bestAlpha,MAXIMUM_ALPHA_PARAM,C_evalParam(tr,sp,sc,C_evalParam::rateAlpha,isReversible,_weightsUniqPatterns,_unObservableData_p),epsilonOptimization,&currAlpha);
	240	if (currBestL>_bestL) {
	241	setRateAlpha(sp.distr(),currAlpha);
	242	sumPijQij = normalizeQ(&sp); //TEST
	243	if(_unObservableData_p) _unObservableData_p->setLforMissingData(tr,&sp);
	244	LOGnOUT(4,<<"currBestL= "<<currBestL<<"\tAlpha= "<<currAlpha<<endl);
	245	_bestAlpha=currAlpha;
	246	_bestL=currBestL;
	247	}
	248	}
	249	// optimization - probInvariant
	250	if (probInvariant){
	251	currBestL = -brent(MINIMUM_PROB_PARAM,_bestRateProbInvariant,MAXIMUM_PROB_PARAM,C_evalParam(tr,sp,sc,C_evalParam::rateProbInvariant,isReversible,_weightsUniqPatterns,_unObservableData_p),epsilonOptimization,&currRateProbInvariant);
	252	if (currBestL>_bestL) {
	253	static_cast<generalGammaDistributionPlusInvariant*>(sp.distr())->setInvProb(currRateProbInvariant);
	254	sumPijQij = normalizeQ(&sp); //TEST
	255	if(_unObservableData_p) _unObservableData_p->setLforMissingData(tr,&sp);
	256	LOGnOUT(4,<<"currBestL= "<<currBestL<<"\tRateProbInvariant= "<<currRateProbInvariant<<endl);
	257	_bestRateProbInvariant=currRateProbInvariant;
	258	_bestL=currBestL;
	259	}
	260	}
	261	// optimization - Freq (Theta)
	262	if (!gainLossOptions::_isStartWithTheta && evalTheta && !gainLossOptions::_isRootFreqEQstationary){
	263	currBestL = -brent(MINIMUM_FREQ_PARAM,_bestTheta,MAXIMUM_FREQ_PARAM,C_evalParam(tr,sp,sc,C_evalParam::theta,isReversible,_weightsUniqPatterns,_unObservableData_p),
	264	epsilonOptimization*gainLossOptions::_epsilonOptimizationThetaFactor,&currTheta);
	265	if (currBestL>_bestL) {
	266	static_cast<gainLossModel*>(sp.getPijAccelerator()->getReplacementModel())->setTheta(currTheta);
	267	sumPijQij = normalizeQ(&sp); //TEST
	268	if(_unObservableData_p) _unObservableData_p->setLforMissingData(tr,&sp);
	269	LOGnOUT(4,<<"currBestL= "<<currBestL<<"\tTheta= "<<currTheta<<endl);
	270	_bestTheta=currTheta;
	271	_bestL=currBestL;
	272	//MDOUBLE currentlogL =likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(tr,sc,sp,_weightsUniqPatterns,_unObservableData_p);
	273	//if(!DEQUAL(currentlogL,_bestL)){ //DEQUAL(currentlogL,bestL)
	274	// LOGnOUT(3,<<"!!! ERROR: different likelihood after optimizeGainLossModel,diff= "<<currentlogL-_bestL <<"\n");
	275	//}
	276	}
	277	}
	278	//else{
	279	// _bestTheta = _bestMu1/(_bestMu1+_bestMu2);
	280	//}
	281
	282	if (!(_bestL>previousL+epsilonOptimizationIter)) // no significant improvement -> break
	283	{
	284	//if(_unObservableData_p) _unObservableData_p->setLforMissingData(tr,&sp); // Done after each update, not here
	285	//_bestL=max(_bestL,currBestL); // not to reduce likelihood
	286	LOGnOUT(3,<<" model optimization converged. Iter= "<<iter<<" Likelihood="<<_bestL<<endl);
	287	break;
	288	}
	289	if(gainLossOptions::_simulatedAnnealing)
	290	epsilonOptimization = max(epsilonOptimizationgainLossOptions::_simulatedAnnealingCoolingFactor,0.1gainLossOptions::_simulatedAnnealingMinEpsilonFactor); //simulated annealing
	291	}
	292	if (iter>=numIterations){
	293	_bestL=max(_bestL,currBestL); // not to reduce likelihood
	294	LOGnOUT(3,<<" Too many iterations in optimizeGainLossModel. Iter= "<<iter<<" Last optimized parameters are used. iter="<<iter<<endl);
	295	}
	296	//if(currUnObservableData_p) delete currUnObservableData_p;
	297	}
	298
	299	/********************************************************************************************
	300	*********************************************************************************************/
	301	//bool optimizeGainLossModel::isUpdateGain(const MDOUBLE currBestL, MDOUBLE& currM1, const MDOUBLE lossLikelihoodImprovmet)
	302	//{
	303	// bool isUpdateGain = false;
	304	// if((currBestL-_bestL)>lossLikelihoodImprovmet){
	305	// isUpdateGain =true;
	306	// }
	307	// return isUpdateGain;
	308	//}
	309
	310	/********************************************************************************************
	311	*********************************************************************************************/
	312	//void optimizeGainLossModel::initMissingDataInfo()
	313	//{
	314	// //if(gainLossOptions::_accountForMissingData && (_plogLforMissingData==NULL)){ // plogLforMissingData was't sent but it is needed
	315	// // LOGnOUT(4,<<"----------plogLforMissingData was't sent but it is needed"<<endl);
	316	// // _plogLforMissingData = &_logLforMissingData;
	317	// //}
	318	//}
	319
	320
	321
	322	/********************************************************************************************
	323	*********************************************************************************************/
	324	//optimizeGainLossModel::optimizeGainLossModel(const tree& tr, stochasticProcess& sp, const sequenceContainer &sc,
	325	// const bool isReversible, /*const bool evalTheta,*/
	326	// const MDOUBLE epsilonOptimization, const int numIterations,
	327	// MDOUBLE* plogLforMissingData,
	328	// /*const MDOUBLE upperValueOfParam, const MDOUBLE lowerValueOfParam,*/
	329	// ostream& out):
	330	//_plogLforMissingData(plogLforMissingData)
	331	//{
	332	// //initMissingDataInfo();
	333	//
	334	// MDOUBLE MINIMUM_ALPHA_PARAM;
	335	// if(gainLossOptions::_isAlphaLimit){
	336	// MINIMUM_ALPHA_PARAM = 0.3;
	337	// }
	338	// else{
	339	// MINIMUM_ALPHA_PARAM = ::MINIMUM_ALPHA_PARAM;
	340	// }
	341	// MDOUBLE MAXIMUM_GAIN_PARAM;
	342	// if(gainLossOptions::_gainLossRateAreFreq){
	343	// MAXIMUM_GAIN_PARAM = 0.9999;
	344	// }
	345	// else{
	346	// MAXIMUM_GAIN_PARAM = ::MAXIMUM_GAIN_PARAM;
	347	// }
	348	//
	349	//
	350	// bool optimizeAlpha = isAlphaOptimization(sp.distr());
	351	// bool optimizeBeta = isBetaOptimization(sp.distr());
	352	// bool optimizeMixture = isMixOptimization(sp.distr());
	353	// bool probInvariant = isInvariantOptimization(sp.distr());
	354	// bool evalTheta = isThetaOptimization();
	355	//
	356	// MDOUBLE previousL;
	357	// MDOUBLE currBestL=VERYSMALL;
	358	// MDOUBLE currM1=0.1;
	359	// MDOUBLE currM2=1; // for non-reversible model only
	360	// MDOUBLE currAlpha=1;
	361	// MDOUBLE currBeta=1;
	362	// MDOUBLE currTheta = 0.5;
	363	// MDOUBLE currRateProbInvariant = 0.05;
	364	// MDOUBLE lossLikelihoodImprovmet = 0;
	365	// MDOUBLE incrementFactorForGain = gainLossOptions::_slopeFactorForGain; // forces slow climb for gain param
	366	// MDOUBLE currLogLforMissingData;
	367	// MDOUBLE* currpLogLforMissingData;
	368	// if(gainLossOptions::_accountForMissingData){
	369	// currpLogLforMissingData = &currLogLforMissingData;
	370	// currpLogLforMissingData = _plogLforMissingData;
	371	// }
	372	// else
	373	// currpLogLforMissingData = NULL;
	374	//
	375	//
	376	//
	377	// if(gainLossOptions::_initParamsAtRandPointsInOptimization){
	378	// currM1 =talRandom::giveRandomNumberBetweenTwoPoints(MINIMUM_GAIN_PARAM, MAXIMUM_GAIN_PARAM);
	379	// currM2=talRandom::giveRandomNumberBetweenTwoPoints(MINIMUM_LOSS_PARAM, MAXIMUM_LOSS_PARAM);
	380	// currAlpha = talRandom::giveRandomNumberBetweenTwoPoints(MINIMUM_ALPHA_PARAM, MAXIMUM_ALPHA_PARAM);
	381	// currBeta =talRandom::giveRandomNumberBetweenTwoPoints(MINIMUM_BETA_PARAM, MAXIMUM_BETA_PARAM);
	382	// currTheta =talRandom::giveRandomNumberBetweenTwoPoints(MINIMUM_PROB_PARAM, MAXIMUM_PROB_PARAM);
	383	// currRateProbInvariant =talRandom::giveRandomNumberBetweenTwoPoints(MINIMUM_PROB_PARAM, MAXIMUM_PROB_PARAM);
	384	// }
	385	//
	386	//// initialize
	387	// _bestL = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(tr,sc,sp,0,currpLogLforMissingData);
	388	// _bestMu1 = static_cast<gainLossModel*>(sp.getPijAccelerator()->getReplacementModel())->getMu1();
	389	// if (!isReversible){
	390	// _bestMu2 = static_cast<gainLossModelNonReversible*>(sp.getPijAccelerator()->getReplacementModel())->getMu2(); }
	391	// if(optimizeAlpha){
	392	// _bestAlpha = getRateAlpha(sp.distr()); }
	393	// if(optimizeBeta){
	394	// _bestBeta = getRateBeta(sp.distr()); }
	395	// if(evalTheta){
	396	// _bestTheta = static_cast<gainLossModel*>(sp.getPijAccelerator()->getReplacementModel())->getTheta(); }
	397	// if(probInvariant){
	398	// _bestRateProbInvariant = static_cast<generalGammaDistributionPlusInvariant*>(sp.distr())->getInvProb(); }
	399	//
	400	//// optimize
	401	// LOGnOUT(3,<<"### "<<"optimization starting- 'epsilonOptimization'="<<epsilonOptimization<<" 'numIterations'="<<numIterations<<endl);
	402	// LOGnOUT(3,<<"start optimization with:"<<endl<<" L= "<<_bestL<<" gain= "<<_bestMu1);
	403	// if(!isReversible) LOGnOUT(3,<<" loss= "<<_bestMu2);
	404	// if(optimizeAlpha) LOGnOUT(3,<<" Alpha= "<<_bestAlpha);
	405	// if(optimizeBeta) LOGnOUT(3,<<" Beta= "<<_bestBeta);
	406	// if(optimizeMixture) LOGnOUT(3,<<" ");
	407	// if(evalTheta) LOGnOUT(3,<<" Theta= "<<_bestTheta);
	408	// if(probInvariant) LOGnOUT(3,<<" RateProbInvariant= "<<_bestRateProbInvariant);
	409	// LOGnOUT(3,<<endl);
	410	//
	411	// int iter;
	412	// for (iter=1;iter<=numIterations;iter++){
	413	// previousL = _bestL;
	414	// //bool changed=false;
	415	// LOGnOUT(4,<<"iter="<<iter<<endl);
	416	//// optimization - gain
	417	// if(gainLossOptions::_incrementFactorForGain) currBestL = -brent(MINIMUM_GAIN_PARAM,_bestMu1,min((_bestMu1*incrementFactorForGain),MAXIMUM_GAIN_PARAM),C_evalParam(tr,sp,sc,C_evalParam::gain,isReversible,currpLogLforMissingData),epsilonOptimization,&currM1);
	418	// if(gainLossOptions::_lossBiggerGainLimit) currBestL = -brent(MINIMUM_GAIN_PARAM,_bestMu1,min(_bestMu2,MAXIMUM_GAIN_PARAM),C_evalParam(tr,sp,sc,C_evalParam::gain,isReversible,currpLogLforMissingData),epsilonOptimization,&currM1);
	419	// else currBestL = -brent(MINIMUM_GAIN_PARAM,_bestMu1,MAXIMUM_GAIN_PARAM,C_evalParam(tr,sp,sc,C_evalParam::gain,isReversible,currpLogLforMissingData),epsilonOptimization,&currM1);
	420	// if (currBestL>_bestL) {
	421	// //lossLikelihoodImprovmet *= requiredPresentOflastLikelihoodImprovmet;
	422	// //if (isUpdateGain(currBestL,currM1,lossLikelihoodImprovmet)) {
	423	// static_cast<gainLossModel*>(sp.getPijAccelerator()->getReplacementModel())->setMu1(currM1,isReversible);
	424	// _plogLforMissingData = currpLogLforMissingData;
	425	// LOGnOUT(4,<<"currBestL= "<<currBestL<<"\tGain= "<<currM1<<endl);
	426	// _bestMu1=currM1;
	427	// _bestL=currBestL;
	428	// }
	429	// //if (currBestL>_bestL+epsilonOptimization) {
	430	// ////if (isUpdateGain(currBestL,currM1,lossLikelihoodImprovmet)) {
	431	// // changed=true;
	432	// // _bestL=currBestL;
	433	// //}
	434	//
	435	//// optimization - loss
	436	// if (!isReversible & !gainLossOptions::_gainLossRateAreFreq){
	437	// if(gainLossOptions::_lossBiggerGainLimit) currBestL = -brent(max(_bestMu1,MINIMUM_LOSS_PARAM),_bestMu2,MAXIMUM_LOSS_PARAM,C_evalParam(tr,sp,sc,C_evalParam::loss,isReversible,currpLogLforMissingData),epsilonOptimization,&currM2);
	438	// else currBestL = -brent(MINIMUM_LOSS_PARAM,_bestMu2,MAXIMUM_LOSS_PARAM,C_evalParam(tr,sp,sc,C_evalParam::loss,isReversible,currpLogLforMissingData),epsilonOptimization,&currM2);
	439	// if (currBestL>_bestL) {
	440	// lossLikelihoodImprovmet = currBestL-_bestL;
	441	// static_cast<gainLossModelNonReversible*>(sp.getPijAccelerator()->getReplacementModel())->setMu2(currM2);
	442	// _plogLforMissingData = currpLogLforMissingData;
	443	// LOGnOUT(4,<<"currBestL= "<<currBestL<<"\tLoss= "<<currM2<<endl);
	444	// _bestMu2=currM2;
	445	// _bestL=currBestL;
	446	// }
	447	// //if (currBestL>_bestL+epsilonOptimization) {
	448	// // changed=true;
	449	// // _bestL=currBestL;
	450	// //}
	451	// }
	452	//
	453	//// optimize Alpha - 3 options (all results with same values)
	454	// if(optimizeAlpha){
	455	// currBestL = -brent(MINIMUM_ALPHA_PARAM,_bestAlpha,MAXIMUM_ALPHA_PARAM,C_evalParam(tr,sp,sc,C_evalParam::rateAlpha,isReversible,currpLogLforMissingData),epsilonOptimization,&currAlpha);
	456	// if (currBestL>_bestL) {
	457	// //static_cast<gammaDistribution*>(sp.distr())->setAlpha(currAlpha);
	458	// setRateAlpha(sp.distr(),currAlpha);
	459	// _plogLforMissingData = currpLogLforMissingData;
	460	// LOGnOUT(4,<<"currBestL= "<<currBestL<<"\tAlpha= "<<currAlpha<<endl);
	461	// _bestAlpha=currAlpha;
	462	// _bestL=currBestL;
	463	// }
	464	// //if (currBestL>_bestL+epsilonOptimization) {
	465	// // changed=true;
	466	// // _bestL=currBestL;
	467	// //}
	468	// }
	469	//// optimize Beta
	470	// if(optimizeBeta){
	471	// currBestL = -brent(MINIMUM_BETA_PARAM,_bestBeta,MAXIMUM_BETA_PARAM,C_evalParam(tr,sp,sc,C_evalParam::rateBeta,isReversible,currpLogLforMissingData),epsilonOptimization,&currBeta);
	472	// if (currBestL>_bestL) {
	473	// setRateBeta(sp.distr(),currBeta);
	474	// _plogLforMissingData = currpLogLforMissingData;
	475	// LOGnOUT(4,<<"currBestL= "<<currBestL<<"\tBeta= "<<currBeta<<endl);
	476	// _bestBeta=currBeta;
	477	// _bestL=currBestL;
	478	// }
	479	// //if (currBestL>_bestL+epsilonOptimization) {
	480	// // changed=true;
	481	// // _bestL=currBestL;
	482	// //}
	483	// }
	484	//// optimize mixture
	485	// if(optimizeMixture){
	486	// //Vint pointsNum(1, 1);
	487	// //Vint iterNum(1, 1), const vector<GamMixtureOptimizer::OptimAlg> optAlgs, const Vdouble tols
	488	//
	489	// GamMixtureOptimizer optGamma(&sp, sc, tr);
	490	// if(gainLossOptions::_gammmaMixtureOptimizerAlg == gainLossOptions::EM) currBestL = optGamma.findBestParam(GamMixtureOptimizer::EM, 1, epsilonOptimization, NULL);
	491	// else if(gainLossOptions::_gammmaMixtureOptimizerAlg == gainLossOptions::ONE_DIM) currBestL = optGamma.findBestParam(GamMixtureOptimizer::ONE_DIM, 1, epsilonOptimization, NULL);
	492	// else errorMsg::reportError("unknown type in gammmaMixtureOptimizerAlgType");
	493	//
	494	// if (currBestL>_bestL) {
	495	// LOGnOUT(4,<<"currBestL= "<<currBestL<<"\timprovment in optimize gammaMixture params"<<endl);
	496	// _bestL=currBestL;
	497	// }
	498	// //if (currBestL>_bestL+epsilonOptimization) {
	499	// // changed=true;
	500	// // _bestL=currBestL;
	501	// //}
	502	// }
	503	//// optimization - Freq (Theta)
	504	// if (evalTheta){
	505	// currBestL = -brent(MINIMUM_PROB_PARAM,_bestTheta,MAXIMUM_PROB_PARAM,C_evalParam(tr,sp,sc,C_evalParam::theta,isReversible,currpLogLforMissingData),epsilonOptimization,&currTheta);
	506	// if (currBestL>_bestL) {
	507	// static_cast<gainLossModel*>(sp.getPijAccelerator()->getReplacementModel())->setTheta(currTheta);
	508	// _plogLforMissingData = currpLogLforMissingData;
	509	// LOGnOUT(4,<<"currBestL= "<<currBestL<<"\tTheta= "<<currTheta<<endl);
	510	// _bestTheta=currTheta;
	511	// _bestL=currBestL;
	512	// }
	513	// //if (currBestL>_bestL+epsilonOptimization) {
	514	// // changed=true;
	515	// // _bestL=currBestL;
	516	// //}
	517	// }
	518	//// optimization - Prob
	519	// if (probInvariant){
	520	// currBestL = -brent(MINIMUM_PROB_PARAM,_bestRateProbInvariant,MAXIMUM_PROB_PARAM,C_evalParam(tr,sp,sc,C_evalParam::rateProbInvariant,isReversible,currpLogLforMissingData),epsilonOptimization,&currRateProbInvariant);
	521	// if (currBestL>_bestL) {
	522	// static_cast<generalGammaDistributionPlusInvariant*>(sp.distr())->setInvProb(currRateProbInvariant);
	523	// _plogLforMissingData = currpLogLforMissingData;
	524	// LOGnOUT(4,<<"currBestL= "<<currBestL<<"\tRateProbInvariant= "<<currRateProbInvariant<<endl);
	525	// _bestRateProbInvariant=currRateProbInvariant;
	526	// _bestL=currBestL;
	527	// }
	528	// //if (currBestL>_bestL+epsilonOptimization) {
	529	// // changed=true;
	530	// // _bestL=currBestL;
	531	// //}
	532	// }
	533	// if (!(_bestL>previousL+epsilonOptimization)) // no significant improvement -> break
	534	// {
	535	// _bestL=max(_bestL,currBestL); // not to reduce likelihood
	536	// break;
	537	// }
	538	// }
	539	// if (iter>=numIterations){
	540	// _bestL=max(_bestL,currBestL); // not to reduce likelihood
	541	// LOGnOUT(3,<<"WARNING: Too many iterations in optimizeGainLossModel. Last optimized parameters are used. iter="<<iter<<endl);
	542	// }
	543	//}
	544
	545
	546
	547
	548
	549
	550

+163

-0

programs/gainLoss/optimizeGainLossModel.h less more

	0	/*
	1	Copyright (C) 2011 Tal Pupko TalP@tauex.tau.ac.il.
	2
	3	This program is free software: you can redistribute it and/or modify
	4	it under the terms of the GNU General Public License as published by
	5	the Free Software Foundation, either version 3 of the License, or
	6	(at your option) any later version.
	7
	8	This program is distributed in the hope that it will be useful,
	9	but WITHOUT ANY WARRANTY; without even the implied warranty of
	10	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	11	GNU General Public License for more details.
	12
	13	You should have received a copy of the GNU General Public License
	14	along with this program. If not, see <http://www.gnu.org/licenses/>.
	15	*/
	16
	17
	18	#ifndef ___OPTIMIZE_GLM
	19	#define ___OPTIMIZE_GLM
	20
	21	#include "bblEM.h"
	22	#include "bestAlpha.h"
	23	#include "computePijComponent.h"
	24	#include "computeUpAlg.h"
	25	#include "definitions.h"
	26	#include "gainLossModel.h"
	27	#include "gammaDistribution.h"
	28	#include "likelihoodComputation.h"
	29	#include "likelihoodComputationGL.h"
	30	#include "numRec.h"
	31	#include "sequenceContainer.h"
	32	#include "stochasticProcess.h"
	33	#include "tree.h"
	34	#include "talRandom.h"
	35	#include "gainLossUtils.h"
	36	#include "gainLossOptions.h"
	37	#include "unObservableData.h"
	38	#include "GamMixtureOptimizer.h"
	39	#include "gammaDistributionFixedCategories.h"
	40	#include "generalGammaDistributionPlusInvariant.h"
	41	#include "mixtureDistribution.h"
	42	#include "gammaUtilities.h"
	43	#include "gainLossOptions.h"
	44
	45
	46
	47	class optimizeGainLossModel {
	48	public:
	49	//explicit optimizeGainLossModel(const tree& tr, stochasticProcess& sp, const sequenceContainer &sc,
	50	// const bool isReversible =false, /const bool evalTheta =true,/
	51	// const MDOUBLE epsilonOptimization =0.1, const int numIterations =10,
	52	// MDOUBLE* logLforMissingData =NULL,
	53	// ostream& out=cout);
	54	explicit optimizeGainLossModel(const tree& tr, stochasticProcess& sp, const sequenceContainer &sc,
	55	const bool isReversible =false, /const bool evalTheta =true,/
	56	MDOUBLE epsilonOptimization =0.1, const int numIterations =10,
	57	Vdouble* weights = NULL,
	58	unObservableData* unObservableData_p=NULL);
	59
	60
	61	//bool isUpdateGain(const MDOUBLE currBestL, MDOUBLE& currM1, const MDOUBLE lossLikelihoodImprovmet);
	62	MDOUBLE getBestMu1() {return _bestMu1;}
	63	MDOUBLE getBestMu2() {return _bestMu2;}
	64	MDOUBLE getBestTheta() {return _bestTheta;}
	65	MDOUBLE getBestAlpha() {return _bestAlpha;}
	66	MDOUBLE getBestBeta() {return _bestBeta;}
	67	MDOUBLE getBestRateProbInvariant() {return _bestRateProbInvariant;}
	68	MDOUBLE getBestL() {return _bestL;}
	69	//void initMissingDataInfo();
	70
	71	//MDOUBLE* startingBestAlphaFixedTree(tree& tr,sequenceContainer& sc,stochasticProcess& sp);
	72
	73	private:
	74	MDOUBLE _bestMu1;
	75	MDOUBLE _bestMu2; // for non-reversible model only
	76	MDOUBLE _bestGainLossRatio;
	77	MDOUBLE _bestAlpha;
	78	MDOUBLE _bestBeta;
	79	MDOUBLE _bestTheta;
	80	MDOUBLE _bestRateProbInvariant;
	81	MDOUBLE _bestL;
	82	////MDOUBLE _logLforMissingData;
	83	//MDOUBLE* _plogLforMissingData;
	84	//Vdouble* _pLforMissingDataPerCat;
	85	unObservableData* _unObservableData_p;
	86	Vdouble* _weightsUniqPatterns;
	87	};
	88
	89	/********************************************************************************************
	90	*********************************************************************************************/
	91	/********************************************************************************************
	92	*********************************************************************************************/
	93	class C_evalParam{
	94	public:
	95	C_evalParam(const tree& tr,
	96	const stochasticProcess& sp, const sequenceContainer &sc, int which_mu, bool isReversible,Vdouble* weights, const unObservableData* unObservableData_p)
	97	: _tr(tr),/_sp(sp),/ _sc(sc),_which_param(which_mu),_isReversible(isReversible),_weights(weights)
	98	{
	99
	100	_sp = sp.clone(); // the original sp is not effected
	101	if(unObservableData_p)
	102	_unObservableData_p = unObservableData_p->clone();
	103	else
	104	_unObservableData_p = NULL;
	105	//unObservableData currUnObs(*unObservableData_p);
	106
	107	//_weights = gainLossOptions::_weights;
	108
	109	//if(gainLossOptions::_accountForMissingData){ // plogLforMissingData is not sent but it is needed (the change is local)
	110	// _plogLforMissingData = &_logLforMissingData;
	111	//}
	112	//else{
	113	// _plogLforMissingData = NULL;
	114	//}
	115	if ((_which_param>6) \|\| (_which_param<0))
	116	errorMsg::reportError("Error in C_evalParam, error at _which_param");
	117	};
	118	virtual ~C_evalParam(){
	119	if(_sp) delete _sp;
	120	if(_unObservableData_p) delete _unObservableData_p;
	121	}
	122
	123	private:
	124	const tree& _tr;
	125	stochasticProcess* _sp;
	126	const sequenceContainer &_sc;
	127	int _which_param;
	128	bool _isReversible;
	129	unObservableData* _unObservableData_p;
	130	Vdouble* _weights;
	131
	132	public:
	133	enum paramName {gain,loss,rateAlpha,rateBeta,theta,rateProbInvariant,gainLossRatio};
	134
	135	MDOUBLE operator() (MDOUBLE param) {
	136	MDOUBLE sumPijQij = 1.0;
	137	switch (_which_param) {
	138	case (C_evalParam::gain) : static_cast<gainLossModel*>(_sp->getPijAccelerator()->getReplacementModel())->setMu1(param,_isReversible); break;
	139	case (C_evalParam::loss) : static_cast<gainLossModelNonReversible*>(_sp->getPijAccelerator()->getReplacementModel())->setMu2(param); break;
	140	case (C_evalParam::rateAlpha) : setRateAlpha(_sp->distr(),param); break;
	141	case (C_evalParam::rateBeta) : setRateBeta(_sp->distr(),param); break;
	142	case (C_evalParam::theta) : (static_cast<gainLossModel*>(_sp->getPijAccelerator()->getReplacementModel()))->setTheta(param); break;
	143	case (C_evalParam::rateProbInvariant) : static_cast<generalGammaDistributionPlusInvariant*>(_sp->distr())->setInvProb(param); break;
	144	case (C_evalParam::gainLossRatio) :
	145	if(gainLossOptions::_isOptimizeParamsWithLogMinMax) param = pow(10,param);
	146	static_cast<gainLossModel*>(_sp->getPijAccelerator()->getReplacementModel())->setMu1(sqrt(param),_isReversible);
	147	static_cast<gainLossModelNonReversible*>(_sp->getPijAccelerator()->getReplacementModel())->setMu2( sqrt(1.0/param) );
	148	//norm_factor = normalizeQ(_sp);
	149	break;
	150	}
	151	sumPijQij = normalizeQ(_sp);
	152	if(_unObservableData_p){ _unObservableData_p->setLforMissingData(_tr,_sp); }
	153	MDOUBLE res = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(_tr,_sc,*_sp,_weights,_unObservableData_p);
	154	(static_cast<gainLossModel*>(_sp->getPijAccelerator()->getReplacementModel()))->norm( sumPijQij ); // reverse the normalization after likelihood computation.
	155	LOG(5,<<"for _which_param "<<_which_param<<" with val = "<<param<<" logL = "<<res<<endl);
	156	return -res;
	157	}
	158	};
	159
	160
	161
	162	#endif

+460

-0

programs/gainLoss/optimizeGainLossModelVV.cpp less more

	0	/*
	1	Copyright (C) 2011 Tal Pupko TalP@tauex.tau.ac.il.
	2
	3	This program is free software: you can redistribute it and/or modify
	4	it under the terms of the GNU General Public License as published by
	5	the Free Software Foundation, either version 3 of the License, or
	6	(at your option) any later version.
	7
	8	This program is distributed in the hope that it will be useful,
	9	but WITHOUT ANY WARRANTY; without even the implied warranty of
	10	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	11	GNU General Public License for more details.
	12
	13	You should have received a copy of the GNU General Public License
	14	along with this program. If not, see <http://www.gnu.org/licenses/>.
	15	*/
	16	#include "optimizeGainLossModelVV.h"
	17	#include "gainLossUtils.h"
	18	#include "gainLossOptions.h"
	19	#include "Parameters.h"
	20
	21	/********************************************************************************************
	22	optimizeGainLossModel - for gain,Loss ~ Gamma(Alpha,Beta)
	23	*********************************************************************************************/
	24	optimizeGainLossModelVV::optimizeGainLossModelVV(const tree& tr,
	25	vector<vector<stochasticProcess*> >& spVVec, const sequenceContainer &sc,
	26	distribution * gainDist, distribution * lossDist,
	27	const bool isReversible,
	28	MDOUBLE epsilonOptimization, const int numIterations,
	29	Vdouble* weights,
	30	unObservableData* unObservableData_p):
	31	_weightsUniqPatterns(weights),_unObservableData_p(unObservableData_p)
	32	{
	33	MDOUBLE MINIMUM_ALPHA_PARAM;
	34	if(gainLossOptions::_isAlphaLimit){
	35	MINIMUM_ALPHA_PARAM = 0.1;
	36	}
	37	else{
	38	MINIMUM_ALPHA_PARAM = ::MINIMUM_ALPHA_PARAM;
	39	}
	40	bool isAllowHigherAlpha = false; // for distribution more 'gaussian' and Eq, need higher alpha, else 10.0
	41	MDOUBLE MAXIMUM_ALPHA_PARAM;
	42	if(isAllowHigherAlpha){
	43	MAXIMUM_ALPHA_PARAM = 100;
	44	}
	45	else{
	46	MAXIMUM_ALPHA_PARAM = ::MAXIMUM_ALPHA_PARAM;
	47	}
	48	MDOUBLE MINMUM_GAIN_LOSS_RATIO_PARAM;
	49	MDOUBLE MAXIMUM_GAIN_LOSS_RATIO_PARAM;
	50	if(gainLossOptions::_isOptimizeParamsWithLogMinMax){
	51	MINMUM_GAIN_LOSS_RATIO_PARAM = log10(::MINMUM_GAIN_LOSS_RATIO_PARAM);
	52	MAXIMUM_GAIN_LOSS_RATIO_PARAM = log10(::MAXIMUM_GAIN_LOSS_RATIO_PARAM);
	53	}else{
	54	MINMUM_GAIN_LOSS_RATIO_PARAM = ::MINMUM_GAIN_LOSS_RATIO_PARAM;
	55	MAXIMUM_GAIN_LOSS_RATIO_PARAM = ::MAXIMUM_GAIN_LOSS_RATIO_PARAM;
	56	}
	57
	58
	59	stochasticProcess sp = *spVVec[0][0];
	60
	61	bool optimizeBetaGain = isBetaOptimization(gainDist);
	62	bool optimizeBetaLoss = isBetaOptimization(lossDist);
	63	bool optimizeAlphasGainLoss = true;
	64	if(gainLossOptions::_optimizationLevel<=2){ // Vlow and below
	65	optimizeAlphasGainLoss = false;
	66	LOGnOUT(4,<<"No optimization of rate shape (Alphas) in low optimization level"<<endl);
	67	}
	68	bool optimizeGLProbInvariant = isInvariantOptimization(gainDist); // for both gain and loss
	69
	70	bool optimizeRateAlpha = isAlphaOptimization((sp.distr()));
	71	bool optimizeRateProbInvariant = isInvariantOptimization((sp.distr()));
	72	bool evalTheta = isThetaOptimization();
	73
	74	MDOUBLE currBestL,currGainAlpha,currGainBeta,currGainProbInvariant,currLossAlpha,currLossBeta,currLossProbInvariant,currRateAlpha,currRateProbInvariant,currTheta,previousL,currGainLossRatio;
	75	MDOUBLE sumPijQij;
	76	//distribution* gainDistPrev=gainDist->clone();
	77	//distribution* lossDistPrev=lossDist->clone();
	78	//vector<vector<stochasticProcess*> > spVVecPrev;
	79	//spVVecPrev.resize(_gainDist->categories());
	80	//for (int gainCategor=0; gainCategor<_gainDist->categories(); gainCategor++){
	81	// _spVVec[gainCategor].resize(_lossDist->categories());
	82	// for (int lossCategor=0; lossCategor<_lossDist->categories(); lossCategor++){
	83	// spVVecPrev[gainCategor][lossCategor] = spVVec[gainCategor][lossCategor]->clone();
	84	// }
	85	//}
	86	//unObservableData* unObservableData_pPrev;
	87	//if(unObservableData_p)
	88	// unObservableData_pPrev = unObservableData_p->clone();
	89	//else
	90	// unObservableData_pPrev = NULL;
	91
	92	//Random Starts
	93	//unObservableData* currUnObservableData_p;
	94	//if(gainLossOptions::_accountForMissingData){
	95	// currUnObservableData_p = new unObservableData(sc, &sp, gainLossAlphabet(),gainLossOptions::_minNumOfOnes);
	96	// currUnObservableData_p->setLforMissingData(tr,spVVec,gainDist,lossDist);
	97	//}
	98	//else{
	99	// currUnObservableData_p = NULL;
	100	//}
	101	if(gainLossOptions::_initParamsAtRandPointsInOptimization){
	102	currGainAlpha =talRandom::giveRandomNumberBetweenTwoPoints(MINIMUM_ALPHA_PARAM, MAXIMUM_ALPHA_PARAM);
	103	currGainBeta=talRandom::giveRandomNumberBetweenTwoPoints(MINIMUM_BETA_PARAM, MAXIMUM_BETA_PARAM);
	104	currGainProbInvariant = talRandom::giveRandomNumberBetweenTwoPoints(MINIMUM_PROB_PARAM, MAXIMUM_PROB_PARAM);
	105	currLossAlpha =talRandom::giveRandomNumberBetweenTwoPoints(MINIMUM_ALPHA_PARAM, MAXIMUM_ALPHA_PARAM);
	106	currLossBeta =talRandom::giveRandomNumberBetweenTwoPoints(MINIMUM_BETA_PARAM, MAXIMUM_BETA_PARAM);
	107	currLossProbInvariant =talRandom::giveRandomNumberBetweenTwoPoints(MINIMUM_PROB_PARAM, MAXIMUM_PROB_PARAM);
	108
	109	currRateProbInvariant =talRandom::giveRandomNumberBetweenTwoPoints(MINIMUM_PROB_PARAM, MAXIMUM_PROB_PARAM);
	110	currRateAlpha =talRandom::giveRandomNumberBetweenTwoPoints(MINIMUM_ALPHA_PARAM, MAXIMUM_ALPHA_PARAM);
	111	currTheta =talRandom::giveRandomNumberBetweenTwoPoints(MINIMUM_FREQ_PARAM, MAXIMUM_FREQ_PARAM);
	112	}
	113	else{
	114	currBestL=VERYSMALL;
	115	currGainAlpha=1; //Gain
	116	currGainBeta=1;
	117	currGainProbInvariant = 0.1;
	118	currLossAlpha=1; // Loss (for non-reversible model only)
	119	currLossBeta=1;
	120	currLossProbInvariant = 0.1;
	121
	122	currRateAlpha=1; //Rate
	123	currRateProbInvariant = 0.1;
	124	currTheta = 0.5;
	125	currGainLossRatio = 1;
	126	}
	127
	128	int numberOfParameters = 1;
	129	// initialize
	130	// Gain
	131	_bestL = likelihoodComputationGL::getTreeLikelihoodAllPosAlphTheSame(tr,sc,spVVec,gainDist,lossDist,_weightsUniqPatterns,_unObservableData_p);
	132	if(optimizeGLProbInvariant) {
	133	_bestGainProbInvariant = static_cast<generalGammaDistributionPlusInvariant*>(gainDist)->getInvProb();
	134	++numberOfParameters;
	135	}
	136	//_bestGainAlpha = static_cast<generalGammaDistribution*>(gainDist)->getAlpha();
	137	_bestGainAlpha = getRateAlpha(gainDist);
	138
	139	//if(optimizeBetaGain) _bestGainBeta = static_cast<generalGammaDistribution*>(gainDist)->getBeta();
	140	if(optimizeBetaGain) {
	141	_bestGainBeta = getRateBeta(gainDist);
	142	++numberOfParameters;
	143	}
	144	// Loss
	145	if (!isReversible){
	146	if(optimizeGLProbInvariant) {
	147	_bestLossProbInvariant = static_cast<generalGammaDistributionPlusInvariant*>(lossDist)->getInvProb();
	148	++numberOfParameters;
	149	}
	150	//_bestLossAlpha = static_cast<generalGammaDistribution*>(lossDist)->getAlpha();
	151	//if(optimizeBetaLoss) _bestLossBeta = static_cast<generalGammaDistribution*>(lossDist)->getBeta();
	152	_bestLossAlpha = getRateAlpha(lossDist);
	153	if(optimizeBetaLoss){
	154	_bestLossBeta = getRateBeta(lossDist);
	155	++numberOfParameters;
	156	}
	157	}
	158	// overall rate
	159	if(optimizeRateAlpha){
	160	_bestRateAlpha = getRateAlpha(static_cast<gammaDistribution*>(sp.distr()));
	161	++numberOfParameters;
	162	}
	163	if(optimizeRateProbInvariant){
	164	_bestRateProbInvariant = static_cast<generalGammaDistributionPlusInvariant*>((sp.distr()))->getInvProb();
	165	++numberOfParameters; }
	166
	167	if(evalTheta){
	168	++numberOfParameters;
	169	}
	170	_bestTheta = static_cast<gainLossModel*>(sp.getPijAccelerator()->getReplacementModel())->getTheta(); // taken either way
	171	_bestGainLossRatio = computeExpOfGainByExpOfLossRatio(gainDist, lossDist); //(_bestGainAlpha/_bestGainBeta)/(_bestLossAlpha/_bestLossBeta);
	172	MDOUBLE epsilonOptimizationIterFactor = numberOfParameters;
	173	epsilonOptimizationIterFactor = max(3.0,epsilonOptimizationIterFactor);
	174	MDOUBLE epsilonOptimizationIter = epsilonOptimization*epsilonOptimizationIterFactor; // for e=0.1 next iteration only for ~0.5 logL points
	175
	176	// optimize
	177	LOGnOUT(3,<<"### "<<"optimization starting- epsilonOptParam="<<epsilonOptimization<<" epsilonOptIter= "<<epsilonOptimizationIter<<", MaxNumIterations="<<numIterations<<endl);
	178	LOGnOUT(3,<<"start optimization with:" <<endl<<" L= "<<_bestL<<endl
	179	<<" gainLossRatio= "<<_bestGainLossRatio<<endl
	180	<<" GainAlpha= "<<_bestGainAlpha<<" GainBeta= "<<_bestGainBeta<<endl
	181	<<" LossAlpha= "<<_bestLossAlpha<<" LossBeta= "<<_bestLossBeta<<endl);
	182	if(evalTheta) LOGnOUT(3,<<" Theta= "<<_bestTheta);
	183	if(optimizeGLProbInvariant) LOGnOUT(3,<<" GainProbInvariant= "<<_bestGainProbInvariant<<" LossProbInvariant= "<<_bestLossProbInvariant<<endl);
	184	if(optimizeRateAlpha) LOGnOUT(3,<<" RateAlpha= "<<_bestRateAlpha<<endl);
	185	if(optimizeRateProbInvariant) LOGnOUT(3,<<" RateProbInvariant= "<<_bestRateProbInvariant<<endl);
	186
	187	int iter;
	188	for (iter=1;iter<=numIterations;iter++)
	189	{
	190	previousL = _bestL; // before loop likelihood
	191	LOGnOUT(4,<<"\n---- iter="<<iter<<endl);
	192	//bool isOptimizeModelParametersInRandomOrderNoReturns = true;
	193	//int numOfParameters = 9;
	194	//Vint paramsAlreadyOptimizedV;
	195	//int curParam;
	196	//for(int parInd=1; parInd<=numOfParameters; ++parInd){
	197	// bool isParamAlreadyOptimized = true;
	198	// if(isOptimizeModelParametersInRandomOrderNoReturns){
	199	// while (isParamAlreadyOptimized) {
	200	// curParam = floor(talRandom::giveRandomNumberBetweenTwoPoints(1,numOfParameters+1));
	201	// int::iterator begin = paramsAlreadyOptimizedV.begin();
	202	// int::iterator end = paramsAlreadyOptimizedV.end();
	203
	204
	205	// if(! paramsAlreadyOptimizedV.find(begin,end,curParam)){
	206	// isParamAlreadyOptimized = false;
	207	// }
	208	// }
	209
	210	// }
	211	// else if (gainLossOptions::_isStartWithTheta) {
	212	// curParam = C_evalParamVV::theta;
	213
	214	// }
	215	// else{
	216	// curParam = parInd;
	217	// }
	218	//}
	219
	220	// optimization - Freq (Theta)
	221	if (gainLossOptions::_isStartWithTheta && evalTheta && !gainLossOptions::_isRootFreqEQstationary){
	222	currBestL = -brent(MINIMUM_FREQ_PARAM,_bestTheta,MAXIMUM_FREQ_PARAM,
	223	C_evalParamVV(tr,spVVec,sc,C_evalParamVV::theta, gainDist,lossDist,isReversible,_weightsUniqPatterns,_unObservableData_p),
	224	epsilonOptimization*gainLossOptions::_epsilonOptimizationThetaFactor,&currTheta);
	225	if (currBestL>_bestL) {
	226	updateTheta(currTheta,spVVec,gainDist,lossDist);
	227	sumPijQij = normalizeQ(spVVec, gainDist, lossDist); // TEST
	228	if(_unObservableData_p) _unObservableData_p->setLforMissingData(tr,spVVec,gainDist,lossDist);
	229	LOGnOUT(4,<<"currBestL= "<<currBestL<<"\tTheta="<<currTheta<<endl);
	230	_bestTheta=currTheta;
	231	_bestL=currBestL;
	232	}
	233	}
	234	// gainLoss ratio
	235	if(gainLossOptions::_isOptimizeGainLossRatioInsteadOfGainAndLossSeperately && !Parameters::getInt("_keepUserGainLossRatio")){
	236	currBestL = -brent(MINMUM_GAIN_LOSS_RATIO_PARAM,_bestGainLossRatio,MAXIMUM_GAIN_LOSS_RATIO_PARAM,
	237	C_evalParamVV(tr,spVVec,sc,C_evalParamVV::gainLossRatio, gainDist,lossDist,isReversible,_weightsUniqPatterns,_unObservableData_p),epsilonOptimization,&currGainLossRatio);
	238	if(gainLossOptions::_isOptimizeParamsWithLogMinMax) currGainLossRatio = pow(10,currGainLossRatio);
	239	if (currBestL>_bestL) {
	240	MDOUBLE gainLossRatioToCompleteByBeta = currGainLossRatio * (getRateAlpha(lossDist)/getRateAlpha(gainDist));
	241	if(gainLossOptions::_isUpdateOnlyGainBetaForRatio){
	242	currGainBeta = (getRateBeta(lossDist)/gainLossRatioToCompleteByBeta);
	243	updateGainBeta(currGainBeta,spVVec,gainDist,lossDist);
	244	}else{
	245	currGainBeta = sqrt(1.0/gainLossRatioToCompleteByBeta);
	246	currLossBeta = sqrt(gainLossRatioToCompleteByBeta);
	247	updateGainBeta(currGainBeta,spVVec,gainDist,lossDist);
	248	updateLossBeta(currLossBeta,spVVec,gainDist,lossDist);
	249	}
	250	sumPijQij = normalizeQ(spVVec, gainDist, lossDist); // TEST
	251	if(_unObservableData_p) _unObservableData_p->setLforMissingData(tr,spVVec,gainDist,lossDist);
	252	LOGnOUT(4,<<"currBestL= "<<currBestL<<"\tGainLossRatio="<<currGainLossRatio<<endl);
	253	_bestGainLossRatio=currGainLossRatio;
	254	_bestGainBeta=currGainBeta;
	255	_bestLossBeta=currLossBeta;
	256	_bestL=currBestL;
	257	MDOUBLE currentlogL = likelihoodComputationGL::getTreeLikelihoodAllPosAlphTheSame(tr,sc,spVVec,gainDist,lossDist,_weightsUniqPatterns,_unObservableData_p);
	258	//if(!DEQUAL(currentlogL,_bestL)){ //DEQUAL(currentlogL,bestL)
	259	// LOGnOUT(3,<<"!!! ERROR: different likelihood after optimizeGainLossModel,diff= "<<currentlogL-_bestL <<"\n");
	260	//}
	261
	262	}
	263	}else{
	264	// optimization - GainBeta
	265	if(optimizeBetaGain && !Parameters::getInt("_keepUserGainLossRatio")){
	266	currBestL = -brent(MINIMUM_BETA_PARAM,_bestGainBeta,MAXIMUM_BETA_PARAM,
	267	C_evalParamVV(tr,spVVec,sc,C_evalParamVV::gainBeta, gainDist,lossDist,isReversible,_weightsUniqPatterns,_unObservableData_p),epsilonOptimization,&currGainBeta);
	268	if (currBestL>_bestL) {
	269	updateGainBeta(currGainBeta,spVVec,gainDist,lossDist);
	270	sumPijQij = normalizeQ(spVVec, gainDist, lossDist); // TEST
	271	if(_unObservableData_p) _unObservableData_p->setLforMissingData(tr,spVVec,gainDist,lossDist);
	272	LOGnOUT(4,<<"currBestL= "<<currBestL<<"\tGainBeta="<<currGainBeta<<endl);
	273	_bestGainBeta=currGainBeta;
	274	_bestL=currBestL;
	275	//MDOUBLE currentlogL = likelihoodComputationGL::getTreeLikelihoodAllPosAlphTheSame(tr,sc,spVVec,gainDist,lossDist,_weightsUniqPatterns,_unObservableData_p);
	276	//if(!DEQUAL(currentlogL,_bestL)){ //DEQUAL(currentlogL,bestL)
	277	// LOGnOUT(3,<<"!!! ERROR: different likelihood after optimizeGainLossModel,diff= "<<currentlogL-_bestL <<"\n");
	278	//}
	279	}
	280	}
	281	// optimization - LossBeta
	282	if(optimizeBetaLoss && !Parameters::getInt("_keepUserGainLossRatio")){
	283	currBestL = -brent(MINIMUM_BETA_PARAM,_bestLossBeta,MAXIMUM_BETA_PARAM,
	284	C_evalParamVV(tr,spVVec,sc,C_evalParamVV::lossBeta, gainDist,lossDist,isReversible,_weightsUniqPatterns,_unObservableData_p),epsilonOptimization,&currLossBeta);
	285	if (currBestL>_bestL) {
	286	updateLossBeta(currLossBeta,spVVec,gainDist,lossDist);
	287	sumPijQij = normalizeQ(spVVec, gainDist, lossDist); // TEST
	288	if(_unObservableData_p) _unObservableData_p->setLforMissingData(tr,spVVec,gainDist,lossDist);
	289	LOGnOUT(4,<<"currBestL= "<<currBestL<<"\tLossBeta="<<currLossBeta<<endl);
	290	_bestLossBeta=currLossBeta;
	291	_bestL=currBestL;
	292	//MDOUBLE currentlogL = likelihoodComputationGL::getTreeLikelihoodAllPosAlphTheSame(tr,sc,spVVec,gainDist,lossDist,_weightsUniqPatterns,_unObservableData_p);
	293	//if(!DEQUAL(currentlogL,_bestL)){ //DEQUAL(currentlogL,bestL)
	294	// LOGnOUT(3,<<"!!! ERROR: different likelihood after optimizeGainLossModel,diff= "<<currentlogL-_bestL <<"\n");
	295	//}
	296	}
	297	}
	298	}
	299
	300	// optimization - GainAlpha
	301	if(optimizeAlphasGainLoss){
	302	currBestL = -brent(MINIMUM_ALPHA_PARAM,_bestGainAlpha,MAXIMUM_ALPHA_PARAM,
	303	C_evalParamVV(tr,spVVec,sc,C_evalParamVV::gainAlpha, gainDist,lossDist,isReversible,_weightsUniqPatterns,_unObservableData_p),epsilonOptimization,&currGainAlpha);
	304	if (currBestL>_bestL) {
	305	if(1){ // keep gainLossRatio
	306	MDOUBLE previousAlpha = getRateAlpha(gainDist);
	307	MDOUBLE increaseToGainLossRatioInducedByAlphaModification = currGainAlpha/previousAlpha;
	308	currGainBeta = getRateBeta(gainDist)*increaseToGainLossRatioInducedByAlphaModification;
	309	updateGainBeta( currGainBeta, spVVec,gainDist,lossDist);
	310	_bestGainBeta = currGainBeta;
	311	}
	312	updateGainAlpha(currGainAlpha,spVVec,gainDist,lossDist);
	313	sumPijQij = normalizeQ(spVVec, gainDist, lossDist); // TEST
	314	if(_unObservableData_p) _unObservableData_p->setLforMissingData(tr,spVVec,gainDist,lossDist);
	315	LOGnOUT(4,<<"currBestL= "<<currBestL<<"\tGainAlpha="<<currGainAlpha<<endl);
	316	_bestGainAlpha=currGainAlpha;
	317	_bestL=currBestL;
	318	//MDOUBLE currentlogL = likelihoodComputationGL::getTreeLikelihoodAllPosAlphTheSame(tr,sc,spVVec,gainDist,lossDist,_weightsUniqPatterns,_unObservableData_p);
	319	//if(!DEQUAL(currentlogL,_bestL)){ //DEQUAL(currentlogL,bestL)
	320	// LOGnOUT(3,<<"!!! ERROR: different likelihood after optimizeGainLossModel,diff= "<<currentlogL-_bestL <<"\n");
	321	//}
	322	}
	323	}
	324	// optimization - GainProbInvariant
	325	if(optimizeGLProbInvariant && !Parameters::getInt("_keepUserGainLossRatio")){
	326	currBestL = -brent(MINIMUM_PROB_PARAM,_bestGainProbInvariant,MAXIMUM_PROB_PARAM,
	327	C_evalParamVV(tr,spVVec,sc,C_evalParamVV::gainProbInvariant, gainDist,lossDist,isReversible,_weightsUniqPatterns,_unObservableData_p),epsilonOptimization,&currGainProbInvariant);
	328	if (currBestL>_bestL) {
	329	updateGainProbInvariant(currGainProbInvariant,gainDist);
	330	sumPijQij = normalizeQ(spVVec, gainDist, lossDist); // TEST
	331	if(_unObservableData_p) _unObservableData_p->setLforMissingData(tr,spVVec,gainDist,lossDist);
	332	LOGnOUT(4,<<"currBestL= "<<currBestL<<"\tGainProbInvariant="<<currGainProbInvariant<<endl);
	333	_bestGainProbInvariant=currGainProbInvariant;
	334	_bestL=currBestL;
	335	//MDOUBLE currentlogL = likelihoodComputationGL::getTreeLikelihoodAllPosAlphTheSame(tr,sc,spVVec,gainDist,lossDist,_weightsUniqPatterns,_unObservableData_p);
	336	//if(!DEQUAL(currentlogL,_bestL)){ //DEQUAL(currentlogL,bestL)
	337	// LOGnOUT(3,<<"!!! ERROR: different likelihood after optimizeGainLossModel,diff= "<<currentlogL-_bestL <<"\n");
	338	//}
	339	}
	340	}
	341	// optimization - LossAlpha
	342	if (!isReversible ){
	343	if(optimizeAlphasGainLoss){
	344	currBestL = -brent(MINIMUM_ALPHA_PARAM,_bestLossAlpha,MAXIMUM_ALPHA_PARAM,
	345	C_evalParamVV(tr,spVVec,sc,C_evalParamVV::lossAlpha, gainDist,lossDist,isReversible,_weightsUniqPatterns,_unObservableData_p),epsilonOptimization,&currLossAlpha);
	346	if (currBestL>_bestL) {
	347	if(1){ // keep gainLossRatio
	348	MDOUBLE previousAlpha = getRateAlpha(lossDist);
	349	MDOUBLE increaseToGainLossRatioInducedByAlphaModification = currLossAlpha/previousAlpha;
	350	currLossBeta = getRateBeta(lossDist)*increaseToGainLossRatioInducedByAlphaModification;
	351	updateLossBeta( currLossBeta, spVVec,gainDist,lossDist);
	352	_bestLossBeta = currLossBeta;
	353	}
	354	updateLossAlpha(currLossAlpha,spVVec,gainDist,lossDist);
	355	sumPijQij = normalizeQ(spVVec, gainDist, lossDist); // TEST
	356	if(_unObservableData_p) _unObservableData_p->setLforMissingData(tr,spVVec,gainDist,lossDist);
	357	LOGnOUT(4,<<"currBestL= "<<currBestL<<"\tLossAlpha="<<currLossAlpha<<endl);
	358	_bestLossAlpha=currLossAlpha;
	359	_bestL=currBestL;
	360	//MDOUBLE currentlogL = likelihoodComputationGL::getTreeLikelihoodAllPosAlphTheSame(tr,sc,spVVec,gainDist,lossDist,_weightsUniqPatterns,_unObservableData_p);
	361	//if(!DEQUAL(currentlogL,_bestL)){ //DEQUAL(currentlogL,bestL)
	362	// LOGnOUT(3,<<"!!! ERROR: different likelihood after optimizeGainLossModel,diff= "<<currentlogL-_bestL <<"\n");
	363	//}
	364	}
	365	}
	366	// optimization - LossprobInvariant
	367	if(optimizeGLProbInvariant && !Parameters::getInt("_keepUserGainLossRatio"))
	368	{
	369	currBestL = -brent(MINIMUM_PROB_PARAM,_bestLossProbInvariant,MAXIMUM_PROB_PARAM,
	370	C_evalParamVV(tr,spVVec,sc,C_evalParamVV::lossProbInvariant, gainDist,lossDist,isReversible,_weightsUniqPatterns,_unObservableData_p),epsilonOptimization,&currLossProbInvariant);
	371	if (currBestL>_bestL) {
	372	updateLossProbInvariant(currLossProbInvariant,lossDist);
	373	sumPijQij = normalizeQ(spVVec, gainDist, lossDist); // TEST
	374	if(_unObservableData_p) _unObservableData_p->setLforMissingData(tr,spVVec,gainDist,lossDist);
	375	LOGnOUT(4,<<"currBestL= "<<currBestL<<"\tLossProbInvariant="<<currLossProbInvariant<<endl);
	376	_bestLossProbInvariant=currLossProbInvariant;
	377	_bestL=currBestL;
	378	//MDOUBLE currentlogL = likelihoodComputationGL::getTreeLikelihoodAllPosAlphTheSame(tr,sc,spVVec,gainDist,lossDist,_weightsUniqPatterns,_unObservableData_p);
	379	//if(!DEQUAL(currentlogL,_bestL)){ //DEQUAL(currentlogL,bestL)
	380	// LOGnOUT(3,<<"!!! ERROR: different likelihood after optimizeGainLossModel,diff= "<<currentlogL-_bestL <<"\n");
	381	//}
	382	}
	383	}
	384	}
	385	// optimize rateAlpha - additionally (inner sp)...
	386	if(optimizeRateAlpha){
	387	currBestL = -brent(MINIMUM_ALPHA_PARAM,_bestRateAlpha,MAXIMUM_ALPHA_PARAM,
	388	C_evalParamVV(tr,spVVec,sc,C_evalParamVV::rateAlpha, gainDist,lossDist,isReversible,_weightsUniqPatterns,_unObservableData_p),epsilonOptimization,&currRateAlpha);
	389	if (currBestL>_bestL) {
	390	updateRateAlpha(currRateAlpha,spVVec,gainDist,lossDist);
	391	sumPijQij = normalizeQ(spVVec, gainDist, lossDist); // TEST
	392	if(_unObservableData_p) _unObservableData_p->setLforMissingData(tr,spVVec,gainDist,lossDist);
	393	LOGnOUT(4,<<"currBestL= "<<currBestL<<"\tRate Alpha="<<currRateAlpha<<endl);
	394	_bestRateAlpha=currRateAlpha;
	395	_bestL=currBestL;
	396	//MDOUBLE currentlogL = likelihoodComputationGL::getTreeLikelihoodAllPosAlphTheSame(tr,sc,spVVec,gainDist,lossDist,_weightsUniqPatterns,_unObservableData_p);
	397	//if(!DEQUAL(currentlogL,_bestL)){ //DEQUAL(currentlogL,bestL)
	398	// LOGnOUT(3,<<"!!! ERROR: different likelihood after optimizeGainLossModel,diff= "<<currentlogL-_bestL <<"\n");
	399	//}
	400	}
	401	}
	402	// optimization - RateprobInvariant
	403	if(optimizeRateProbInvariant){
	404	currBestL = -brent(MINIMUM_PROB_PARAM,_bestRateProbInvariant,MAXIMUM_PROB_PARAM,
	405	C_evalParamVV(tr,spVVec,sc,C_evalParamVV::rateProbInvariant, gainDist,lossDist,isReversible,_weightsUniqPatterns,_unObservableData_p),epsilonOptimization,&currRateProbInvariant);
	406	if (currBestL>_bestL) {
	407	updateRateProbInvariant(currRateProbInvariant,spVVec,gainDist,lossDist);
	408	sumPijQij = normalizeQ(spVVec, gainDist, lossDist); // TEST
	409	if(_unObservableData_p) _unObservableData_p->setLforMissingData(tr,spVVec,gainDist,lossDist);
	410	LOGnOUT(4,<<"currBestL= "<<currBestL<<"\tLossProbInvariant="<<currRateProbInvariant<<endl);
	411	_bestRateProbInvariant=currRateProbInvariant;
	412	_bestL=currBestL;
	413	//MDOUBLE currentlogL = likelihoodComputationGL::getTreeLikelihoodAllPosAlphTheSame(tr,sc,spVVec,gainDist,lossDist,_weightsUniqPatterns,_unObservableData_p);
	414	//if(!DEQUAL(currentlogL,_bestL)){ //DEQUAL(currentlogL,bestL)
	415	// LOGnOUT(3,<<"!!! ERROR: different likelihood after optimizeGainLossModel,diff= "<<currentlogL-_bestL <<"\n");
	416	//}
	417	}
	418	}
	419
	420	// optimization - Freq (Theta)
	421	if (!gainLossOptions::_isStartWithTheta && evalTheta && !gainLossOptions::_isRootFreqEQstationary){
	422	currBestL = -brent(MINIMUM_FREQ_PARAM,_bestTheta,MAXIMUM_FREQ_PARAM,
	423	C_evalParamVV(tr,spVVec,sc,C_evalParamVV::theta, gainDist,lossDist,isReversible,_weightsUniqPatterns,_unObservableData_p),
	424	epsilonOptimization*gainLossOptions::_epsilonOptimizationThetaFactor,&currTheta);
	425	if (currBestL>_bestL) {
	426	updateTheta(currTheta,spVVec,gainDist,lossDist);
	427	sumPijQij = normalizeQ(spVVec, gainDist, lossDist); // TEST
	428	if(_unObservableData_p) _unObservableData_p->setLforMissingData(tr,spVVec,gainDist,lossDist);
	429	LOGnOUT(4,<<"currBestL= "<<currBestL<<"\tTheta="<<currTheta<<endl);
	430	_bestTheta=currTheta;
	431	_bestL=currBestL;
	432	//MDOUBLE currentlogL = likelihoodComputationGL::getTreeLikelihoodAllPosAlphTheSame(tr,sc,spVVec,gainDist,lossDist,_weightsUniqPatterns,_unObservableData_p);
	433	//if(!DEQUAL(currentlogL,_bestL)){ //DEQUAL(currentlogL,bestL)
	434	// LOGnOUT(3,<<"!!! ERROR: different likelihood after optimizeGainLossModel,diff= "<<currentlogL-_bestL <<"\n");
	435	//}
	436	}
	437	}
	438	if (!(_bestL>previousL+epsilonOptimizationIter)){ // previousL is before loop likelihood - if no epsilon improvment => break
	439	if(_unObservableData_p) _unObservableData_p->setLforMissingData(tr,spVVec,gainDist,lossDist); //not clear needed...
	440	LOGnOUT(3,<<" model optimization converged. Iter= "<<iter<<" Likelihood="<<_bestL<<endl);
	441	_bestL=max(_bestL,currBestL); // not to reduce likelihood. currBestL, returning from brent may be lower
	442	//if(!DEQUAL(currBestL,_bestL)){ //DEQUAL(currentlogL,bestL)
	443	// LOGnOUT(3,<<"!!! ERROR: different likelihood after optimizeGainLossModel,diff= "<<currBestL-_bestL <<"\n");
	444	//}
	445	break;
	446	}
	447	if(gainLossOptions::_simulatedAnnealing)
	448	epsilonOptimization = max(epsilonOptimizationgainLossOptions::_simulatedAnnealingCoolingFactor,0.1gainLossOptions::_simulatedAnnealingMinEpsilonFactor); //simulated annealing
	449	}
	450	if (iter>=numIterations){
	451	_bestL=max(_bestL,currBestL); // not to reduce likelihood. currBestL, returning from brent may be lower
	452	LOGnOUT(3,<<" Too many iterations in optimizeGainLossModelVV. Iter= "<<iter<< " Last optimized parameters are used."<<endl);
	453	}
	454	//if(currUnObservableData_p) delete currUnObservableData_p;
	455	}
	456
	457
	458
	459

+197

-0

programs/gainLoss/optimizeGainLossModelVV.h less more

	0	/*
	1	Copyright (C) 2011 Tal Pupko TalP@tauex.tau.ac.il.
	2
	3	This program is free software: you can redistribute it and/or modify
	4	it under the terms of the GNU General Public License as published by
	5	the Free Software Foundation, either version 3 of the License, or
	6	(at your option) any later version.
	7
	8	This program is distributed in the hope that it will be useful,
	9	but WITHOUT ANY WARRANTY; without even the implied warranty of
	10	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	11	GNU General Public License for more details.
	12
	13	You should have received a copy of the GNU General Public License
	14	along with this program. If not, see <http://www.gnu.org/licenses/>.
	15	*/
	16
	17
	18	#ifndef ___OPTIMIZE_GLM_VV
	19	#define ___OPTIMIZE_GLM_VV
	20
	21	#include "bblEM.h"
	22	#include "bestAlpha.h"
	23	#include "computePijComponent.h"
	24	#include "computeUpAlg.h"
	25	#include "definitions.h"
	26	#include "gainLossModel.h"
	27	#include "gammaDistribution.h"
	28	#include "generalGammaDistribution.h"
	29	#include "generalGammaDistributionPlusInvariant.h"
	30	#include "distributionPlusInvariant.h"
	31	#include "likelihoodComputation.h"
	32	#include "likelihoodComputationGL.h"
	33	#include "numRec.h"
	34	#include "sequenceContainer.h"
	35	#include "stochasticProcess.h"
	36	#include "tree.h"
	37	#include "talRandom.h"
	38	#include "gainLossUtils.h"
	39	#include "unObservableData.h"
	40
	41
	42	class optimizeGainLossModelVV {
	43	public:
	44	explicit optimizeGainLossModelVV(const tree& tr,
	45	vector<vector<stochasticProcess*> >& spVVec, const sequenceContainer &sc,
	46	distribution * gainDist, distribution * lossDist,
	47	const bool isReversible,
	48	MDOUBLE epsilonOptimization, const int numIterations,
	49	Vdouble* weights,
	50	unObservableData* unObservableData_p);
	51
	52	MDOUBLE getBestGainAlpha() {return _bestGainAlpha;}
	53	MDOUBLE getBestGainBeta() {return _bestGainBeta;}
	54	MDOUBLE getBestGainProbInvariant() {return _bestGainProbInvariant;}
	55
	56	MDOUBLE getBestLossAlpha() {return _bestLossAlpha;}
	57	MDOUBLE getBestLossBeta() {return _bestLossBeta;}
	58	MDOUBLE getBestLossProbInvariant() {return _bestLossProbInvariant;}
	59
	60	MDOUBLE getBestTheta() {return _bestTheta;}
	61	MDOUBLE getBestRateAlpha() {return _bestRateAlpha;}
	62	MDOUBLE getBestRateProbInvariant() {return _bestRateProbInvariant;}
	63
	64	MDOUBLE getBestL() {return _bestL;}
	65
	66	private:
	67	MDOUBLE _bestGainAlpha;
	68	MDOUBLE _bestGainBeta;
	69	MDOUBLE _bestGainProbInvariant;
	70
	71	MDOUBLE _bestLossAlpha; // for non-reversible model only
	72	MDOUBLE _bestLossBeta;
	73	MDOUBLE _bestLossProbInvariant;
	74
	75	MDOUBLE _bestRateAlpha;
	76	MDOUBLE _bestRateProbInvariant;
	77	MDOUBLE _bestTheta;
	78	MDOUBLE _bestL;
	79	MDOUBLE _bestGainLossRatio;
	80	unObservableData* _unObservableData_p;
	81	Vdouble* _weightsUniqPatterns;
	82	};
	83
	84	/********************************************************************************************
	85	*********************************************************************************************/
	86	/********************************************************************************************
	87	*********************************************************************************************/
	88	class C_evalParamVV {
	89	public:
	90	C_evalParamVV(const tree& tr,
	91	const vector<vector<stochasticProcess*> >& spVVec, const sequenceContainer &sc, int which_mu,
	92	const distribution* gainDist, const distribution* lossDist,
	93	bool isReversible,Vdouble* weights , const unObservableData* unObservableData_p)
	94	: _tr(tr),_sc(sc),_which_param(which_mu),_isReversible(isReversible),_weights(weights)
	95	{
	96	_gainDist=gainDist->clone();
	97	_lossDist=lossDist->clone();
	98	_spVVec.resize(_gainDist->categories());
	99	for (int gainCategor=0; gainCategor<_gainDist->categories(); gainCategor++){
	100	_spVVec[gainCategor].resize(_lossDist->categories());
	101	for (int lossCategor=0; lossCategor<_lossDist->categories(); lossCategor++){
	102	_spVVec[gainCategor][lossCategor] = spVVec[gainCategor][lossCategor]->clone();
	103	}
	104	}
	105	if(unObservableData_p)
	106	_unObservableData_p = unObservableData_p->clone();
	107	else
	108	_unObservableData_p = NULL;
	109
	110	};
	111	virtual ~C_evalParamVV(){
	112	if(_spVVec[0][0]){
	113	for (int gainCategor=0; gainCategor<_gainDist->categories(); gainCategor++){
	114	for (int lossCategor=0; lossCategor<_lossDist->categories(); lossCategor++){
	115	delete _spVVec[gainCategor][lossCategor];
	116	}
	117	}
	118	}
	119	if(_gainDist)
	120	delete _gainDist;
	121	if(_lossDist)
	122	delete _lossDist;
	123	if(_unObservableData_p)
	124	delete _unObservableData_p;
	125
	126	}
	127	private:
	128	const tree& _tr;
	129	vector<vector<stochasticProcess*> > _spVVec;
	130	distribution* _gainDist;
	131	distribution* _lossDist;
	132	const sequenceContainer &_sc;
	133	int _which_param;
	134	bool _isReversible;
	135	unObservableData* _unObservableData_p;
	136	Vdouble* _weights;
	137
	138	public:
	139	enum paramName {gainAlpha,gainBeta,gainProbInvariant,lossAlpha,lossBeta,lossProbInvariant,rateAlpha,rateProbInvariant,theta,gainLossRatio};
	140
	141	MDOUBLE operator() (MDOUBLE param) {
	142	MDOUBLE gainLossRatioToCompleteByBeta = 1;
	143	MDOUBLE sumPijQij = 1;
	144	MDOUBLE previousAlpha = 1;
	145	MDOUBLE increaseToGainLossRatioInducedByAlphaModification = 1;
	146
	147	switch (_which_param) {
	148	case (C_evalParamVV::gainAlpha) :
	149	if(1){ // keep gainLossRatio
	150	previousAlpha = getRateAlpha(_gainDist);
	151	increaseToGainLossRatioInducedByAlphaModification = param/previousAlpha;
	152	updateGainBeta(getRateBeta(_gainDist) * increaseToGainLossRatioInducedByAlphaModification,_spVVec,_gainDist,_lossDist);
	153	}
	154	updateGainAlpha(param,_spVVec,_gainDist,_lossDist);
	155	break;
	156	case (C_evalParamVV::gainBeta) : updateGainBeta(param,_spVVec,_gainDist,_lossDist); break;
	157	case (C_evalParamVV::gainProbInvariant) : updateGainProbInvariant(param,_gainDist); break;
	158
	159	case (C_evalParamVV::lossAlpha) :
	160	if(1){ // keep gainLossRatio
	161	previousAlpha = getRateAlpha(_lossDist);
	162	increaseToGainLossRatioInducedByAlphaModification = param/previousAlpha;
	163	updateLossBeta(getRateBeta(_lossDist) * increaseToGainLossRatioInducedByAlphaModification,_spVVec,_gainDist,_lossDist);
	164	}
	165	updateLossAlpha(param,_spVVec,_gainDist,_lossDist);
	166	break;
	167	case (C_evalParamVV::lossBeta) : updateLossBeta(param,_spVVec,_gainDist,_lossDist); break;
	168	case (C_evalParamVV::lossProbInvariant) : updateLossProbInvariant(param,_lossDist); break;
	169
	170	case (C_evalParamVV::gainLossRatio) :
	171	if(gainLossOptions::_isOptimizeParamsWithLogMinMax) param = pow(10,param);
	172	gainLossRatioToCompleteByBeta = param * (getRateAlpha(_lossDist)/getRateAlpha(_gainDist));
	173	if(gainLossOptions::_isUpdateOnlyGainBetaForRatio)
	174	updateGainBeta(getRateBeta(_lossDist)/gainLossRatioToCompleteByBeta,_spVVec,_gainDist,_lossDist);
	175	else{
	176	updateGainBeta(sqrt(1.0/gainLossRatioToCompleteByBeta),_spVVec,_gainDist,_lossDist);
	177	updateLossBeta(sqrt(gainLossRatioToCompleteByBeta),_spVVec,_gainDist,_lossDist);
	178	}
	179	//norm_factor = normalizeQ(_spVVec, _gainDist, _lossDist);
	180	break;
	181	case (C_evalParamVV::rateAlpha) : updateRateAlpha(param,_spVVec,_gainDist,_lossDist); break;
	182	case (C_evalParamVV::rateProbInvariant) : updateRateProbInvariant(param,_spVVec,_gainDist,_lossDist); break;
	183	case (C_evalParamVV::theta) : updateTheta(param,_spVVec,_gainDist,_lossDist); break;
	184	}
	185	sumPijQij = normalizeQ(_spVVec, _gainDist, _lossDist);
	186
	187	if(_unObservableData_p) _unObservableData_p->setLforMissingData(_tr,_spVVec,_gainDist,_lossDist);
	188	MDOUBLE res = likelihoodComputationGL::getTreeLikelihoodAllPosAlphTheSame(_tr,_sc,_spVVec,_gainDist,_lossDist,_weights,_unObservableData_p);
	189	normVec(sumPijQij,_spVVec, _gainDist, _lossDist); // reverse the normalization after likelihood computation.
	190	LOG(5,<<"with val= "<<param<<" which_param:: "<<_which_param<<" L="<<res<<endl);
	191	return -res;
	192	}
	193	};
	194
	195
	196	#endif

+48

-0

programs/gainLoss/rate4Triad.cpp less more

	0	/*
	1	Copyright (C) 2011 Tal Pupko TalP@tauex.tau.ac.il.
	2
	3	This program is free software: you can redistribute it and/or modify
	4	it under the terms of the GNU General Public License as published by
	5	the Free Software Foundation, either version 3 of the License, or
	6	(at your option) any later version.
	7
	8	This program is distributed in the hope that it will be useful,
	9	but WITHOUT ANY WARRANTY; without even the implied warranty of
	10	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	11	GNU General Public License for more details.
	12
	13	You should have received a copy of the GNU General Public License
	14	along with this program. If not, see <http://www.gnu.org/licenses/>.
	15	*/
	16	#include "rate4Triad.h"
	17	#include "errorMsg.h"
	18	#include "logFile.h"
	19	#include "gainLossOptions.h"
	20
	21	using namespace std;
	22
	23	/********************************************************************************************
	24	rate4Triad
	25	*********************************************************************************************/
	26	rate4Triad::rate4Triad(const stochasticProcess* sp, const Vdouble& exp01V, const Vdouble& exp10V):
	27	_sp(sp), _exp01V(exp01V), _exp10V(exp10V)
	28	{
	29	if(!(_rateV.size()%3==0)){
	30	errorMsg::reportError("the length of the rates vector is not 'Triaded'");
	31	}
	32	}
	33
	34	/********************************************************************************************
	35	*********************************************************************************************/
	36	//void rate4Triad::computePosteriorExpectationOfChangePerTriad(){
	37	// LOGnOUT(4,<<"Starting calculePosteriorExpectationOfChange for Triad..."<<endl);
	38	//
	39	// ofstream posteriorExpectationStreamTriad(gainLossOptions::_outFilePosteriorExpectationOfChangeTriad.c_str());
	40	// posteriorExpectationStreamTriad<<"POS"<<"\t"<<"000-001"<<"\t"<<"000-010"<<endl;
	41	//
	42	//
	43	// // printOut the final results
	44	// for (int pos = 0; pos <_sc.seqLen(); pos+=3){
	45	// posteriorExpectationStreamTriad<<pos+1<<"\t"<<expV01[pos]<<"\t"<<expV10[pos]<<endl;
	46	// }
	47	//}

+46

-0

programs/gainLoss/rate4Triad.h less more

	0	/*
	1	Copyright (C) 2011 Tal Pupko TalP@tauex.tau.ac.il.
	2
	3	This program is free software: you can redistribute it and/or modify
	4	it under the terms of the GNU General Public License as published by
	5	the Free Software Foundation, either version 3 of the License, or
	6	(at your option) any later version.
	7
	8	This program is distributed in the hope that it will be useful,
	9	but WITHOUT ANY WARRANTY; without even the implied warranty of
	10	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	11	GNU General Public License for more details.
	12
	13	You should have received a copy of the GNU General Public License
	14	along with this program. If not, see <http://www.gnu.org/licenses/>.
	15	*/
	16
	17
	18	#ifndef ___RATE_4_TRIAD
	19	#define ___RATE_4_TRIAD
	20
	21	#include "definitions.h"
	22
	23
	24
	25	class rate4Triad {
	26	public:
	27	explicit rate4Triad(const stochasticProcess* sp, const Vdouble& exp01V, const Vdouble& exp10V);
	28	virtual ~rate4Triad(){};
	29
	30	//void rate4Triad::computePosteriorExpectationOfChangePerTriad();
	31
	32
	33
	34
	35
	36
	37	private:
	38	const stochasticProcess* _sp;
	39	//const Vdouble &_rateV;
	40	const Vdouble &_exp01V;
	41	const Vdouble &_exp10V;
	42
	43	};
	44
	45	#endif

+120

-0

programs/gainLoss/rate4site.gainLossFunctions.txt less more

	0	/********************************************************************************************
	1	*********************************************************************************************/
	2	void rate4siteGL::printRatesML(ostream& out, const Vdouble & rate2print) {
	3	out<<"#Rates were calculated using Maximum Likelihood"<<endl;
	4	out<<"#SEQ: The presence(1) or Absence(0) in the reference sequence."<<"Displayed on sequence "<<_refSeq->name()<<endl;
	5	out<<"#SCORE: The conservation scores. lower value = higher conservation."<<endl;
	6	out<<"#MSA DATA: The number of aligned sequences having an amino acid (non-gapped) from the overall number of sequences at each position."<<endl;
	7	out<<endl;
	8	out<<"========================================================================================================================================================="<<endl;
	9	out<<"#POS"<<"\t"<<"SEQ"<<"\t"<<"SCORE"<<"\t"<<"MSA DATA"<<endl; // note position start from 1.
	10	out<<"========================================================================================================================================================="<<endl;
	11
	12	#ifdef unix
	13	for (int pos=0; pos < _sc.seqLen(); ++pos) {
	14	out<<pos+1<<"\t"<<_refSeq->getAlphabet()->fromInt((*_refSeq)[pos])<<"\t"<<setprecision(7)<<rate2print[pos]<<"\t";
	15	out<<_sc.numberOfSequencesWithoutGaps(pos)<<"/"<<_sc.numberOfSeqs()<<endl; // note position start from 1.
	16	}
	17	#else
	18	for (int pos=0; pos < _sc.seqLen(); ++pos) {
	19	out<<left<<pos+1<<left<<"\t"<<_refSeq->getAlphabet()->fromInt((*_refSeq)[pos])<<"\t";
	20	out<<left<<setprecision(7)<<fixed<<rate2print[pos]<<"\t";
	21	out<<right<<_sc.numberOfSequencesWithoutGaps(pos)<<"/"<<_sc.numberOfSeqs()<<endl; // note position start from 1.
	22	}
	23	#endif
	24	}
	25	/********************************************************************************************
	26	*********************************************************************************************/
	27	void rate4siteGL::printRatesBayes(ostream& out, const Vdouble & rate2print) {
	28	out<<"# Rates were calculated using the expectation of the posterior rate distribution"<<endl;
	29	out<<"# Prior distribution is Gamma with "<<gainLossOptions::_numberOfRateCategories<<" discrete categories"<<endl;
	30	out<<"# SEQ: The presence(1) or Absence(0) in the reference sequence."<<"Displayed on sequence "<<_refSeq->name()<<endl;
	31	out<<"# SCORE: The conservation scores. lower value = higher conservation."<<endl;
	32	out<<"# QQ-INTERVAL: the confidence interval for the rate estimates. The default interval is 25-75 percentiles"<<endl;
	33	out<<"# STD: the standard deviation of the posterior rate distribution."<<endl;
	34	out<<"# MSA DATA: The number of aligned sequences having an amino acid (non-gapped) from the overall number of sequences at each position."<<endl;
	35	MDOUBLE AlphaRate = getRateAlpha(_sp->distr());
	36	//if(dynamic_cast<gammaDistribution*>(_sp->distr()) ) {
	37	// AlphaRate = static_cast<gammaDistribution*>(_sp->distr())->getAlpha();
	38	//}
	39	//if(dynamic_cast<generalGammaDistributionPlusInvariant*>(_sp->distr())){
	40	// AlphaRate = static_cast<generalGammaDistributionPlusInvariant*>(_sp->distr())->getAlpha();
	41	//}
	42	//if(dynamic_cast<gammaDistributionFixedCategories*>(_sp->distr())){
	43	// AlphaRate = static_cast<gammaDistributionFixedCategories*>(_sp->distr())->getAlpha();
	44	//}
	45	out<<"# The alpha parameter "<<AlphaRate<<endl;
	46	int k=0;
	47	while (k < _sp->categories()){
	48	out<<"# sp.rates(j) j= " <<k<<"\t"<<_sp->rates(k)<<"\t"<<_sp->ratesProb(k)<<endl;
	49	k++;
	50	}
	51
	52
	53	out<<endl;
	54	out<<"========================================================================================================================================================="<<endl;
	55	out<<"#POS"<<"\t"<<"SEQ"<<"\t"<<"SCORE"<<"\t"<<"QQ-INTERVAL"<<"\t"<<"STD"<<"\t"<<"MSA DATA"<<endl; // note position start from 1.
	56	out<<"========================================================================================================================================================="<<endl;
	57
	58	#ifdef unix
	59	for (int pos=0; pos < _sc.seqLen(); ++pos) {
	60	out<<pos+1<<"\t"<<_refSeq->getAlphabet()->fromInt((*_refSeq)[pos])<<"\t"<<setprecision(7)<<rate2print[pos]<<"\t";
	61	out<<"["<<setprecision(4)<<_BayesianLowerBound[pos]<<","<<setprecision(4)<<_BayesianUpperBound[pos]<<"]"<<"\t";
	62	out<<setprecision(4)<<_BayesianSTD[pos]<<"\t";
	63	out<<_sc.numberOfSequencesWithoutGaps(pos)<<"/"<<_sc.numberOfSeqs()<<endl; // note position start from 1.
	64	}
	65	#else
	66	for (int pos=0; pos < _sc.seqLen(); ++pos) {
	67	out<<left<<pos+1;
	68	out<<left<<"\t"<<_refSeq->getAlphabet()->fromInt((*_refSeq)[pos])<<"\t";
	69	out<<left<<setprecision(7)<<fixed<<rate2print[pos]<<"\t";
	70	out<<right<<"["<<setprecision(4)<<left<<_BayesianLowerBound[pos]<<","<<setprecision(4)<<right<<_BayesianUpperBound[pos]<<"]"<<"\t";
	71	out<<right<<setprecision(4)<<_BayesianSTD[pos];
	72	out<<right<<"\t"<<_sc.numberOfSequencesWithoutGaps(pos)<<"/"<<_sc.numberOfSeqs()<<endl; // note position start from 1.
	73	}
	74	#endif
	75	}
	76	/********************************************************************************************
	77	*********************************************************************************************/
	78	void rate4siteGL::printAveAndStd(ostream& out) {
	79	out<<"#Average = "<<_ave<<endl;
	80	out<<"#Standard Deviation = "<<_std<<endl;
	81	}
	82	/********************************************************************************************
	83	computeAveAndStd
	84	*********************************************************************************************/
	85	void rate4siteGL::computeAveAndStd(){
	86	MDOUBLE sum = 0;
	87	MDOUBLE sumSqr=0.0;
	88	for (int i=0; i < _sc.seqLen(); ++i) {
	89	sum+=_rate[i];
	90	sumSqr+=(_rate[i]*_rate[i]);
	91	}
	92	_ave = sum/_sc.seqLen();
	93	_std= sumSqr-(sum*sum/_sc.seqLen());
	94	_std /= (_sc.seqLen()-1.0);
	95	_std = sqrt(_std);
	96	if (((_ave<1e-9)) && (_ave>(-(1e-9)))) _ave=0;
	97	if ((_std>(1-(1e-9))) && (_std< (1.0+(1e-9)))) _std=1.0;
	98	}
	99	/********************************************************************************************
	100	normalizeRates
	101	*********************************************************************************************/
	102	void rate4siteGL::normalizeRates() {
	103	int i=0;
	104	if (_std==0) errorMsg::reportError(" std = 0 in function normalizeRates",1);
	105	_normalizedRates.resize(_sc.seqLen(),0.0);
	106	for (i=0;i<_normalizedRates.size();++i) {
	107	_normalizedRates[i]=(_rate[i]-_ave)/_std;
	108	}
	109
	110	if (gainLossOptions::_rateEstimationMethod == gainLossOptions::ebExp) {
	111	for (int k=0; k < _sc.seqLen(); ++k) {
	112	_BayesianUpperBound[k] = (_BayesianUpperBound[k] - _ave)/_std;
	113	_BayesianLowerBound[k] = (_BayesianLowerBound[k] - _ave)/_std;
	114	_BayesianSTD[k] = (_BayesianSTD[k])/_std;
	115	}
	116	}
	117	_ave = 0.0;
	118	_std = 1.0;
	119	}

+260

-0

programs/gainLoss/rate4siteGL.cpp less more

	0	/*
	1	Copyright (C) 2011 Tal Pupko TalP@tauex.tau.ac.il.
	2
	3	This program is free software: you can redistribute it and/or modify
	4	it under the terms of the GNU General Public License as published by
	5	the Free Software Foundation, either version 3 of the License, or
	6	(at your option) any later version.
	7
	8	This program is distributed in the hope that it will be useful,
	9	but WITHOUT ANY WARRANTY; without even the implied warranty of
	10	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	11	GNU General Public License for more details.
	12
	13	You should have received a copy of the GNU General Public License
	14	along with this program. If not, see <http://www.gnu.org/licenses/>.
	15	*/
	16	#include "rate4siteGL.h"
	17	#include "gainLossUtils.h"
	18	#include "gainLossAlphabet.h"
	19	#include <cstring>
	20	/********************************************************************************************
	21	gainLoss4site
	22	*********************************************************************************************/
	23	rate4siteGL::rate4siteGL(sequenceContainer& sc, tree& tr, stochasticProcess* sp, string& outDir, unObservableData* unObservableData_p):
	24	_tr(tr),_sp(sp),_sc(sc),_outDir(outDir),_unObservableData_p(unObservableData_p)
	25
	26	//init:
	27	{
	28	fillReferenceSequence();
	29	_alphaConf = 0.05;
	30	}
	31
	32
	33	rate4siteGL& rate4siteGL::operator=(const rate4siteGL &other){
	34	if (this != &other) { // Check for self-assignment
	35	}
	36	return *this;
	37	}
	38
	39
	40	/********************************************************************************************
	41	*********************************************************************************************/
	42	void rate4siteGL::run()
	43	{
	44	LOGnOUT(4,<<"Running rate4site..."<<endl);
	45	computeRate4site();
	46	computeAveAndStd(); // put them in ave, and std
	47	normalizeRates(); // change also the ave, the std the quantiles, etc.
	48	}
	49
	50	/********************************************************************************************
	51	*********************************************************************************************/
	52	void rate4siteGL::printRates()
	53	{
	54	string r4sNonNorm = _outDir + "//" + "rate4siteOrig.txt";
	55	ofstream nonNormalizedOutStream(r4sNonNorm.c_str());
	56	nonNormalizedOutStream.precision(PRECISION);//PRECISION
	57	printRates(nonNormalizedOutStream,_rates);
	58	nonNormalizedOutStream.close();
	59	}
	60	/********************************************************************************************
	61	*********************************************************************************************/
	62	void rate4siteGL::printRatesNormalized()
	63	{
	64	string r4sNorm = _outDir + "//" + "rate4site.txt";
	65	ofstream normalizedOutStream(r4sNorm.c_str());
	66	normalizedOutStream.precision(PRECISION);
	67	normalizedOutStream<<"# Rate values were normalized to Z score (mean rate=0, +/-1 rat= +/-standard error)"<<endl;
	68	printRates(normalizedOutStream,_normalizedRates);
	69	normalizedOutStream.close();
	70	}
	71
	72
	73	/********************************************************************************************
	74	computeRate4site
	75	*********************************************************************************************/
	76	Vdouble rate4siteGL::computeRate4site()
	77	{
	78	time_t t1;
	79	time(&t1);
	80	time_t t2;
	81
	82	if (gainLossOptions::_rateEstimationMethod == gainLossOptions::ebExp) {
	83	LOGnOUT (4,<<"perform computeEB_EXP_siteSpecificRate... while computing posteriorProb PerCategory PerPosition"<<endl);
	84	_postProbPerCatPerPos.resize(_sp->categories());
	85	for (int rateIndex=0 ; rateIndex<_sp->categories(); ++rateIndex){
	86	_postProbPerCatPerPos[rateIndex].resize(_sc.seqLen());
	87	}
	88	computeEB_EXP_siteSpecificRate(_rates,_BayesianSTD,_BayesianLowerBound,_BayesianUpperBound,_sc,*_sp,_tr,_alphaConf,&_postProbPerCatPerPos,_unObservableData_p);
	89	}
	90	else if (gainLossOptions::_rateEstimationMethod == gainLossOptions::mlRate) {
	91	LOGnOUT (4,<<"perform computeML_siteSpecificRate with maxRate= "<<gainLossOptions::_maxRateForML<<endl);
	92	computeML_siteSpecificRate(_rates,_Lrate,_sc, *_sp,_tr, gainLossOptions::_maxRateForML);
	93	}
	94	else
	95	errorMsg::reportError("non such method for rate inference, in function void rate4site::computeRate4site()");
	96
	97	time(&t2);
	98	LOGnOUT(4,<<"computeRate4site RUNNING TIME = "<<(t2-t1)/60.0<<" minutes"<<endl);
	99	return _rates;
	100	}
	101
	102	/********************************************************************************************
	103	printRates
	104	*********************************************************************************************/
	105	void rate4siteGL::printRates(ostream & out, const Vdouble & rate2print) {
	106
	107	if (gainLossOptions::_rateDistributionType == gainLossOptions::GAMMA_MIXTURE){
	108	mixtureDistribution* pMixture = static_cast<mixtureDistribution*>(_sp->distr());
	109	pMixture->printParams(out);
	110	}
	111	switch (gainLossOptions::_rateEstimationMethod){
	112	case (gainLossOptions::ebExp):
	113	printRatesBayes(out,rate2print);
	114	break;
	115	case (gainLossOptions::mlRate):
	116	printRatesML(out,rate2print);
	117	break;
	118	}
	119	printAveAndStd(out);
	120	}
	121
	122	/********************************************************************************************
	123	*********************************************************************************************/
	124	void rate4siteGL::printRatesML(ostream& out, const Vdouble & rate2print) {
	125	out<<"#Rates were calculated using Maximum Likelihood"<<endl;
	126	out<<"#SEQ: The presence(1) or Absence(0) in the reference sequence."<<"Displayed on sequence "<<_refSeq->name()<<endl;
	127	out<<"#SCORE: The conservation scores. lower value = higher conservation."<<endl;
	128	out<<"#MSA DATA: The number of aligned sequences having a character from the overall number of sequences at each position."<<endl;
	129	out<<endl;
	130	out<<"========================================================================================================================================================="<<endl;
	131	out<<"#POS"<<"\t"<<"SEQ"<<"\t"<<"SCORE"<<"\t"<<"MSA DATA"<<endl; // note position start from 1.
	132	out<<"========================================================================================================================================================="<<endl;
	133
	134	#ifdef unix
	135	for (int pos=0; pos < _sc.seqLen(); ++pos) {
	136	out<<pos+1<<"\t"<<_refSeq->getAlphabet()->fromInt((*_refSeq)[pos])<<"\t"<<setprecision(7)<<rate2print[pos]<<"\t";
	137	out<<_sc.numberOfSequencesWithoutGaps(pos)<<"/"<<_sc.numberOfSeqs()<<endl; // note position start from 1.
	138	}
	139	#else
	140	for (int pos=0; pos < _sc.seqLen(); ++pos) {
	141	out<<left<<pos+1<<left<<"\t"<<_refSeq->getAlphabet()->fromInt((*_refSeq)[pos])<<"\t";
	142	out<<left<<setprecision(7)<<fixed<<rate2print[pos]<<"\t";
	143	out<<right<<_sc.numberOfSequencesWithoutGaps(pos)<<"/"<<_sc.numberOfSeqs()<<endl; // note position start from 1.
	144	}
	145	#endif
	146	}
	147	/********************************************************************************************
	148	*********************************************************************************************/
	149	void rate4siteGL::printRatesBayes(ostream& out, const Vdouble & rate2print) {
	150	int precisionHigh = 5;
	151	int precisionLow = 3;
	152
	153	out<<"# Rates were calculated using the expectation of the posterior rate distribution"<<endl;
	154	out<<"# Prior distribution is Gamma with "<<gainLossOptions::_numberOfRateCategories<<" discrete categories"<<endl;
	155	//out<<"# SEQ: The presence(1) or Absence(0) in the reference sequence."<<"Displayed on sequence "<<_refSeq->name()<<endl;
	156	//out<<"# SCORE: The conservation scores. lower value = higher conservation."<<endl;
	157	//out<<"# QQ-INTERVAL: the confidence interval for the rate estimates. The default interval is 25-75 percentiles"<<endl;
	158	//out<<"# STD: the standard deviation of the posterior rate distribution."<<endl;
	159	//out<<"# MSA DATA: The number of aligned sequences having an amino acid (non-gapped) from the overall number of sequences at each position."<<endl;
	160	MDOUBLE AlphaRate = getRateAlpha(_sp->distr());
	161	out<<"# The alpha parameter "<<AlphaRate<<endl;
	162	int k=0;
	163	while (k < _sp->categories()){
	164	out<<"# sp.rates(j) j= " <<k<<"\t"<<_sp->rates(k)<<"\t"<<_sp->ratesProb(k)<<endl;
	165	k++;
	166	}
	167
	168	out<<endl;
	169	out<<"========================================================================================================================================================="<<endl;
	170	//out<<"# POS"<<"\t"<<"SEQ"<<"\t"<<"SCORE"<<"\t"<<"QQ-INTERVAL"<<"\t"<<"STD"<<"\t"<<"MSA DATA"<<endl; // note position start from 1.
	171	out<<"# POS"<<"\t"<<"RATE"<<endl; // note position start from 1.
	172	//out<<"========================================================================================================================================================="<<endl;
	173
	174
	175	for (int pos=0; pos < _sc.seqLen(); ++pos) {
	176	out<<pos+1<<"\t"<<rate2print[pos]<<endl;
	177	}
	178
	179	//#ifdef unix
	180	// for (int pos=0; pos < _sc.seqLen(); ++pos) {
	181	// out<<pos+1<<"\t"
	182	// //<<_refSeq->getAlphabet()->fromInt((*_refSeq)[pos])<<"\t"
	183	// out<<setprecision(precisionHigh)<<rate2print[pos]<<"\t";
	184	// //out<<"["<<setprecision(precisionLow)<<_BayesianLowerBound[pos]<<","<<setprecision(precisionLow)<<_BayesianUpperBound[pos]<<"]"<<"\t";
	185	// //out<<setprecision(precisionLow)<<_BayesianSTD[pos]<<"\t";
	186	// //out<<_sc.numberOfSequencesWithoutGaps(pos)<<"/"<<_sc.numberOfSeqs()
	187	// out<<endl; // note position start from 1.
	188	// }
	189	//#else
	190	// for (int pos=0; pos < _sc.seqLen(); ++pos) {
	191	// out<<left<<pos+1;
	192	// //out<<left<<"\t"<<_refSeq->getAlphabet()->fromInt((*_refSeq)[pos])<<"\t";
	193	// out<<left<<setprecision(precisionHigh)<<fixed<<rate2print[pos]<<"\t";
	194	// //out<<right<<"["<<setprecision(precisionLow)<<left<<_BayesianLowerBound[pos]<<","<<setprecision(precisionLow)<<right<<_BayesianUpperBound[pos]<<"]"<<"\t";
	195	// //out<<right<<setprecision(precisionLow)<<_BayesianSTD[pos];
	196	// //out<<right<<"\t"<<_sc.numberOfSequencesWithoutGaps(pos)<<"/"<<_sc.numberOfSeqs()
	197	// out<<endl; // note position start from 1.
	198	// }
	199	//#endif
	200	}
	201	/********************************************************************************************
	202	*********************************************************************************************/
	203	void rate4siteGL::printAveAndStd(ostream& out) {
	204	out<<"# Average = "<<_ave<<endl;
	205	out<<"# Standard Deviation = "<<_std<<endl;
	206	}
	207	/********************************************************************************************
	208	computeAveAndStd
	209	*********************************************************************************************/
	210	void rate4siteGL::computeAveAndStd(){
	211	MDOUBLE sum = 0;
	212	MDOUBLE sumSqr=0.0;
	213	for (int i=0; i < _sc.seqLen(); ++i) {
	214	sum+=_rates[i];
	215	sumSqr+=(_rates[i]*_rates[i]);
	216	}
	217	_ave = sum/_sc.seqLen();
	218	_std= sumSqr-(sum*sum/_sc.seqLen());
	219	_std /= (_sc.seqLen()-1.0);
	220	_std = sqrt(_std);
	221	if (((_ave<1e-9)) && (_ave>(-(1e-9)))) _ave=0;
	222	if ((_std>(1-(1e-9))) && (_std< (1.0+(1e-9)))) _std=1.0;
	223	}
	224	/********************************************************************************************
	225	normalizeRates
	226	*********************************************************************************************/
	227	void rate4siteGL::normalizeRates() {
	228	int i=0;
	229	if (_std==0){
	230	LOGnOUT(4,<<"ERROR:\n std = 0 in function normalizeRates\n");
	231	}
	232	_normalizedRates.resize(_sc.seqLen(),0.0);
	233	for (i=0;i<_normalizedRates.size();++i) {
	234	_normalizedRates[i]=(_rates[i]-_ave)/_std;
	235	}
	236
	237	if (gainLossOptions::_rateEstimationMethod == gainLossOptions::ebExp) {
	238	for (int k=0; k < _sc.seqLen(); ++k) {
	239	_BayesianUpperBound[k] = (_BayesianUpperBound[k] - _ave)/_std;
	240	_BayesianLowerBound[k] = (_BayesianLowerBound[k] - _ave)/_std;
	241	_BayesianSTD[k] = (_BayesianSTD[k])/_std;
	242	}
	243	}
	244	//_ave = 0.0;
	245	//_std = 1.0;
	246	}
	247
	248	/********************************************************************************************
	249	fillReferenceSequence
	250	*********************************************************************************************/
	251	void rate4siteGL::fillReferenceSequence(){
	252	if (strcmp(gainLossOptions::_referenceSeq.c_str(),"non")==0) {
	253	_refSeq = &(_sc[0]);
	254	}
	255	else {
	256	int id1 = _sc.getId(gainLossOptions::_referenceSeq,true);
	257	_refSeq = (&_sc[id1]);
	258	}
	259	}

+78

-0

programs/gainLoss/rate4siteGL.h less more

	0	/*
	1	Copyright (C) 2011 Tal Pupko TalP@tauex.tau.ac.il.
	2
	3	This program is free software: you can redistribute it and/or modify
	4	it under the terms of the GNU General Public License as published by
	5	the Free Software Foundation, either version 3 of the License, or
	6	(at your option) any later version.
	7
	8	This program is distributed in the hope that it will be useful,
	9	but WITHOUT ANY WARRANTY; without even the implied warranty of
	10	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	11	GNU General Public License for more details.
	12
	13	You should have received a copy of the GNU General Public License
	14	along with this program. If not, see <http://www.gnu.org/licenses/>.
	15	*/
	16
	17
	18	#ifndef ___RATE_4_site___GL
	19	#define ___RATE_4_site___GL
	20
	21	#include "definitions.h"
	22	#include "replacementModel.h"
	23	#include "gainLoss.h"
	24	#include "unObservableData.h"
	25
	26	/********************************************************************************************
	27	rate4siteGL
	28	*********************************************************************************************/
	29	class rate4siteGL{
	30	public:
	31	explicit rate4siteGL(sequenceContainer& sc, tree& tr, stochasticProcess* sp, string& outDir, unObservableData* unObservableData_p);
	32
	33	rate4siteGL(const rate4siteGL& other) {*this = other;}
	34	rate4siteGL& operator=(const rate4siteGL &other);
	35	virtual ~rate4siteGL() {;}
	36	void run();
	37	VVdouble getLpostPerCat() {return _postProbPerCatPerPos;}
	38	Vdouble getRates() {return _rates;}
	39	Vdouble getNormalizedRates() {return _normalizedRates;}
	40
	41	void printRatesNormalized();
	42	void printRates();
	43
	44	protected:
	45	//func
	46	Vdouble computeRate4site();
	47	void computeAveAndStd();
	48	void normalizeRates();
	49	void printRates(ostream & out, const Vdouble & rate2print);
	50	void printRatesML(ostream& out, const Vdouble & rate2print);
	51	void printRatesBayes(ostream& out, const Vdouble & rate2print);
	52	void printAveAndStd(ostream& out= cout);
	53	void fillReferenceSequence();
	54
	55	protected:
	56	//members
	57	stochasticProcess *_sp;
	58	tree _tr;
	59	sequenceContainer _sc;
	60	sequence* _refSeq; // the reference sequence
	61	string _outDir;
	62
	63	Vdouble _rates;// the rates themselves
	64	Vdouble _Lrate;// the log likelihood of each position
	65	VVdouble _postProbPerCatPerPos; // the posterior probability for each category for each site
	66	Vdouble _normalizedRates; // the rates when their ave = 0 and std = 1.
	67	MDOUBLE _ave; // the average over all rates.
	68	MDOUBLE _std; // the std over all rates.
	69	Vdouble _BayesianSTD;// the std of the Bayesian rates
	70	Vdouble _BayesianLowerBound;// lower bound of rate in Bayesian inference
	71	Vdouble _BayesianUpperBound;// upper bound of rate in Bayesian inference
	72	MDOUBLE _alphaConf; // the alpha confidence interval of Bayesian rates (set to 0.5). interval - rates that are in the 95% area under the curve.
	73	unObservableData* _unObservableData_p; //
	74	};
	75
	76
	77	#endif

+46

-0

programs/gainLoss/runPerlReadSimResAllModels.PerBranchAndPerPos.andMP.TarRm.Bash less more

	#!/bin/bash
	#
	# Post-processing of the posterior-expectation simulations.
	# For every replicate i and every model directory d:
	#   1. run readPostExpSimulation.pl in MP mode once per gain/loss cost r and
	#      keep each result as RESULTS/outResMP<i>.<r>.txt
	#   2. run readPostExpSimulation.pl without extra arguments -> RESULTS/outRes<i>.txt
	#   3. run readPostExpSimulationPerPos.pl                    -> RESULTS/outResPerPos<i>.txt
	#   4. tar the replicate directory and remove it
	#
	# Changes against the previous version:
	#   * each model directory is listed once. GL$c.FixedParams used to appear
	#     twice, so its second pass ran on the already removed directory and
	#     re-created the tar archive from nothing.
	#   * the replicate directory is removed only when tar succeeded.
	#   * all expansions are quoted.

	Dir="/groups/pupko/cohenofi/HGT/runCOG.iTOL.38.3915/Batch.V9.93.PostExpSim.FewMPcostRatios.test"
	scriptsDir="/groups/pupko/cohenofi/pupkoSVN/trunk/scripts/gainLoss"
	c="4"
	s=38
	g=3915

	for i in 1 2 3 4 5 6 7 8 9 10
	do
		for d in "GL$c" "GL$c.MP1params" "GL$c.MP2params" "GL$c.MP3params" "GL$c.FixedParams"
		do
			resDir="$Dir/AcountForMissingData.$d/RESULTS"
			simDir="$resDir/SimulatedPostExp$i"

			for r in 0.5 1 1.5 2 2.5 3 3.5 4 4.5 5
			do
				echo "Perl readPostExpSimulation $simDir MP with cost $r"
				perl "$scriptsDir/readPostExpSimulation.pl" "$simDir/" T "$s" "$g" "$r"
				cp "$simDir/outRes.txt" "$resDir/"
				mv "$resDir/outRes.txt" "$resDir/outResMP$i.$r.txt"
			done

			echo "Perl readPostExpSimulation $simDir"
			perl "$scriptsDir/readPostExpSimulation.pl" "$simDir/"
			cp "$simDir/outRes.txt" "$resDir/"
			mv "$resDir/outRes.txt" "$resDir/outRes$i.txt"

			echo "Perl readPostExpSimulationPerPos $simDir"
			perl "$scriptsDir/readPostExpSimulationPerPos.pl" "$simDir/"
			cp "$simDir/outResPerPos.txt" "$resDir/"
			mv "$resDir/outResPerPos.txt" "$resDir/outResPerPos$i.txt"

			echo "tar $simDir"
			if tar -cf "$simDir.tar" "$simDir"
			then
				echo "rm $simDir"
				rm -R "$simDir"
			else
				# keep the raw results when they could not be archived
				echo "tar failed, keeping $simDir" >&2
			fi
		done
	done

+445

-0

programs/gainLoss/sankoffReconstructGL.cpp less more

	0	/*
	1	Copyright (C) 2011 Tal Pupko TalP@tauex.tau.ac.il.
	2
	3	This program is free software: you can redistribute it and/or modify
	4	it under the terms of the GNU General Public License as published by
	5	the Free Software Foundation, either version 3 of the License, or
	6	(at your option) any later version.
	7
	8	This program is distributed in the hope that it will be useful,
	9	but WITHOUT ANY WARRANTY; without even the implied warranty of
	10	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	11	GNU General Public License for more details.
	12
	13	You should have received a copy of the GNU General Public License
	14	along with this program. If not, see <http://www.gnu.org/licenses/>.
	15	*/
	16	#include "sankoffReconstructGL.h"
	17	#include "threeStateAlphabet.h"
	18	#include "treeIt.h"
	19	#include "matrixUtils.h"
	20	#include "sequence.h"
	21	#include "someUtil.h"
	22	#include "recognizeFormat.h"
	23	#include "seqContainerTreeMap.h"
	24	#include "treeUtil.h"
	25	#include "amino.h"
	26	#include "nucleotide.h"
	27	#include "integerAlphabet.h"
	28	#include "logFile.h"
	29	#include "gainLossOptions.h"
	30
	31	sankoffReconstructGL::sankoffReconstructGL(sequenceContainer& sc, tree& tr, string& outDir, MDOUBLE costMatrixGainLossRatio, MDOUBLE distanceFromRootForRecent):
	32	_tr(tr),_sc(sc),_outDir(outDir),_costMatrixGainLossRatio(costMatrixGainLossRatio),_distanceFromRootForRecent(distanceFromRootForRecent)
	33	{
	34	initialize();
	35	//myLog::setLog(MPoptions::_logfile, 5);
	36	run();
	37	}
	38
	39	sankoffReconstructGL::~sankoffReconstructGL(){
	40	//if (_alph)
	41	// delete _alph;
	42	}
	43
	44	/********************************************************************************************
	45	initialize
	46	*********************************************************************************************/
	47	void sankoffReconstructGL::initialize() {
	48	//string paramStr = argv[1];
	49	//MPoptions::initOptions(paramStr);
	50	//startTree();
	51	//startSequenceContainer();
	52	_states.resize(_tr.getNodesNum(),-1000);
	53	_gainMPPerPos.resize(_sc.seqLen());
	54	_lossMPPerPos.resize(_sc.seqLen());
	55	resizeVVV(_sc.seqLen(),_sc.alphabetSize(),_sc.alphabetSize(), _MPPerPos);
	56	resizeVVV(_tr.getNodesNum(),_sc.alphabetSize(),_sc.alphabetSize(), _MPPerBranch);
	57	resizeVVVV(_sc.seqLen(),_tr.getNodesNum(),_sc.alphabetSize(),_sc.alphabetSize(), _MPPerPosPerNode);
	58	startCostMatrix();
	59	_costOfTree = 0.0;
	60	_numOfGains = 0;
	61	_numOfLosses = 0;
	62	}
	63
	64	/********************************************************************************************
	65	More functions
	66	*********************************************************************************************/
	67	//void sankoffReconstructGL::startTree(){
	68	// tree t(MPoptions::_treefile);
	69	// _tr = t;
	70	// if (!(MPoptions::_rootAt =="")){
	71	// tree::nodeP myroot = _tr.findNodeByName(MPoptions::_rootAt); //returns NULL if not found
	72	// if (myroot){
	73	// _tr.rootAt(myroot);
	74	// }
	75	// else {
	76	// errorMsg::reportError("Requested root name is not found");
	77	// }
	78	// }
	79	// else {
	80	// LOGnOUT(5,<<"Default rooting used, root name is "<<_tr.getRoot()->name()<<endl);
	81	// }
	82	// LOGnOUT(5,<<"sons of root are "<<endl);
	83	// for (int son=0; son<_tr.getRoot()->getNumberOfSons();++son){
	84	// LOGnOUT(5,<<_tr.getRoot()->getSon(son)->name()<<endl);
	85	// }
	86	//}
	87
	88	/********************************************************************************************
	89	*********************************************************************************************/
	90	//void sankoffReconstructGL::startSequenceContainer(){
	91	// switch (MPoptions::_alphabetType) {
	92	// case (MPoptions::amino):
	93	// _alph = new amino; break;
	94	// case (MPoptions::nuc):
	95	// _alph = new nucleotide; break;
	96	// case (MPoptions::integer):
	97	// _alph = new integerAlphabet(MPoptions::_alphabetSize); break;
	98	// case (MPoptions::threeState): default:
	99	// _alph = new threeStateAlphabet; break;
	100	//
	101	//
	102	// }
	103	// string strFile = MPoptions::_seqfile;
	104	// ifstream in(strFile.c_str());
	105	// _sc = recognizeFormat::read(in,_alph);
	106	// _states.resize(_tr.getNodesNum(),-1000);
	107	// checkThatNamesInTreeAreSameAsNamesInSequenceContainer(_tr,_sc);
	108	//
	109	//}
	110
	111	/********************************************************************************************
	112	*********************************************************************************************/
	113	void sankoffReconstructGL::startCostMatrix(){
	114	switch (gainLossOptions::_costMatrixType) {
	115	case (gainLossOptions::file):
	116	// if specified an input cost matrix:
	117	if (gainLossOptions::_costMatrixfile != "") {
	118	readMatrixFromFile(_costMatrix,gainLossOptions::_costMatrixfile);
	119	if (_costMatrix.size() != _sc.alphabetSize()) {
	120	errorMsg::reportError("error in sankoff::startCostMatrix, the cost matrix must be the same size as the alphabet");
	121	}
	122	} else
	123	errorMsg::reportError("error in sankoff::startCostMatrix, the cost matrix file is not specified after the -mf flag");
	124	break;
	125	case (gainLossOptions::diff) :
	126	resizeMatrix(_costMatrix,_sc.alphabetSize(),_sc.alphabetSize());
	127	for (int row=0; row<_costMatrix.size(); ++row){
	128	for (int col=0; col<_costMatrix.size(); ++col){
	129	_costMatrix[row][col]=(row-col);
	130	}
	131	}
	132	break;
	133	case (gainLossOptions::diffSquare) :
	134	resizeMatrix(_costMatrix,_sc.alphabetSize(),_sc.alphabetSize());
	135	for (int row=0; row<_costMatrix.size(); ++row){
	136	for (int col=0; col<_costMatrix.size(); ++col){
	137	_costMatrix[row][col]=(row-col)*(row-col);
	138	}
	139	}
	140	break;
	141	case (gainLossOptions::gainLossCost) :
	142	resizeMatrix(_costMatrix,_sc.alphabetSize(),_sc.alphabetSize());
	143	for (int row=0; row<_costMatrix.size(); ++row){
	144	for (int col=0; col<_costMatrix.size(); ++col){
	145	MDOUBLE cost = (row==col? 0: (row<col?_costMatrixGainLossRatio:1)); //gain 2(or other set value), loss 1
	146	_costMatrix[row][col]=cost;
	147	}
	148	}
	149	break;
	150	case (gainLossOptions::fitch) : default:
	151	//default: equal cost matrix (Fitch)
	152	resizeMatrix(_costMatrix,_sc.alphabetSize(),_sc.alphabetSize());
	153	for (int row=0; row<_costMatrix.size(); ++row){
	154	for (int col=0; col<_costMatrix.size(); ++col){
	155	_costMatrix[row][col]=(row==col?0.0:1.0);
	156	}
	157	}
	158	break;
	159	}
	160	}
	161
	162
	163	/********************************************************************************************
	164	*********************************************************************************************/
	165	void sankoffReconstructGL::run(){
	166	//ofstream oStream((MPoptions::_outfile).c_str());
	167	//oStream<<"Maximum parsimony reconstruction"<<endl;
	168	//oStream<<"For each position, the reconstructed tree is presetned with reconstructed data as BP at each node, followed by a matrix specifying the number of each transition found"<<endl<<endl;
	169	//oStream.close();
	170	LOGnOUT(4,<<" MaxParsimony with costMatrix - gainLossRatio 1:"<<_costMatrixGainLossRatio<<endl);
	171
	172	string MPprints = _outDir + "//" + "MPprints." + double2string(_costMatrixGainLossRatio)+ ".txt";
	173	ofstream MPprintsStream(MPprints.c_str());
	174	MPprintsStream<<"# Various MP prints: "<<endl;
	175
	176	string gainLossMPPerPosPerBranch = _outDir + "//" + "gainLossMP." + double2string(_costMatrixGainLossRatio)+ ".PerPosPerBranch.txt";
	177	ofstream gainLossMPPerPosPerBranchStream(gainLossMPPerPosPerBranch.c_str());
	178	gainLossMPPerPosPerBranchStream<<"# print with MP based on the cost matrix: "<<endl;
	179
	180	// per pos
	181	string gainLossMPPerPos = _outDir + "//" + "gainLossMP." + double2string(_costMatrixGainLossRatio)+ ".PerPos.txt";
	182	ofstream gainLossMPPerPosStream(gainLossMPPerPos.c_str());
	183	gainLossMPPerPosStream<<"# print with MP based on the cost matrix: "<<endl;
	184
	185	// per branch
	186	string gainLossMPPerBranch = _outDir + "//" + "gainLossMP." + double2string(_costMatrixGainLossRatio)+ ".PerBranch.txt";
	187	ofstream gainLossMPPerBranchStream(gainLossMPPerBranch.c_str());
	188	gainLossMPPerBranchStream<<"# print with MP based on the cost matrix: "<<endl;
	189
	190	// state per node, Sankoff reconstruction
	191	string gainLossMPAncestralReconstruct = _outDir + "//" + "gainLossMP." + double2string(_costMatrixGainLossRatio)+ ".AncestralReconstructSankoff.txt";
	192	ofstream gainLossMPAncestralReconstructStream(gainLossMPAncestralReconstruct.c_str());
	193	gainLossMPAncestralReconstructStream<<"# print with MP based on the cost matrix: "<<endl;
	194
	195	for (int row=0; row<_costMatrix.size(); ++row){
	196	for (int col=0; col<_costMatrix.size(); ++col){
	197	MPprintsStream<<"# "<<row<<"->"<<col<<" ="<<_costMatrix[row][col]<<endl;
	198	gainLossMPPerPosPerBranchStream<<"# "<<row<<"->"<<col<<" ="<<_costMatrix[row][col]<<endl;
	199	gainLossMPPerPosStream<<"# "<<row<<"->"<<col<<" ="<<_costMatrix[row][col]<<endl;
	200	gainLossMPPerBranchStream<<"# "<<row<<"->"<<col<<" ="<<_costMatrix[row][col]<<endl;
	201	gainLossMPAncestralReconstructStream<<"# "<<row<<"->"<<col<<" ="<<_costMatrix[row][col]<<endl;
	202	}
	203	}
	204
	205	gainLossMPPerPosPerBranchStream<<"G/L"<<"\t"<<"POS"<<"\t"<<"branch"<<"\t"<<"branchLength"<<"\t"<<"distance2Root"<<"\t"<<"distance2NearestOTU"<<"\t"<<"numOfNodes2NearestOTU"<<"\t"<<"probability"<<"\t"<<"expectation"<<endl;
	206	gainLossMPAncestralReconstructStream<<"POS"<<"\t"<<"Node"<<"\t"<<"State"<<endl;
	207	for (int pos = 0 ; pos<_sc.seqLen(); ++pos) {
	208	LOGnOUT(7,<<"Running position "<<pos+1<<endl<<"=========================="<<endl);
	209	_costOfTree += runPosition(pos, gainLossMPPerPosPerBranchStream, MPprintsStream,gainLossMPAncestralReconstructStream);
	210	}
	211	LOGnOUT(4,<<"Cost of tree is "<<_costOfTree<<" (with "<<_numOfGains+_numOfLosses<<" events)"<<endl);
	212	LOGnOUT(4,<<" Gain="<<_numOfGains<<endl);
	213	LOGnOUT(4,<<" Losses="<<_numOfLosses<<endl);
	214
	215	// Per Branch
	216	printMPPerBranch(gainLossMPPerBranchStream);
	217
	218	// Per Pos
	219	printMPPerPos(gainLossMPPerPosStream);
	220
	221	}
	222
	223	/********************************************************************************************
	224	*********************************************************************************************/
	225	// transitionTypeCount is printed to the outfile only if the _costMatrixType != diffSquare or diff
	226	// totalCosts is printed to the outfile only if _costMatrixType == diffSquare or diff
	227	MDOUBLE sankoffReconstructGL::runPosition(int pos, ofstream& gainLossMPPerPosPerBranchStream, ofstream& MPprints, ofstream& gainLossMPAncestralReconstructStream){
	228	// intialize _states veactor with values of leaves
	229	seqContainerTreeMap scTreeMap(_sc,_tr);
	230	vector <tree::nodeP> leaves;
	231	_tr.getAllLeaves(leaves,_tr.getRoot());
	232	for (int i=0; i< leaves.size();i++){
	233	int myleafId = (leaves[i])->id();
	234	int mySeqId = scTreeMap.seqIdOfNodeI(myleafId);
	235	_states[myleafId] = _sc[mySeqId][pos];
	236	}
	237	VVdouble upcosts;
	238	vector <VVint> backtrack;
	239	traverseUpMP(upcosts, backtrack);
	240	//ofstream oStream((MPoptions::_outfile).c_str(),ios::app);
	241	//oStream<<"======================================"<<endl;
	242	//oStream<<"POSITION "<<pos<<endl;
	243	//oStream<<"======================================"<<endl;
	244	VVint transitionTypeCount;
	245	VVdouble totalCosts;
	246
	247	MDOUBLE costoftree = traverseDownMP(upcosts, backtrack, transitionTypeCount,totalCosts);
	248	Vstring data;
	249	preparePrintData(data);
	250	//printDataOnTreeAsBPValues(oStream,data,_tr);
	251	//oStream<<endl<<"Cost of tree is "<<costoftree<<endl<<"Transition type count:"<<endl;
	252	LOGnOUT(7,<<"Cost of position "<< pos+1 <<" is "<<costoftree<<endl<<endl);
	253
	254
	255	treeIterTopDownConst tIt(_tr);
	256	for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
	257	gainLossMPAncestralReconstructStream<<pos+1<<"\t"<<mynode->name()<<"\t"<<_states[mynode->id()]<<endl;
	258	if(mynode->isRoot())
	259	continue;
	260	int stateAtNode = _states[mynode->id()];
	261	int stateAtFather = _states[mynode->father()->id()];
	262	if(stateAtNode > stateAtFather){
	263	gainLossMPPerPosPerBranchStream<<"gain"<<"\t"<<pos+1<<"\t"<<mynode->name()<<"\t"<<mynode->dis2father()<<"\t"<<mynode->getDistance2ROOT()<<"\t"<<mynode->getMinimalDistance2OTU()<<"\t"<<mynode->getMinimalNumOfNodes2OTU()<<"\t"<<"1"<<"\t"<<stateAtNode-stateAtFather<<endl;
	264	_gainMPPerPos[pos]++;
	265	_MPPerPos[pos][0][1]++;
	266	_MPPerBranch[mynode->id()][0][1]++;
	267	_MPPerPosPerNode[pos][mynode->id()][0][1]++;
	268	_numOfGains++;
	269	}
	270	if(stateAtNode < stateAtFather){
	271	gainLossMPPerPosPerBranchStream<<"loss"<<"\t"<<pos+1<<"\t"<<mynode->name()<<"\t"<<mynode->dis2father()<<"\t"<<mynode->getDistance2ROOT()<<"\t"<<mynode->getMinimalDistance2OTU()<<"\t"<<mynode->getMinimalNumOfNodes2OTU()<<"\t"<<"1"<<"\t"<<-(stateAtNode-stateAtFather)<<endl;
	272	_lossMPPerPos[pos]++;
	273	_MPPerPos[pos][1][0]++;
	274	_MPPerBranch[mynode->id()][1][0]++;
	275	_MPPerPosPerNode[pos][mynode->id()][1][0]++;
	276	_numOfLosses++;
	277	}
	278	}
	279
	280	if ((gainLossOptions::_costMatrixType != gainLossOptions::diffSquare) &&
	281	(gainLossOptions::_costMatrixType != gainLossOptions::diff) )
	282	{
	283	for (int i = 0; i < transitionTypeCount.size(); i++) {
	284	for (int j = 0; j < transitionTypeCount[i].size(); j++) {
	285	MPprints<<transitionTypeCount[i][j]<<" ";
	286	}
	287	MPprints<<endl;
	288	}
	289	} else {
	290	for (int i=0; i< totalCosts.size();++i) {
	291	MPprints << "node " << i ;
	292	if (_tr.findNodeById(i)->isLeaf())
	293	MPprints << " (leaf)" ;
	294	if (_tr.findNodeById(i)->isRoot())
	295	MPprints << " (root)" ;
	296	MPprints <<" :" << endl ;
	297	for (int j=0; j < _costMatrix.size();++j)
	298	MPprints<< totalCosts[i][j] << " ";
	299	MPprints << endl;
	300	}
	301	}
	302	return costoftree;
	303	}
	304
	305	/********************************************************************************************
	306	*********************************************************************************************/
	307	void sankoffReconstructGL::traverseUpMP(VVdouble &upCosts, vector <VVint> &backtrack) {
	308	// upCosts[i][j] i for node, j for size of cost matrix
	309	// backtrack[i][j][k] remembers the state for which a min was obtained for node i, state j, from both sons (k=0 and k=1)
	310	int i;
	311	gainLossAlphabet alph;
	312	upCosts.resize(_tr.getNodesNum());
	313	for (i = 0; i < upCosts.size(); i++)
	314	upCosts[i].resize(_costMatrix.size(),0.0);
	315	backtrack.resize(_tr.getNodesNum());
	316	for (i = 0; i < backtrack.size(); i++) {
	317	backtrack[i].resize(_costMatrix.size());
	318	}
	319
	320	// fill upCosts, starting with leafs (0,Inf) according to the observed character
	321	treeIterDownTopConst tIt(_tr);
	322	for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
	323	if (mynode->isLeaf()) {
	324	for (int j = 0; j < _costMatrix.size(); j++) {
	325	upCosts[mynode->id()][j] = ( (_states[mynode->id()] == j \|\| _states[mynode->id()] == alph.unknown() ) ? 0 : VERYBIG);
	326	}
	327	}
	328	else {
	329	for (int k = 0; k < _costMatrix.size(); k++) { // this loop fills each cell in the vector for node mynode
	330	for (int son=0; son<mynode->getNumberOfSons(); ++son) { // go over all sons
	331	MDOUBLE minSon = VERYBIG;
	332	int argMinSon=-1; // for backtrack
	333	int idSon = (mynode->getSon(son))->id();
	334
	335	//for (int l = _costMatrix.size()-1; l >= 0; l--) { // loop to find the min, 1 is preferred
	336	for (int l = 0; l < _costMatrix.size(); l++) { // loop to find the min, 0 is preferred
	337	MDOUBLE sumSon = upCosts[idSon][l]+_costMatrix[k][l];
	338	if ( sumSon < minSon) {
	339	minSon = sumSon;
	340	argMinSon = l;
	341	}
	342	}
	343	if ((argMinSon==-1) \|\| (minSon==VERYBIG)){
	344	errorMsg::reportError("Error in sankoff::traverseUpMP, unknown reason");
	345	}
	346
	347	upCosts[mynode->id()][k]+=minSon;
	348	backtrack[mynode->id()][k].push_back(argMinSon);
	349	}
	350	}
	351	}
	352	}
	353	}
	354
	355	/********************************************************************************************
	356	*********************************************************************************************/
	357	// totalCosts is only filled for _costMatrixType==diffSquare or diff
	358	MDOUBLE sankoffReconstructGL::traverseDownMP(VVdouble &upCosts, vector <VVint> &backtrack,
	359	VVint &transitionTypeCount,VVdouble &totalCosts) {
	360	if (upCosts.size() == 0)
	361	errorMsg::reportError("error in sankoff::traverseDownMP, input vector upCosts must be filled (call traverseUpMP() first)");
	362	if (backtrack.size() == 0)
	363	errorMsg::reportError("error in sankoff::traverseDownMP, input vector backtrack must be filled (call traverseUpMP() first)");
	364	int sizeOfCosts = upCosts[0].size();
	365	totalCosts.resize(_tr.getNodesNum());
	366	for (int i = 0; i < totalCosts.size(); i++) {
	367	totalCosts[i].resize(_costMatrix.size(),0.0);
	368	}
	369
	370	MDOUBLE costOfTree = 0;
	371	int stateOfRoot;
	372	findMinInVector(upCosts[(_tr.getRoot())->id()], costOfTree, stateOfRoot); // first, reconstruct Root
	373	_states[(_tr.getRoot())->id()] = stateOfRoot;
	374
	375	transitionTypeCount.resize(sizeOfCosts);
	376	for (int i = 0; i < transitionTypeCount.size(); i++)
	377	transitionTypeCount[i].resize(sizeOfCosts,0);
	378
	379	treeIterTopDownConst tIt(_tr);
	380	for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
	381	if (mynode->isLeaf()) continue;
	382	int myId = mynode->id();
	383	for (int j=0; j<mynode->getNumberOfSons(); ++j) {
	384	int idSon = (mynode->getSon(j))->id();
	385	_states[idSon] = backtrack[myId][_states[myId]][j];
	386	transitionTypeCount[_states[myId]][_states[idSon]]++;
	387	if ((gainLossOptions::_costMatrixType == gainLossOptions::diffSquare) \|\|
	388	(gainLossOptions::_costMatrixType == gainLossOptions::diff)){
	389	for (int z=0; z <_costMatrix.size(); ++z) // go over all the states
	390	totalCosts[idSon][z] = upCosts[idSon][z] + _costMatrix[_states[myId]][z];
	391	}
	392	}
	393	// fill totalCosts of the root
	394	if (mynode->isRoot()) {
	395	if ((gainLossOptions::_costMatrixType == gainLossOptions::diffSquare) \|\|
	396	(gainLossOptions::_costMatrixType == gainLossOptions::diff)){
	397	for (int z=0; z <_costMatrix.size(); ++z) // go over all the states
	398	totalCosts[myId][z] = upCosts[myId][z];
	399	}
	400	}
	401	}
	402	return costOfTree;
	403	}
	404
	405	/********************************************************************************************
	406	*********************************************************************************************/
	407	//prepares the data to be printed as BP data on the tree
	408	void sankoffReconstructGL::preparePrintData(Vstring &data){
	409	data.resize(_tr.getNodesNum());
	410	for (int i=0; i< data.size(); ++i) {
	411	data[i] = double2string(_states[i]);
	412	data[i]+="[";
	413	data[i]+=_tr.findNodeById(i)->name();
	414	data[i]+="]";
	415	}
	416	}
	417
	418
	419	/********************************************************************************************
	420	*********************************************************************************************/
	421	void sankoffReconstructGL::printMPPerBranch(ostream& out)
	422	{
	423	treeIterTopDownConst tIt(_tr);
	424	out<<"# MP Gain and Loss counts"<<"\n";
	425	out<<"branch"<<"\t"<<"branchLength"<<"\t"<<"distance2root"<<"\t"<<"distance2NearestOTU"<<"\t"<<"numOfNodes2NearestOTU"<<"\t"<<"exp01"<<"\t"<<"exp10"<<endl;
	426	for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
	427	out<<mynode->name()<<"\t"<<mynode->dis2father()<<"\t"<<mynode->getDistance2ROOT()<<"\t"<<mynode->getMinimalDistance2OTU()<<"\t"<<mynode->getMinimalNumOfNodes2OTU()<<"\t"<<_MPPerBranch[mynode->id()][0][1]<<"\t"<<_MPPerBranch[mynode->id()][1][0]<<endl;
	428	}
	429	}
	430
	431
	432	/********************************************************************************************
	433	printProbExp()
	434	print perPos (over all branches)
	435	use the members _expV01, _expV10 for basic
	436	*********************************************************************************************/
	437	void sankoffReconstructGL::printMPPerPos(ostream& out)
	438	{
	439	out<<"POS"<<"\t"<<"MP01"<<"\t"<<"MP10"<<endl;
	440	for (int pos = 0; pos <_sc.seqLen(); ++pos){
	441	out<<pos+1<<"\t"<<_MPPerPos[pos][0][1]<<"\t"<<_MPPerPos[pos][1][0]<<endl;
	442	}
	443	}
	444

+89

-0

programs/gainLoss/sankoffReconstructGL.h less more

	0	/*
	1	Copyright (C) 2011 Tal Pupko TalP@tauex.tau.ac.il.
	2
	3	This program is free software: you can redistribute it and/or modify
	4	it under the terms of the GNU General Public License as published by
	5	the Free Software Foundation, either version 3 of the License, or
	6	(at your option) any later version.
	7
	8	This program is distributed in the hope that it will be useful,
	9	but WITHOUT ANY WARRANTY; without even the implied warranty of
	10	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	11	GNU General Public License for more details.
	12
	13	You should have received a copy of the GNU General Public License
	14	along with this program. If not, see <http://www.gnu.org/licenses/>.
	15	*/
	16
	17
	18	#ifndef ___SANKOFF__GL__H
	19	#define ___SANKOFF__GL__H
	20
	21
	22	#include "tree.h"
	23	#include "logFile.h"
	24	#include "someUtil.h"
	25	#include "definitions.h"
	26	#include "stochasticProcess.h"
	27	#include "sequenceContainer.h"
	28	#include "gainLossUtils.h"
	29	#include <map>
	30
	31
	32	class sankoffReconstructGL {
	33
	34	public:
	35	explicit sankoffReconstructGL(sequenceContainer& sc, tree& tr, string& outDir, MDOUBLE costMatrixGainLossRatio, MDOUBLE distanceFromRootForRecent);
	36	virtual ~sankoffReconstructGL() ;
	37	void traverseUpMP(VVdouble &upCosts, vector <VVint> &backtrack); // input as empty vector to be filled
	38	MDOUBLE traverseDownMP(VVdouble &upCosts, vector <VVint> &backtrack, VVint &transitionTypeCount, VVdouble &totalCosts); // input as already filled vector
	39	Vdouble getGainMPPerPos(){return _gainMPPerPos;}
	40	Vdouble getLossMPPerPos(){return _lossMPPerPos;}
	41	VVVdouble getMPPerPos(){return _MPPerPos;}
	42	VVVdouble getMPPerBranch(){return _MPPerBranch;}
	43	VVVVdouble getMPPerPosPerNode(){return _MPPerPosPerNode;}
	44	int getNumOfGainEvnetsMP(){return _numOfGains;}
	45	int getNumOfLossEvnetsMP(){return _numOfLosses;}
	46
	47
	48
	49	private:
	50	void initialize();
	51	void run();
	52	void startTree();
	53	void startSequenceContainer();
	54	void startCostMatrix();
	55	MDOUBLE runPosition(int pos, ofstream& gainLossMPPerPosPerBranchStream, ofstream& MPprints, ofstream& gainLossMPAncestralReconstructStream);
	56	void preparePrintData(Vstring &data);//prepares the data to be printed as BP data on the tree
	57
	58	void printMPPerBranch(ostream& out);
	59	void printMPPerPos(ostream& out);
	60
	61
	62	public:
	63
	64
	65	private:
	66	VVdouble _costMatrix;
	67	Vint _states; // the vector with the states of the leaves, to be filled with reconstructed states
	68	alphabet * _alph;
	69	tree _tr;
	70	sequenceContainer _sc;
	71	MDOUBLE _costOfTree;
	72	int _numOfGains;
	73	int _numOfLosses;
	74
	75	Vdouble _lossMPPerPos;
	76	Vdouble _gainMPPerPos;
	77	VVVdouble _MPPerPos;
	78	VVVdouble _MPPerBranch;
	79	VVVVdouble _MPPerPosPerNode;
	80
	81
	82	MDOUBLE _distanceFromRootForRecent;
	83	MDOUBLE _costMatrixGainLossRatio;
	84	string _outDir;
	85	};
	86
	87
	88	#endif

+140

-0

programs/gainLoss/simulateChangesAlongTree.cpp less more

	0	/*
	1	Copyright (C) 2011 Tal Pupko TalP@tauex.tau.ac.il.
	2
	3	This program is free software: you can redistribute it and/or modify
	4	it under the terms of the GNU General Public License as published by
	5	the Free Software Foundation, either version 3 of the License, or
	6	(at your option) any later version.
	7
	8	This program is distributed in the hope that it will be useful,
	9	but WITHOUT ANY WARRANTY; without even the implied warranty of
	10	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	11	GNU General Public License for more details.
	12
	13	You should have received a copy of the GNU General Public License
	14	along with this program. If not, see <http://www.gnu.org/licenses/>.
	15	*/
	16	#include "simulateChangesAlongTree.h"
	17	#include "talRandom.h"
	18	#include "matrixUtils.h"
	19	#include "gainLoss.h"
	20
	21	#include <algorithm>
	22
	23
	24	simulateChangesAlongTree::simulateChangesAlongTree(const tree& inTree, const stochasticProcess& sp, alphabet* pAlph)
	25	: _tree(inTree), _sp(sp), _pAlph(pAlph)
	26	{
	27	}
	28
	29	simulateChangesAlongTree::~simulateChangesAlongTree()
	30	{
	31	}
	32
	33
	34
	35
	36	void simulateChangesAlongTree::init()
	37	{
	38	//init the vector of waiting times.
	39	_waitingTimeParams.clear();
	40	_waitingTimeParams.resize(_pAlph->size());
	41	int i, j;
	42	for (i = 0; i < _pAlph->size(); ++i)
	43	{
	44	_waitingTimeParams[i] = -_sp.dPij_dt(i, i, 0.0);
	45
	46	}
	47
	48	//init _jumpProbs.
	49	//_jumpProbs[i][j] = Q[i][j] / -Q[i][i]
	50	_jumpProbs.clear();
	51	_jumpProbs.resize(_pAlph->size());
	52	for (i = 0; i < _pAlph->size(); ++i)
	53	{
	54	MDOUBLE sum = 0.0;
	55	_jumpProbs[i].resize(_pAlph->size());
	56	for (j = 0; j < _pAlph->size(); ++j)
	57	{
	58	if (i == j)
	59	_jumpProbs[i][j] = 0.0;
	60	else
	61	{
	62	_jumpProbs[i][j] = _sp.dPij_dt(i, j, 0.0) / _waitingTimeParams[i];
	63	}
	64	sum += _jumpProbs[i][j];
	65	}
	66	if (! DEQUAL(sum, 1.0)){
	67	string err = "error in simulateJumps::init(): sum probabilities is not 1 and equal to ";
	68	err+=double2string(sum);
	69	errorMsg::reportError(err);
	70	}
	71	}
	72	int nodesNum = _tree.getNodesNum();
	73	_changesOccurred.clear();
	74	_changesOccurred.resize(nodesNum);
	75	for (int i=0; i<nodesNum; ++i)
	76	resizeMatrix(_changesOccurred[i], _pAlph->size(), _pAlph->size());
	77	_nodesContent.clear();
	78	_nodesContent.resize(nodesNum, 0);
	79	}
	80
	81	sequenceContainer simulateChangesAlongTree::simulatePosition(){
	82	init();
	83	Vdouble freqs(_pAlph->size(),0.0);
	84	for (int i = 0; i< freqs.size(); ++i)
	85	freqs[i]=_sp.freq(i);
	86	int rootState = giveRandomState(_pAlph->size(), freqs);
	87	//int rootState = giveRandomState(_pAlph, freqs);
	88
	89	_nodesContent[_tree.getRoot()->id()] = rootState;
	90	simulateOnce(_tree.getRoot(),0,rootState,0);
	91	simulateOnce(_tree.getRoot(),0,rootState,1);
	92	if (_tree.getRoot()->getNumberOfSons() > 2)
	93	simulateOnce(_tree.getRoot(),0,rootState,2);
	94	return _sc;
	95	}
	96
	97	void simulateChangesAlongTree::simulateOnce(tree::nodeP curNode,
	98	MDOUBLE disFromNode,
	99	int previousContent, int whichSon){
	100	tree::nodeP sonNode = curNode->getSon(whichSon);
	101	MDOUBLE avgWaitingTime = 1.0 / _waitingTimeParams[previousContent];
	102	MDOUBLE timeTillChange = talRandom::rand_exp(avgWaitingTime);
	103	disFromNode += timeTillChange;
	104	//int nextContent = giveRandomState(_pAlph, previousContent, _jumpProbs);
	105	int nextContent = giveRandomState(_pAlph->size(), previousContent, _jumpProbs);
	106
	107	while (disFromNode < sonNode->dis2father()) {
	108	_changesOccurred[sonNode->id()][previousContent][nextContent]++;
	109	previousContent=nextContent;
	110	MDOUBLE avgWaitingTime = 1.0 / _waitingTimeParams[previousContent];
	111	MDOUBLE timeTillChange = talRandom::rand_exp(avgWaitingTime);
	112	disFromNode += timeTillChange;
	113	//nextContent = giveRandomState(_pAlph, nextContent, _jumpProbs);
	114	nextContent = giveRandomState(_pAlph->size(), nextContent, _jumpProbs);
	115
	116	}
	117	while (disFromNode >= sonNode->dis2father()) {
	118	_nodesContent[sonNode->id()] = previousContent;
	119	if (sonNode->isLeaf()) {
	120	//string name = "leaf_" + int2string(sonNode->id()) + "_" + sonNode->name();
	121	string name = sonNode->name();
	122	sequence seq(int2string(previousContent),name, "", sonNode->id(), _pAlph);
	123	_sc.add(seq);
	124	return;
	125	}
	126	simulateOnce(sonNode, 0, previousContent, 1);
	127	disFromNode-=sonNode->dis2father();
	128	curNode = sonNode;
	129	sonNode = curNode->getSon(0);
	130	}
	131	_changesOccurred[sonNode->id()][previousContent][nextContent]++;
	132	simulateOnce(curNode, disFromNode, nextContent, 0);
	133	}
	134
	135	VVint simulateChangesAlongTree::getChangesForBranch(int nodeID){
	136	if (nodeID>_changesOccurred.size())
	137	errorMsg::reportError("error in simulateChangesAlongTree::getChangesForBranch, nodeID doesn't exist");
	138	return _changesOccurred[nodeID];
	139	}⏎

+69

-0

programs/gainLoss/simulateChangesAlongTree.h less more

	0	/*
	1	Copyright (C) 2011 Tal Pupko TalP@tauex.tau.ac.il.
	2
	3	This program is free software: you can redistribute it and/or modify
	4	it under the terms of the GNU General Public License as published by
	5	the Free Software Foundation, either version 3 of the License, or
	6	(at your option) any later version.
	7
	8	This program is distributed in the hope that it will be useful,
	9	but WITHOUT ANY WARRANTY; without even the implied warranty of
	10	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	11	GNU General Public License for more details.
	12
	13	You should have received a copy of the GNU General Public License
	14	along with this program. If not, see <http://www.gnu.org/licenses/>.
	15	*/
	16
	17
	18	#ifndef ___SIMULATE_CHANGES__
	19	#define ___SIMULATE_CHANGES__
	20
	21	#include "definitions.h"
	22	#include "tree.h"
	23	#include "stochasticProcess.h"
	24	#include "alphabet.h"
	25	#include "sequenceContainer.h"
	26
	27	#include <map>
	28	#include <vector>
	29	using namespace std;
	30
	31	/******************************************************************
	32	This class simulates jumps (events) along a
	33	given tree, with the aim of creating a dataset (seqeunceContainer)
	34	in which we know the exact number of transitions along the tree
	35	*******************************************************************/
	36
	37	class simulateChangesAlongTree {
	38	public:
	39	simulateChangesAlongTree(const tree& inTree, const stochasticProcess& sp, alphabet* pAlph);
	40	virtual ~simulateChangesAlongTree();
	41	sequenceContainer simulatePosition();
	42	VVint getChangesForBranch(int nodeID);
	43	int getNodeContent(int nodeId) {return _nodesContent[nodeId];}
	44	void removeAllSequnces(){
	45	_sc.removeAll();
	46	};
	47
	48	private:
	49	void init();
	50	void simulateOnce(tree::nodeP curNode, MDOUBLE disFromNode, int previousContent, int whichSon = 0);
	51
	52
	53	private:
	54	tree _tree;
	55	stochasticProcess _sp;
	56	alphabet* _pAlph;
	57
	58	Vdouble _waitingTimeParams;//each entry is the lambda parameter of the exponential distribution modeling the waiting time for "getting out" of state i
	59	//_jumpProbs[i][j] is the probability of jumping from state i to state j (given that a change has ocured).
	60	VVdouble _jumpProbs;
	61
	62	VVVint _changesOccurred; // number of times changes from i to j occurred , for each branch
	63	Vint _nodesContent; // the actual state at each node, retrieval according to node id
	64	sequenceContainer _sc;
	65
	66	};
	67
	68	#endif

+316

-0

programs/gainLoss/simulateOnePos.cpp less more

	0	/*
	1	Copyright (C) 2011 Tal Pupko TalP@tauex.tau.ac.il.
	2
	3	This program is free software: you can redistribute it and/or modify
	4	it under the terms of the GNU General Public License as published by
	5	the Free Software Foundation, either version 3 of the License, or
	6	(at your option) any later version.
	7
	8	This program is distributed in the hope that it will be useful,
	9	but WITHOUT ANY WARRANTY; without even the implied warranty of
	10	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	11	GNU General Public License for more details.
	12
	13	You should have received a copy of the GNU General Public License
	14	along with this program. If not, see <http://www.gnu.org/licenses/>.
	15	*/
	16	#include "definitions.h"
	17	#include "simulateOnePos.h"
	18	#include "tree.h"
	19	#include "stochasticProcess.h"
	20	#include "alphabet.h"
	21	#include "simulateTree.h"
	22	#include "threeStateAlphabet.h"
	23	#include "recognizeFormat.h"
	24	#include "evaluateCharacterFreq.h"
	25	#include "trivialAccelerator.h"
	26	#include "uniDistribution.h"
	27	#include "sequence.h"
	28	#include "simulateChangesAlongTree.h"
	29	#include "treeIt.h"
	30	#include "fastaFormat.h"
	31	#include "gainLoss.h"
	32
	33	#include <fstream>
	34	#include <string>
	35	using namespace std;
	36
	37	/********************************************************************************************
	38	*********************************************************************************************/
	39	simulateOnePos::simulateOnePos(string simSeqFile, ostream* resFile, ostream* simulatedEvents, int simNum, string treeFile
	40	, MDOUBLE sumGainLoss, MDOUBLE theta
	41	, bool is3states, stochasticProcess* sp, tree* pTree
	42	, Vdouble* init_cpN_vals, Vdouble* freq_cpN)
	43	: _pAlph(NULL),_simulateNullModel(false),_simNum(simNum),_theta(theta),_sumGainLoss(sumGainLoss)
	44	,_is3states(is3states), _init_cpN_vals(init_cpN_vals),_freq_cpN(freq_cpN),_simulatedEvents(simulatedEvents),_resFile(resFile) {
	45	if(pTree!=NULL)
	46	init(pTree);
	47	else
	48	init(treeFile);
	49
	50
	51	if (_simulateNullModel)
	52	simulateOnePos_cpN_Model(simSeqFile);
	53	else
	54	simulateOnePosLGT(sp,simSeqFile);
	55	}
	56	/********************************************************************************************
	57	*********************************************************************************************/
	58	simulateOnePos::~simulateOnePos()
	59	{
	60	if (_sp)
	61	delete _sp;
	62	if (_pAlph)
	63	delete _pAlph;
	64	//if (_out)
	65	// delete _out;
	66	//if (_res)
	67	// delete _res;
	68	//if (_outTree)
	69	// delete _outTree;
	70	}
	71
	72	/********************************************************************************************
	73	*********************************************************************************************/
	74	void simulateOnePos::init(string strTree)
	75	{
	76	_tree = tree(strTree);
	77	if (!(_rootAt =="")){
	78	tree::nodeP myroot = _tree.findNodeByName(_rootAt); //returns NULL if not found
	79	if (myroot){
	80	_tree.rootAt(myroot);
	81	//*_res<<"# tree rooted at "<<myroot->name()<<" id, "<<myroot->id()<<endl;
	82	*_simulatedEvents<<"# tree rooted at "<<myroot->name()<<" id, "<<myroot->id()<<endl;
	83	}
	84	else {
	85	//*_res<<"# default rooting used "<<endl;
	86	*_simulatedEvents<<"# default rooting used "<<endl;
	87	}
	88	}
	89	if(_is3states)
	90	_pAlph = new threeStateAlphabet();
	91	else
	92	_pAlph = new gainLossAlphabet();
	93	_alphVecDist.resize(_pAlph->size());
	94	_changesOccurred.resize(_tree.getNodesNum());
	95	for (int i=0; i<_tree.getNodesNum(); ++i)
	96	resizeMatrix(_changesOccurred[i], _pAlph->size(), _pAlph->size());
	97
	98	}
	99	/********************************************************************************************
	100	*********************************************************************************************/
	101	void simulateOnePos::init(tree* pTree)
	102	{
	103	_tree = *pTree;
	104	if(_is3states)
	105	_pAlph = new threeStateAlphabet();
	106	else
	107	_pAlph = new gainLossAlphabet();
	108	_alphVecDist.resize(_pAlph->size());
	109	_changesOccurred.resize(_tree.getNodesNum());
	110	for (int i=0; i<_tree.getNodesNum(); ++i)
	111	resizeMatrix(_changesOccurred[i], _pAlph->size(), _pAlph->size());
	112
	113	}
	114
	115	/********************************************************************************************
	116	*********************************************************************************************/
	117	void simulateOnePos::simulateOnePos_cpN_Model(string strOutFile) {
	118	Vdouble freq(2,0.0);/// FILL IN!!!
	119	freq[0]= 0.6;
	120	freq[1]= 0.4;
	121
	122	MDOUBLE init_gain = 0.0; // No HGT
	123	MDOUBLE init_loss = 3.23;
	124	bool _isHGT_normal_Pij = true;
	125	bool _isHGT_with_Q = true;
	126	//gainLossModel glm(init_gain,freq,_isHGT_normal_Pij,_isHGT_with_Q);
	127	gainLossModelNonReversible glm(init_gain,init_loss,freq,gainLossOptions::_isRootFreqEQstationary,_isHGT_normal_Pij,_isHGT_with_Q);
	128	trivialAccelerator pijAcc(&glm);
	129	uniDistribution uniDistr;
	130	_sp = new stochasticProcess(&uniDistr,&pijAcc,false);
	131
	132	// simulate:
	133	simulateTree st1(_tree, *_sp, _pAlph);
	134	Vdouble rates(1,1.0);
	135	st1.generate_seqWithRateVector(rates,1);
	136
	137	_sc = st1.toSeqDataWithoutInternalNodes();
	138	ofstream seq_sim(strOutFile.c_str());
	139	seq_sim.precision(PRECISION);
	140	fastaFormat::write(seq_sim,_sc);
	141	seq_sim.close();
	142	}
	143
	144	/********************************************************************************************
	145	*********************************************************************************************/
	146	void simulateOnePos::simulateOnePosLGT(stochasticProcess* sp, string strOutFile)
	147	{
	148	if(!sp){
	149	if(_is3states){
	150	Vdouble init_cpN_vals(4);
	151	if(_init_cpN_vals){
	152	init_cpN_vals = *_init_cpN_vals;
	153	}
	154	else{
	155	init_cpN_vals[0]=0.25; //gain (0->1)
	156	init_cpN_vals[1]=1; //more (1->more)
	157	init_cpN_vals[2]=1; // less (more->1)
	158	init_cpN_vals[3]=0.5; // loss (1->0)
	159	}
	160	if(_simNum==0)// printed once only
	161	LOGnOUT(3,<<"Rate values: gain (0->1)="<<init_cpN_vals[0]<<" more (1->more)="<<init_cpN_vals[1]
	162	<<" less (more->1)="<<init_cpN_vals[2]<<" loss (1->0)="<<init_cpN_vals[3]<<"\n");
	163	Vdouble freq_cpN(3);
	164	if(_freq_cpN)
	165	freq_cpN = *_freq_cpN;
	166	else{
	167	freq_cpN[0]=0.5;
	168	freq_cpN[1]=0.2;
	169	freq_cpN[2]=1 - (freq_cpN[0] + freq_cpN[1]);
	170	}
	171	bool useMarkovLimiting = false;
	172	if(_simNum==0){
	173	LOGnOUT(3,<<"Freq values: 0="<<freq_cpN[0]<<" 1="<<freq_cpN[1]<<" more="<<freq_cpN[2]<<" loss (1->0)="<<init_cpN_vals[3]<<"\n");
	174	LOGnOUT(3,<<"Freq useMarkovLimiting="<<useMarkovLimiting<<"\n");}
	175
	176	oneTwoMoreModel glm_cpN(init_cpN_vals[0],init_cpN_vals[1],
	177	init_cpN_vals[2],init_cpN_vals[3],freq_cpN,useMarkovLimiting);
	178	trivialAccelerator pijAcc_cpN(&glm_cpN);
	179	uniDistribution uniDistr_cpN;
	180	bool isRevers = false;
	181	_sp = new stochasticProcess(&uniDistr_cpN,&pijAcc_cpN,false);
	182	MDOUBLE sumQii=(static_cast<oneTwoMoreModel*>(_sp->getPijAccelerator()->getReplacementModel()))->sumPijQij();
	183	(static_cast<oneTwoMoreModel*>(_sp->getPijAccelerator()->getReplacementModel()))->norm(1/sumQii);
	184	//cout<<" sumQii before norm="<<sumQii<<"\n";
	185	}
	186	else{
	187	// frequencies taken as estimated from the stationary distribution of the stochastic process
	188	LOGnOUT(3,<<"ERROR: simulateOnePosLGT with no stochastic process. Use constant default parameters\n");
	189	Vdouble freq(2,0.0);
	190	freq[0]= 0.6;
	191	freq[1]= 0.4;
	192	MDOUBLE init_gain = 0.942; // taken from original runs of COG data under the lgt model
	193	MDOUBLE init_loss = 5.23;
	194	bool _isHGT_normal_Pij = true;
	195	bool _isHGT_with_Q = true;
	196	//gainLossModel glm(init_gain,freq,_isHGT_normal_Pij,_isHGT_with_Q);
	197	gainLossModelNonReversible glm(init_gain,init_loss,freq,gainLossOptions::_isRootFreqEQstationary,_isHGT_normal_Pij,_isHGT_with_Q);
	198	trivialAccelerator pijAcc(&glm);
	199	uniDistribution uniDistr;
	200	_sp = new stochasticProcess(&uniDistr,&pijAcc,false);
	201	MDOUBLE sumQii = 1.0;
	202	sumQii = normalizeQ(_sp);
	203	}
	204	}
	205	else{
	206	_sp = sp->clone();
	207	}
	208
	209	simulateChangesAlongTree sim(_tree,*_sp,_pAlph);
	210	_sc = sim.simulatePosition();
	211	_alphVecDist = _sc.getAlphabetDistribution();
	212	bool isFinishOneRun = false;
	213	do{
	214	if(isFinishOneRun)
	215	LOGnOUT(6,<<"The number of 1s simulated "<< _alphVecDist[1]<<" was less than "<<gainLossOptions::_minNumOfOnes<<"\n");
	216	for(int alph = 0 ; alph<_pAlph->size() ;++alph){
	217	LOGnOUT(6,<<_alphVecDist[alph]<<" ");
	218	}
	219	LOGnOUT(6,<<"\n");
	220	sim.removeAllSequnces();
	221	_sc = sim.simulatePosition();
	222	_alphVecDist = _sc.getAlphabetDistribution();
	223	isFinishOneRun = true;
	224	}
	225	while(_alphVecDist[1]< gainLossOptions::_minNumOfOnes);
	226	_occurFraction = (float)_alphVecDist[1]/(float)_sc.numberOfSeqs();
	227
	228	ofstream seq_sim(strOutFile.c_str());
	229	seq_sim.precision(PRECISION);
	230	fastaFormat::write(seq_sim,_sc);
	231	seq_sim.close();
	232
	233
	234
	235	treeIterTopDownConst tit(_tree);
	236	int totalNumChangesInTree = 0;
	237	//*_res<<"# print values by simulations "<<endl;
	238	//*_res<<"G/L"<<"\t"<<"SIM"<<"\t"<<"branch"<<"\t"<<"branchLength"<<"\t"<<"distance2root"<<"\t"<<"distance2NearestOTU"<<"\t"<<"numOfNodes2NearestOTU"<<"\t"<<"sumGainLoss"<<"\t"<<"rootFq"<<"\t"<<"occur"<<"\t"<<"events"<<endl;
	239	if(_simNum+1==1){ // print only at first position
	240	*_simulatedEvents<<"# print values by simulations "<<endl;
	241	*_simulatedEvents<<"G/L"<<"\t"<<"SIM"<<"\t"<<"branch"<<"\t"<<"branchLength"<<"\t"<<"distance2root"<<"\t"<<"distance2NearestOTU"<<"\t"<<"numOfNodes2NearestOTU"<<"\t"<<"sumGainLoss"<<"\t"<<"rootFq"<<"\t"<<"occur"<<"\t"<<"events"<<endl;
	242	*_resFile<<"branch"<<"\t"<<"positions"<<"\t"<<"state"<<endl;
	243	}
	244	for (tree::nodeP myN = tit.first();myN!=tit.end(); myN = tit.next()) {
	245	*_resFile<<myN->name()<<"\t"<<_simNum+1<<"\t"<< sim.getNodeContent(myN->id())<<endl;
	246	if(myN->isRoot())
	247	continue;
	248	VVint changesInNode = sim.getChangesForBranch(myN->id());
	249	_changesOccurred[myN->id()] = changesInNode;
	250	//*_res<<"Node id="<<myN->id()<<" name="<<myN->name()<< " content=" << sim.getNodeContent(myN->id()) << endl;
	251	for (int i=0; i<changesInNode.size(); ++i) {
	252	for (int j=0; j<changesInNode.size(); ++j) {
	253	totalNumChangesInTree+=changesInNode[i][j];
	254	//if(changesInNode[i][j]>0) // DEBUG
	255	// cout<<"total number of changes: "<<totalNumChangesInTree<<" "<<myN->name()<<" "<<i<<" "<<j<<"\n";
	256
	257	//*_res<<changesInNode[i][j]<<" ";
	258	if((i==0)&&(j==1)&&(changesInNode[i][j]>0)){
	259	//*_res<<"gain"<<"\t"<<_simNum+1<<"\t"<<myN->name()<<"\t"<<myN->dis2father()<<"\t"<<myN->getDistance2ROOT()
	260	// <<"\t"<<myN->getMinimalDistance2OTU()<<"\t"<<myN->getMinimalNumOfNodes2OTU()
	261	// <<"\t"<<_sumGainLoss<<"\t"<<_theta<<"\t"<<occur<<"\t"<<changesInNode[i][j]<<endl;
	262	*_simulatedEvents<<"gain"<<"\t"<<_simNum+1<<"\t"<<myN->name()<<"\t"<<myN->dis2father()<<"\t"<<myN->getDistance2ROOT()
	263	<<"\t"<<myN->getMinimalDistance2OTU()<<"\t"<<myN->getMinimalNumOfNodes2OTU()
	264	<<"\t"<<_sumGainLoss<<"\t"<<_theta<<"\t"<<_occurFraction<<"\t"<<changesInNode[i][j]<<endl;
	265	}
	266	if((i==1)&&(j==0)&&(changesInNode[i][j]>0)){ //NOTE: in both gain and loss use changesInNode[i][j] for event indication
	267	//*_res<<"loss"<<"\t"<<_simNum+1<<"\t"<<myN->name()<<"\t"<<myN->dis2father()<<"\t"<<myN->getDistance2ROOT()
	268	// <<"\t"<<myN->getMinimalDistance2OTU()<<"\t"<<myN->getMinimalNumOfNodes2OTU()
	269	// <<"\t"<<_sumGainLoss<<"\t"<<_theta<<"\t"<<_occurFraction<<"\t"<<changesInNode[i][j]<<endl;
	270	*_simulatedEvents<<"loss"<<"\t"<<_simNum+1<<"\t"<<myN->name()<<"\t"<<myN->dis2father()<<"\t"<<myN->getDistance2ROOT()
	271	<<"\t"<<myN->getMinimalDistance2OTU()<<"\t"<<myN->getMinimalNumOfNodes2OTU()
	272	<<"\t"<<_sumGainLoss<<"\t"<<_theta<<"\t"<<_occurFraction<<"\t"<<changesInNode[i][j]<<endl;
	273	}
	274	}
	275	//*_res<<endl;
	276	}
	277
	278	//*_res<<"TOTAL Number of changes along the tree were "<<totalNumChangesInTree<<endl;
	279	}
	280	//printTreeWithNodeIdBPStyle(*_outTree); // WARN - the removal of this print was not tested
	281	}
	282
	283	// copied from the original covarion code , from the file checkov.cpp
	284	void simulateOnePos::printTreeWithNodeIdBPStyle(ostream &out) const{
	285	recursivePrintTree(out,_tree.getRoot());
	286	out<<";";
	287	}
	288
	289	// similar to the file checkov.cpp from the original covarion code
	290	// The bootstrap values is the nodes id.
	291	void simulateOnePos::recursivePrintTree(ostream &out,const tree::nodeP &myNode) const {
	292	if (myNode->isLeaf()) {
	293	out << myNode->name() << "_" << myNode->id();
	294	out << ":"<< myNode->dis2father();
	295	return;
	296	} else {
	297	out <<"(";
	298	for (int i=0;i<myNode->getNumberOfSons();++i) {
	299	if (i>0) out <<",";
	300	recursivePrintTree(out, myNode->getSon(i));
	301	}
	302	out <<")";
	303	if (myNode->isRoot()==false) {
	304	out<<":"<< myNode->dis2father();
	305	out << "["<<myNode->id()<<"]";
	306	}
	307	}
	308	}
	309
	310
	311	VVint simulateOnePos::getChangesForBranch(int nodeID){
	312	if (nodeID>_changesOccurred.size())
	313	errorMsg::reportError("error in simulateChangesAlongTree::getChangesForBranch, nodeID doesn't exist");
	314	return _changesOccurred[nodeID];
	315	}

+83

-0

programs/gainLoss/simulateOnePos.h less more

	0	/*
	1	Copyright (C) 2011 Tal Pupko TalP@tauex.tau.ac.il.
	2
	3	This program is free software: you can redistribute it and/or modify
	4	it under the terms of the GNU General Public License as published by
	5	the Free Software Foundation, either version 3 of the License, or
	6	(at your option) any later version.
	7
	8	This program is distributed in the hope that it will be useful,
	9	but WITHOUT ANY WARRANTY; without even the implied warranty of
	10	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	11	GNU General Public License for more details.
	12
	13	You should have received a copy of the GNU General Public License
	14	along with this program. If not, see <http://www.gnu.org/licenses/>.
	15	*/
	16
	17
	18	#ifndef ___SIMULATE_1POS__
	19	#define ___SIMULATE_1POS__
	20
	21	#include "definitions.h"
	22	#include "tree.h"
	23	#include "stochasticProcess.h"
	24	#include "sequenceContainer.h"
	25	#include "alphabet.h"
	26	#include "threeStateModel.h"
	27	#include "oneTwoMoreModel.h"
	28
	29
	30	using namespace std;
	31
	32	/******************************************************************
	33	Simulate one position using the 3stateLGT stochastic process
	34	*******************************************************************/
	35	class simulateOnePos{
	36	public:
	37	//simulateOnePos();
	38	simulateOnePos(string simSeqFile, ostream* resFile, ostream* simulatedEvents, int simNum, string treeFile
	39	, MDOUBLE sumGainLoss, MDOUBLE theta
	40	, bool is3states=false, stochasticProcess* sp=NULL, tree* pTree=NULL
	41	, Vdouble* init_cpN_vals=NULL, Vdouble* freq_cpN=NULL);
	42	virtual ~simulateOnePos();
	43
	44	VVint getChangesForBranch(int nodeID);
	45	sequenceContainer getSequenceContainer(){return _sc;};
	46	MDOUBLE getOccurFraction(){return _occurFraction;};
	47
	48
	49	private:
	50	void init(string strTree);
	51	void init(tree* pTree);
	52	void simulateOnePosLGT(stochasticProcess* sp, string strOutFile);
	53
	54	void simulateOnePos_cpN_Model(string strOutFile);
	55	void printTreeWithNodeIdBPStyle(ostream &out) const;
	56	void recursivePrintTree(ostream &out,const tree::nodeP &myNode) const;
	57
	58	private:
	59	tree _tree;
	60	stochasticProcess *_sp;
	61	sequenceContainer _sc; // as simulated
	62	int _simNum;
	63	MDOUBLE _sumGainLoss;
	64	MDOUBLE _theta;
	65	MDOUBLE _occurFraction;
	66	alphabet* _pAlph;
	67	vector<int> _alphVecDist;
	68	ostream *_simulatedEvents;
	69	ostream *_resFile;
	70
	71	bool _simulateNullModel;
	72	bool _is3states;
	73	Vdouble* _init_cpN_vals;
	74	Vdouble* _freq_cpN;
	75
	76	string _rootAt;
	77	VVVint _changesOccurred; // number of times changes from i to j occurred , for each branch
	78	};
	79
	80
	81	#endif
	82

+215

-0

programs/gainLoss/siteSpecificGL.cpp less more

	0	/*
	1	Copyright (C) 2011 Tal Pupko TalP@tauex.tau.ac.il.
	2
	3	This program is free software: you can redistribute it and/or modify
	4	it under the terms of the GNU General Public License as published by
	5	the Free Software Foundation, either version 3 of the License, or
	6	(at your option) any later version.
	7
	8	This program is distributed in the hope that it will be useful,
	9	but WITHOUT ANY WARRANTY; without even the implied warranty of
	10	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	11	GNU General Public License for more details.
	12
	13	You should have received a copy of the GNU General Public License
	14	along with this program. If not, see <http://www.gnu.org/licenses/>.
	15	*/
	16
	17	#include "siteSpecificGL.h"
	18	#include "definitions.h"
	19	#include "numRec.h"
	20	#include "matrixUtils.h"
	21	#include "seqContainerTreeMap.h"
	22	#include "gainLossUtils.h"
	23	#include "gainLossModel.h"
	24	#include "gainLossOptions.h"
	25
	26
	27
	28	// THE BAYESIAN EB_EXP PART OF gain and loss ESTIMATION. //
	29
	30	/*************************************
	31	This function computes the expectation of
	32	the posterior gain and loss distribution for a specific site
	33	as well as the confidence interval
	34	*************************************/
	35	// per all sites computation
	36	void computeEB_EXP_siteSpecificGL(Vdouble & GainLossV,
	37	Vdouble & stdV,
	38	Vdouble & lowerBoundV,
	39	Vdouble & upperBoundV,
	40	VVdouble & posteriorsV,
	41	const sequenceContainer& sc,
	42	const vector<vector<stochasticProcess*> >& spVVec,
	43	const tree& tr,
	44	const distribution * gainDist,
	45	const distribution * lossDist,
	46	const distribution * distPrim,
	47	const MDOUBLE alphaConf,
	48	VVVdouble & postProbPerSpPerCatPerPos, //2 fill (*postProbPerSpPerCatPerPos)[sp][pos]
	49	unObservableData* unObservableData_p)
	50	{
	51	LOG(5,<<"Calculating posterior and expectation of posterior values for all sites"<<endl);
	52	int seqLen = sc.seqLen();
	53	GainLossV.resize(seqLen);
	54	stdV.resize(seqLen);
	55	lowerBoundV.resize(seqLen);
	56	upperBoundV.resize(seqLen);
	57	int numOfSPs = gainDist->categories()*lossDist->categories();
	58	resizeMatrix(posteriorsV,seqLen,numOfSPs);
	59	//computePijGam cpg;
	60	//cpg._V.resize(numOfSPs);
	61	//for (int i=0; i < numOfSPs; ++i) {
	62	// int gainIndex =fromIndex2gainIndex(i,gainDist->categories(),lossDist->categories());
	63	// int lossIndex =fromIndex2lossIndex(i,gainDist->categories(),lossDist->categories());
	64	// cpg._V[i].fillPij(tr,*spVVec[gainIndex][lossIndex]);
	65	//}
	66	for (int pos=0; pos < sc.seqLen(); ++pos) {
	67	computeEB_EXP_siteSpecificGL(pos, sc, spVVec, tr, gainDist,lossDist,distPrim,posteriorsV[pos], //cpg
	68	GainLossV[pos], stdV[pos], lowerBoundV[pos], upperBoundV[pos], alphaConf, postProbPerSpPerCatPerPos,unObservableData_p);
	69	}
	70	}
	71
	72
	73	/********************************************************************************************
	74	*********************************************************************************************/
	75	void computeEB_EXP_siteSpecificGL(int pos,
	76	const sequenceContainer& sc,
	77	const vector<vector<stochasticProcess*> >& spVVec,
	78	//const computePijGam& cpg,
	79	const tree &tr,
	80	const distribution * gainDist,
	81	const distribution * lossDist,
	82	const distribution * distPrim,
	83	Vdouble & posteriorV,
	84	MDOUBLE& GainLossExpectation,
	85	MDOUBLE & stdGainLoss,
	86	MDOUBLE & lowerConf,
	87	MDOUBLE & upperConf,
	88	const MDOUBLE alphaConf,
	89	VVVdouble & postProbPerSpPerCatPerPos, //2 fill (*postProbPerSpPerCatPerPos)[sp][pos]
	90	unObservableData* unObservableData_p) // alpha of 0.05 is considered 0.025 for each side.
	91	{
	92	bool isLpostPerSpPerCatComputed =false;
	93	if(postProbPerSpPerCatPerPos[0][0][pos]>0)
	94	isLpostPerSpPerCatComputed =true;
	95
	96
	97	// here we compute the posterior P(r\|data)
	98	int numOfRateCat = (*spVVec[0][0]).categories(); // ver2
	99	int numOfSPs = gainDist->categories()*lossDist->categories();
	100
	101	posteriorV.resize(distPrim->categories(),0.0);
	102	// ver2
	103	VVdoubleRep PosteriorVVRateCat;
	104	resizeMatrix(PosteriorVVRateCat,numOfSPs,numOfRateCat);
	105
	106	doubleRep dRepTotalLikelihood(0.0);// temporary dblRep for total likelihood
	107
	108	for (int spIndex=0; spIndex < numOfSPs; ++spIndex) {
	109	int gainIndex =fromIndex2gainIndex(spIndex,gainDist->categories(),lossDist->categories());
	110	int lossIndex =fromIndex2lossIndex(spIndex,gainDist->categories(),lossDist->categories());
	111
	112	//int primIndex;
	113	//if(distPrim == gainDist)
	114	// primIndex = gainIndex;
	115	//else
	116	// primIndex = lossIndex;
	117
	118	computePijGam pi;
	119	pi.fillPij(tr,*spVVec[gainIndex][lossIndex]);
	120
	121	// ver1 - no rate dist in rate computation
	122	//dblRepPosteriorV[primIndex] += likelihoodComputation::getLofPos(pos,tr,sc,pi,spVVec[gainIndex][lossIndex]) gainDist->ratesProb(gainIndex)*lossDist->ratesProb(lossIndex);
	123
	124	// ver2 - with rate dist
	125	for (int rateInd=0; rateInd < numOfRateCat; ++rateInd) {
	126	PosteriorVVRateCat[spIndex][rateInd] += likelihoodComputation::getLofPos(pos,tr,sc,pi[rateInd],*spVVec[gainIndex][lossIndex],unObservableData_p)
	127	* gainDist->ratesProb(gainIndex) * lossDist->ratesProb(lossIndex) * spVVec[gainIndex][lossIndex]->ratesProb(rateInd);
	128	}
	129	}
	130
	131	// here we compute sigma r * P(r \| data)
	132	GainLossExpectation = 0.0;
	133	MDOUBLE sumOfSquares = 0.0; // this is the sum of squares. this will be used to compute the variance
	134
	135	// ver1 - no rate dist in rate computation
	136	//for (int i=0; i < distPrim->categories(); ++i) {
	137	// dblRepTotalLikelihood+=dblRepPosteriorV[i];
	138	//}
	139	//for (int j=0; j < distPrim->categories(); ++j) {
	140	// dblRepPosteriorV[j]/=dblRepTotalLikelihood; // so that posteriorV is probability.
	141	// if(unObservableData_p){
	142	// dblRepPosteriorV[j] = dblRepPosteriorV[j]/(1- exp(unObservableData_p->getlogLforMissingData())); // Note: each postProbCat corrected by unObs of all cat
	143	// }
	144	// posteriorV[j] = convert(dblRepPosteriorV[j]); // revert back to DOUBLE
	145	// MDOUBLE tmp = posteriorV[j]*distPrim->rates(j);
	146	// GainLossExpectation += tmp;
	147	// sumOfSquares += (tmp*distPrim->rates(j));
	148	//}
	149
	150	// ver2
	151	for (int spIndex=0; spIndex < numOfSPs; ++spIndex) {
	152	for (int i=0; i < numOfRateCat; ++i) {
	153	dRepTotalLikelihood+=PosteriorVVRateCat[spIndex][i];
	154	}
	155	}
	156
	157
	158	for (int spIndex=0; spIndex < numOfSPs; ++spIndex) {
	159	int gainIndex =fromIndex2gainIndex(spIndex,gainDist->categories(),lossDist->categories());
	160	int lossIndex =fromIndex2lossIndex(spIndex,gainDist->categories(),lossDist->categories());
	161
	162	int primIndex;
	163	if(distPrim == gainDist)
	164	primIndex = gainIndex;
	165	else
	166	primIndex = lossIndex;
	167
	168	for (int i=0; i < numOfRateCat; ++i) {
	169	PosteriorVVRateCat[spIndex][i]/=convert(dRepTotalLikelihood); // so that posteriorV is probability.
	170	posteriorV[primIndex] += convert(PosteriorVVRateCat[spIndex][i]);
	171	MDOUBLE tmp = convert(PosteriorVVRateCat[spIndex][i]) * distPrim->rates(primIndex) * spVVec[0][0]->rates(i); // the rateVal
	172	GainLossExpectation += tmp;
	173	sumOfSquares += (tmp * distPrim->rates(primIndex) * spVVec[0][0]->rates(i)); // ???
	174	}
	175	}
	176	////////////////////////////////////////////////////////////////////////// ?
	177	if(!isLpostPerSpPerCatComputed){
	178	for (int spIndex=0; spIndex < numOfSPs; ++spIndex) {
	179	for (int rateInd=0; rateInd < numOfRateCat; ++rateInd) {
	180	postProbPerSpPerCatPerPos[spIndex][rateInd][pos] = convert(PosteriorVVRateCat[spIndex][rateInd]);
	181	}
	182	}
	183	}
	184	MDOUBLE variance = sumOfSquares - GainLossExpectation*GainLossExpectation; // variance
	185	//if (!(variance!=0))
	186	// errorMsg::reportError("Error in computeEB_EXP_siteSpecificGainLoss, variance = 0");
	187	stdGainLoss = sqrt(variance); // standard deviation of inferred Ka/Ks
	188
	189	// detecting the confidence intervals.
	190	MDOUBLE oneSideConfAlpha = alphaConf/2.0; // because we are computing the two tail.
	191	MDOUBLE cdf = 0.0; // cumulative density function.
	192	int k=0;
	193	while (k < distPrim->categories()){
	194	cdf += posteriorV[k];
	195	if (cdf >oneSideConfAlpha) {
	196	lowerConf = distPrim->rates(k);
	197	break;
	198	}
	199	k++;
	200	}
	201	while (k < distPrim->categories()) {
	202	if (cdf >(1.0-oneSideConfAlpha)) {
	203	upperConf = distPrim->rates(k);
	204	break;
	205	}
	206	++k;
	207	cdf += posteriorV[k];
	208	}
	209	if (k==distPrim->categories())
	210	upperConf = distPrim->rates(k-1);
	211	}
	212
	213
	214

+67

-0

programs/gainLoss/siteSpecificGL.h less more

	0	/*
	1	Copyright (C) 2011 Tal Pupko TalP@tauex.tau.ac.il.
	2
	3	This program is free software: you can redistribute it and/or modify
	4	it under the terms of the GNU General Public License as published by
	5	the Free Software Foundation, either version 3 of the License, or
	6	(at your option) any later version.
	7
	8	This program is distributed in the hope that it will be useful,
	9	but WITHOUT ANY WARRANTY; without even the implied warranty of
	10	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	11	GNU General Public License for more details.
	12
	13	You should have received a copy of the GNU General Public License
	14	along with this program. If not, see <http://www.gnu.org/licenses/>.
	15	*/
	16
	17
	18	#ifndef ___SITE_SPECIFIC_GL__
	19	#define ___SITE_SPECIFIC_GL__
	20
	21	#include "definitions.h"
	22	#include "likelihoodComputation.h"
	23	#include "seqContainerTreeMap.h"
	24
	25	// per all sites computation
	26	void computeEB_EXP_siteSpecificGL(Vdouble & GainLossV,
	27	Vdouble & stdV,
	28	Vdouble & lowerBoundV,
	29	Vdouble & upperBoundV,
	30	VVdouble & posteriorsV,
	31	const sequenceContainer& sc,
	32	const vector<vector<stochasticProcess*> >& sp,
	33	const tree& tr,
	34	const distribution * gainDist,
	35	const distribution * lossDist,
	36	const distribution * distPrim,
	37	const MDOUBLE alphaConf,
	38	VVVdouble & postProbPerSpPerCatPerPos, //2 fill (*postProbPerSpPerCatPerPos)[sp][pos]
	39	unObservableData* unObservableData_p);
	40
	41	// per one site
	42	void computeEB_EXP_siteSpecificGL(int pos,
	43	const sequenceContainer& sc,
	44	const vector<vector<stochasticProcess*> >& sp,
	45	//const computePijGam& cpg,
	46	const tree &tr,
	47	const distribution * gainDist,
	48	const distribution * lossDist,
	49	const distribution * distPrim,
	50	Vdouble & posteriorV,
	51	MDOUBLE& GainLossExpectation,
	52	MDOUBLE & stdForce,
	53	MDOUBLE & lowerConf,
	54	MDOUBLE & upperConf,
	55	const MDOUBLE alphaConf,
	56	VVVdouble & postProbPerSpPerCatPerPos, //2 fill (*postProbPerSpPerCatPerPos)[sp][pos]
	57	unObservableData* unObservableData_p);
	58
	59
	60
	61
	62
	63
	64
	65
	66	#endif

+6

-0

programs/gainLoss/stochasticProcessLayers.txt less more

	0	stochasticProcess contains:
	1	distribution (rate dist. e.g., gamma)
	2	trivialAccelerator contains:
	3	replacementModel = gainLossModel (or others)
	4
	5	⏎

+19

-0

programs/indelCoder/Makefile less more

	0	#! /usr/local/bin/gmake
	1	# $Id: Makefile cohenofi $
	2
	3	# In order to compile with doubleRep run make like this: make doubleRep
	4
	5	Libsources= indelCoder.cpp indelCoderOptions.cpp indelCoderProject.cpp indelCoderUtils.cpp character.cpp gaps.cpp
	6
	7	#Libsources=
	8	LIBNAME = indelCoder
	9
	10	# LibCsources= cmdline.c
	11	# LibCsources += getopt.c getopt1.c
	12
	13	EXEC = indelCoder
	14
	15
	16
	17	include ../Makefile.generic
	18

+82

-0

programs/indelCoder/character.cpp less more

	0	#include "character.h"
	1	#include "gaps.h"
	2
	3	void character::checkForTriangleInequality(int st1, int st2){
	4	int longestGapStIndex = getLongestGapStIndex();
	5	if(_stepmatrix[st1][st2] > _stepmatrix[st1][longestGapStIndex]+_stepmatrix[st2][longestGapStIndex]){
	6	_isTriangleInequalityCorrectionNeeded = true;
	7	_stepmatrixTriagleInCorrected = _stepmatrix;
	8	++_stepmatrixTriagleInCorrected[st1][ longestGapStIndex];
	9	++_stepmatrixTriagleInCorrected[longestGapStIndex][st1];
	10	++_stepmatrixTriagleInCorrected[st2][ longestGapStIndex];
	11	++_stepmatrixTriagleInCorrected[longestGapStIndex][st2];
	12	}
	13	};
	14
	15	int character::getLongestGapStIndex(){
	16	int longestGapStIndex;
	17	int longestGapNumOfZeros = 0;
	18	int characterLength = _states[0].size();
	19	for(int st = 0; st<_states.size(); ++st){
	20	int gapNumOfZeros = 0;
	21	for(int ind = 0; ind<characterLength; ++ind){
	22	gapNumOfZeros += _states[st][ind];
	23	}
	24	if(gapNumOfZeros > longestGapNumOfZeros)
	25	longestGapStIndex = st;
	26	}
	27	return longestGapStIndex;
	28	};
	29
	30	//********************************************************************************************
	31	//computeNumOfSteps
	32	// Foreach c in character_1:character_M
	33	// Foreach st_x and st_y in state_0:state_c_ST (There are ST states in character c) (go over all state combinations)
	34	// Do A to E steps for the pair st_x and st_y:
	35	// A) translate into 01 to X set of 5'-3' coordinats of the X gaps within st_x and st_y
	36	// B) ignore 0-0 (cost_c_x_y =- #0-0 colomns)
	37	// C) merge adjacent 0-1 and 1-0 ((cost_c_x_y =- #adjacent 0-1 and 1-0 colomns)
	38	// D) ignore 1-1 (cost_c_x_y =- #1-1 colomns)
	39	//********************************************************************************************
	40	int character::computeNumOfSteps(int st1, int st2){
	41	int numOfSteps =_states[st1].size();
	42	vector<int> state1(_states[st1].size());
	43	state1 = _states[st1];
	44	vector<int> state2(_states[st2].size());
	45	state2 = _states[st2];
	46
	47	vector<int>::iterator iter1 = state1.begin();
	48	vector<int>::iterator iter2 = state2.begin();
	49	vector<int>::iterator iterLastCounted1 = iter1;
	50	vector<int>::iterator iterLastCounted2 = iter2;
	51
	52	LOGnOUT(6,<<" step "<<st1<<" "<<st2<<endl); // DEBUG
	53	int i = 0;
	54	while( iter1!=state1.end() ){ // both same length
	55	if(iter1 == iter2
	56	\|\| (iter1 != state1.begin() && iter1 == (iter1-1) && iter2 == (iter2-1))
	57	\|\| (i>0 && iter1 == iterLastCounted1 && iter2 == iterLastCounted2 && *(iter1-1)==0 )
	58	)
	59	{
	60	LOGnOUT(6,<<i<<" "<<iter1<<" "<<iter2<<endl); // DEBUG
	61	//state1.erase(iter1);
	62	//state2.erase(iter2);
	63	--numOfSteps;
	64	}
	65	else{
	66	iterLastCounted1 = iter1;
	67	iterLastCounted2 = iter2;
	68	LOGnOUT(6,<<"Count step "<<i<<" "<<iter1<<" "<<iter2<<endl); // DEBUG
	69	}
	70	++iter1;
	71	++iter2;
	72	++i;
	73	}
	74
	75	if(state1.size() != state2.size())
	76	cout<<"error"<<endl;
	77
	78	//numOfSteps = state1.size();
	79	return numOfSteps;
	80	};
	81

+159

-0

programs/indelCoder/character.h less more

	0	#ifndef ___CHARACTER__
	1	#define ___CHARACTER__
	2
	3	#include "definitions.h"
	4	#include "gaps.h"
	5	#include "matrixUtils.h"
	6	#include "indelCoderOptions.h"
	7
	8
	9	using namespace std;
	10
	11
	12	class character {
	13	public:
	14
	15	explicit character(int coord_5p, int coord_3p, int numOfSquencs, int numOfStates=0):_coord_5p(coord_5p), _coord_3p(coord_3p), _numOfSequences(numOfSquencs)
	16	{
	17	_numOfStates = 1;
	18	_isTriangleInequalityCorrectionNeeded = false;
	19	//_sc_states.resize(numOfSquencs); // No need - done later
	20	};
	21
	22	~character() {};
	23
	24	int getCoord5(){return _coord_5p;}
	25	int getCoord3(){return _coord_3p;}
	26	void setCoord3(int coord_3p){ _coord_3p = coord_3p;}
	27	int getNumOfGaps(){return _gaps.numOfGaps();}
	28	int getNumOfStates(){return _numOfStates;}
	29	vector<int> getGapsIndices() const {return _gapsIndices;}
	30
	31
	32	void addGap(gaps::gap* gap_p, int gapIndex){
	33	_gaps.insertNewGap(gap_p);
	34	_gapsIndices.push_back(gapIndex);
	35	}
	36
	37	void addZeroState(){
	38	vector<int> zeroState((int)(_coord_3p-_coord_5p+1),1); // vector of ones, length of character
	39	_states.push_back(zeroState);
	40	};
	41
	42	void addState(vector<int> state){
	43	_states.push_back(state);
	44	++_numOfStates;
	45	};
	46
	47	void resizeSc_states(){resizeMatrix(_sc_states,_numOfSequences,(int)(_coord_3p-_coord_5p+1)); oneMatrix(_sc_states); };
	48	void resizeStepMatrix(){resizeMatrix(_stepmatrix,_numOfStates,_numOfStates); };
	49	void setGapsInSc_states(int seqId, int coord5, int coord3){
	50	for (int i = coord5; i<=coord3; ++i){
	51	_sc_states[seqId][i-_coord_5p] = 0;
	52	}
	53	};
	54	vector< vector<int> > getScStates(){return _sc_states;};
	55
	56	vector< vector<int> > getStates(){return _states;};
	57
	58
	59	//********************************************************************************************
	60	//isTriangleInequalityCorrectionNeeded
	61	//********************************************************************************************
	62	bool isTriangleInequalityCorrectionNeeded(){return _isTriangleInequalityCorrectionNeeded;};
	63	void checkForTriangleInequality(int st1, int st2);
	64	int getLongestGapStIndex();
	65
	66
	67	int computeNumOfSteps(int st1, int st2);
	68
	69
	70	//*******************************************************************************************
	71	//printScStates
	72	//*******************************************************************************************
	73	void printScStates(){
	74	cout<<"ScStates:"<<endl;
	75	for(int s=0; s<_sc_states.size(); ++s){
	76	for(int alp=0; alp<_sc_states[0].size(); ++alp){
	77	cout<<_sc_states[s][alp];
	78	}
	79	cout<<endl;
	80	}
	81	}
	82
	83	//********************************************************************************************
	84	//printStates
	85	//*********************************************************************************************
	86	void printStates(){
	87	cout<<"States:"<<endl;
	88	for(int st=0; st<_numOfStates; ++st){
	89	for(int alp=0; alp<_states[0].size(); ++alp){
	90	cout<<_states[st][alp];
	91	}
	92	cout<<endl;
	93	}
	94	}
	95
	96
	97	//********************************************************************************************
	98	//determinationStepsMatrix
	99	//*********************************************************************************************
	100	void determinationStepsMatrix(){
	101	resizeStepMatrix();
	102	for(int st1 = 0; st1< _numOfStates; ++st1){
	103	for(int st2= st1; st2< _numOfStates; ++st2){
	104	if(st1==st2)
	105	_stepmatrix[st1][st2] = 0;
	106	else{
	107	_stepmatrix[st1][st2] = computeNumOfSteps(st1,st2);
	108	_stepmatrix[st2][st1] = _stepmatrix[st1][st2];
	109	}
	110	if(indelCoderOptions::_isCheckForTriangleInequality)
	111	checkForTriangleInequality(st1,st2);
	112	}
	113	}
	114	}
	115
	116	//********************************************************************************************
	117	//printStepsMatrix
	118	//*********************************************************************************************
	119	void printStepsMatrix(ostream& out = cout, bool isCorrectdForTriangleInEq = false){
	120	out<<" ";
	121	for(int st1 = 0; st1< _numOfStates; ++st1){
	122	out<<st1<<" ";
	123	}
	124	out<<endl;
	125
	126	for(int st1 = 0; st1< _numOfStates; ++st1){
	127	out<<"["<<st1<<"] ";
	128	for(int st2= 0; st2< _numOfStates; ++st2){
	129	if(isCorrectdForTriangleInEq)
	130	out<<_stepmatrixTriagleInCorrected[st1][st2]<<" ";
	131	else
	132	out<<_stepmatrix[st1][st2]<<" ";
	133	}
	134	out<<endl;
	135	}
	136	}
	137
	138
	139
	140
	141	private:
	142	int _coord_5p;
	143	int _coord_3p;
	144	int _numOfStates;
	145	int _numOfSequences;
	146
	147	gaps _gaps; // gaps included in this character
	148	vector<int> _gapsIndices; // since all gaps are indexed, here you find the indices of the gaps included in this character
	149	vector< vector<int> > _stepmatrix;
	150	vector< vector<int> > _sc_states; // matrix - species X lengthOfCharacter
	151	vector< vector<int> > _states;
	152
	153	bool _isTriangleInequalityCorrectionNeeded;
	154	vector< vector<int> > _stepmatrixTriagleInCorrected;
	155
	156	};
	157
	158	#endif

+1

-0

programs/indelCoder/gaps.cpp less more

0

#include "gaps.h"

+107

-0

programs/indelCoder/gaps.h less more

	0	#ifndef ___GAP__
	1	#define ___GAP__
	2
	3
	4	#include "definitions.h"
	5
	6	#include <iostream>
	7	using namespace std;
	8
	9
	10
	11	class gaps {
	12	public:
	13
	14	explicit gaps() {};
	15	~gaps() {
	16	for(int i=0; i<_gaps.size();++i){
	17	gap* gap2delete = _gaps[i];
	18	delete gap2delete;
	19	}
	20	};
	21	////////////////////////////////////////////////////////////////////////// inner class
	22	class gap {
	23	public:
	24
	25	explicit gap(int coord_5p, int coord_3p, int seqID, int coord_5Abs):
	26	_coord_5p(coord_5p), _coord_3p(coord_3p), _seqID(seqID),_coord_5Abs(coord_5Abs) {};
	27	~gap() {};
	28	int getCoord5() const {return _coord_5p;};
	29	int getCoord3()const {return _coord_3p;};
	30	int getSeqID() const {return _seqID;};
	31	int getCoord5Abs() const {return _coord_5Abs;};
	32	int getLength() const {return _coord_3p-_coord_5p+1;};
	33
	34	private:
	35	int _coord_5p;
	36	int _coord_3p;
	37	int _seqID;
	38	int _coord_5Abs;
	39	};
	40	////////////////////////////////////////////////////////////////////////// end
	41
	42
	43	gap* operator[](const int i) {return _gaps[i];} // get the ID of the gap. Return the gap itself.
	44
	45	int numOfGaps(){return _gaps.size();}
	46
	47	/********************************************************************************************
	48	insertNewGap
	49	// Sort the vector containing all indels by I =(i1,i2), K =(k1,k2), I<K iff i1<k1 or i1=k1 and i2<k2
	50	*********************************************************************************************/
	51	void insertNewGap(int coord_5p, int coord_3p, int seqID, int coord_5Abs)
	52	{
	53	gap* gap_p = new gap(coord_5p, coord_3p,seqID, coord_5Abs);
	54	//_gaps.push_back(gap_p);
	55
	56	vector<gap*>::iterator iter;
	57	int position = 0;
	58	iter = _gaps.begin();
	59	while( iter!=_gaps.end() &&
	60	( (*iter)->getCoord5() < coord_5p \|\|
	61	((iter)->getCoord5() <= coord_5p && (iter)->getCoord3() < coord_3p) ) )
	62	{
	63	iter++;
	64	position++;
	65	}
	66	_gaps.insert(iter, gap_p);
	67	};
	68
	69	//////////////////////////////////////////////////////////////////////////
	70	void insertNewGap(gap* gap_p){
	71	vector<gap*>::iterator iter;
	72	int position = 0;
	73	iter = _gaps.begin();
	74	while( iter!=_gaps.end() &&
	75	( (*iter)->getCoord5() < gap_p->getCoord5() \|\|
	76	((iter)->getCoord5() <= gap_p->getCoord5() && (iter)->getCoord3() < gap_p->getCoord3()) ) )
	77	{
	78	iter++;
	79	position++;
	80	}
	81	_gaps.insert(iter, gap_p);
	82	};
	83
	84	void insertNewGapNotSorted(gap* gap_p){
	85	_gaps.push_back(gap_p);
	86	};
	87
	88	//////////////////////////////////////////////////////////////////////////
	89	void printGaps(){
	90	vector<gap*>::iterator iter;
	91	iter = _gaps.begin();
	92	while( iter!=_gaps.end())
	93	{
	94	cout<<"Gap "<<(iter)->getCoord5()<<" "<<(iter)->getCoord3()<<endl;
	95	iter++;
	96	}
	97	};
	98
	99	private:
	100	vector<gap*> _gaps;
	101
	102
	103	};
	104
	105
	106	#endif

+626

-0

programs/indelCoder/indelCoder.cpp less more

	0	#include "indelCoder.h"
	1	#include "indelCoderUtils.h"
	2
	3
	4	using namespace std;
	5
	6
	7	/********************************************************************************************
	8	run
	9	*********************************************************************************************/
	10	void indelCoder::run(){
	11	startSequenceContainer(); // note: only Amino seq is implemented
	12	readSequenceIntoGaps(); // Find and sort all gaps in MSA
	13	printGapsInfo();
	14	switch (indelCoderOptions::_codingType)
	15	{
	16	case (indelCoderOptions::SIC):
	17	delimitationOfCharactersSIC();
	18	break;
	19	case (indelCoderOptions::MCIC):
	20	LOGnOUT(2,<<endl<< "WARNING: The MCIC implementation is incomplete.\n Please re-run using SIC coding\n"<<endl);
	21	return;
	22	delimitationOfCharacters(indelCoderOptions::_codingType);
	23	break;
	24	case (indelCoderOptions::MCIC2):
	25	LOGnOUT(2,<<endl<< "WARNING: The MCIC2 implementation is incomplete.\n Please re-run using SIC coding\n"<<endl);
	26	return;
	27	delimitationOfCharacters(indelCoderOptions::_codingType);
	28	break;
	29	default:
	30	errorMsg::reportError("unknown type in codingType - {SIC, MCIC, MCIC2}");
	31	}
	32
	33	//if(indelCoderOptions::_isMCIC2)
	34	// delimitationOfCharactersMCIC2();
	35	//else
	36	// delimitationOfCharacters();
	37
	38	resizeMatrix(_matrix,_sc.numberOfSeqs(),_characters.size());
	39	if(indelCoderOptions::_codingType==indelCoderOptions::SIC)
	40	determinationCharacterStateSIC();
	41	else{
	42	determinationCharacterState();
	43	determinationStepsMatrix();
	44	}
	45	//printCharacters(); //DEBUG
	46	printFasta();
	47	printNexus();
	48	printIndelSummary(); // Tal's own format in which all indel information is provided.
	49	}
	50
	51	/********************************************************************************************
	52	printCharacters
	53	*********************************************************************************************/
	54	void indelCoder::printCharacters(){
	55	for(int i = 0; i < _characters.size(); ++i)
	56	{
	57	cout<<"Character "<<_characters[i]->getCoord5()<<" "<<_characters[i]->getCoord3()<<" "<<_characters[i]->getNumOfGaps()<<" "<<_characters[i]->getNumOfStates()<<endl;
	58	//_characters[i]->printScStates(); //DEBUG
	59	_characters[i]->printStates();
	60	_characters[i]->printStepsMatrix();
	61	}
	62	}
	63
	64	/********************************************************************************************
	65	startSequenceContainer
	66	*********************************************************************************************/
	67	void indelCoder::startSequenceContainer(){
	68	amino alph; // note: we can add parameter with Alphabet type
	69	ifstream in(indelCoderOptions::_seqFile.c_str());
	70	_sc = recognizeFormat::read(in,&alph);
	71	_gapsVperSc.resize(_sc.numberOfSeqs());
	72	LOGnOUT(4,<<"Seq "<<indelCoderOptions::_seqFile.c_str()<<endl);
	73	LOGnOUT(4,<<"numberOfSeqs="<<_sc.numberOfSeqs()<<endl);
	74	LOGnOUT(4,<<"seqLen="<<_sc.seqLen()<<endl);
	75	}
	76
	77	/********************************************************************************************
	78	readSequenceIntoGaps
	79	// Sort the vector containing all indels by I =(i1,i2), K =(k1,k2), I<K iff i1<k1 or i1=k1 and i2<k2
	80	*********************************************************************************************/
	81	void indelCoder::readSequenceIntoGaps(){
	82	LOGnOUT(4,<<endl<< "Step (1) readSequenceIntoGaps..."<<endl);
	83	LOGnOUT(5,<< " All MSA gaps are sorted by coordinates"<<endl);
	84	LOGnOUT(5,<< " Sort by: I =(i1,i2), K =(k1,k2), I<K iff i1<k1 or i1=k1 and i2<k2"<<endl);
	85	int gapSign = -1;
	86	int UnknownSign = _sc.getAlphabet()->unknown(); // Note that within amino class, 'X' is also coded as unknown
	87	int coord5=0;
	88	int coord3=0;
	89	int seqID=0;
	90	int coord5abs=0; //coord5MinusNumOfGapPositionsFromGenomeStart
	91	for(int s=0; s<_sc.numberOfSeqs(); ++s){
	92	cout<<_sc[s].id()<<" "<<_sc[s].name()<<"\n";
	93	int numOfGapPositionsFromGenomeStart = 0;
	94	int seqLength =_sc.seqLen();
	95	for(int pos=0; pos<seqLength; ++pos){
	96	if(_sc[s][pos] == gapSign){
	97	coord5 = pos;
	98	coord5abs = coord5-numOfGapPositionsFromGenomeStart;
	99	++numOfGapPositionsFromGenomeStart;
	100	while(pos<(seqLength-1) && _sc[s][pos+1] == gapSign){
	101	++pos;
	102	++numOfGapPositionsFromGenomeStart;
	103	}
	104	coord3 = pos;
	105	seqID = _sc[s].id();
	106	//cout<<"new gap found "<<seqID<<" "<<coord5<<" "<<coord3<<endl;
	107	if(indelCoderOptions::_isOmitLeadingAndEndingGaps && (coord5abs==0 \|\| coord3==seqLength-1)){
	108	LOGnOUT(4,<< "Skip Leading/Ending Gap. seq="<< s<<" coord5="<<coord5abs<<" coord3="<<coord3<<endl);
	109	_unknowns.insertNewGap(coord5, coord3,seqID, coord5abs);
	110	}else{
	111	_gaps.insertNewGap(coord5, coord3,seqID, coord5abs);
	112	_gapsVperSc[seqID].insertNewGap(coord5, coord3,seqID, coord5abs); // used additionally were gaps are pre-sorted by seq
	113	}
	114	}
	115	if(_sc[s][pos] == UnknownSign){
	116	coord5 = pos;
	117	coord5abs = coord5-numOfGapPositionsFromGenomeStart;
	118	while(pos<(seqLength-1) && _sc[s][pos+1] == UnknownSign){
	119	++pos;
	120	}
	121	coord3 = pos;
	122	seqID = _sc[s].id();
	123	_unknowns.insertNewGap(coord5, coord3,seqID, coord5abs);
	124	}
	125	}
	126	}
	127	LOGnOUT(4,<<endl<< "There are "<<_gaps.numOfGaps()<<" gaps"<<endl);
	128	//_gaps.printGaps(); //DEBUG
	129	}
	130
	131	/********************************************************************************************
	132	delimitationOfCharacters
	133	// 1) delimitation of the characters.
	134	// Each character is a region of the alignment that is fully represented by one indel,
	135	// and this indel is the longest one in these coordinates.
	136
	137	// Start with the first gap(indel) in the sorted vector to define the first character, following characters
	138	// character_1 is defined by gap_1
	139	// For i in gap_1:gap_N (N gaps in the sorted vector)
	140	// while gap_i(3') < character_j(3')
	141	// gap_i is within character_j
	142	// else j++
	143	*********************************************************************************************/
	144	void indelCoder::delimitationOfCharacters(indelCoderOptions::codingType type){
	145	LOGnOUT(4,<<endl<< "Step (2) delimitationOfCharacters... Complex Coding "<<indelCoderOptions::getCodingType(type)<<endl);
	146	LOGnOUT(5,<< " Finding the required positions(=characters) in the coded sequence"<<endl);
	147	LOGnOUT(5,<< " The number of characters <= the number of found gaps (each character may include several gaps)"<<endl);
	148
	149	if(type == indelCoderOptions::MCIC)
	150	LOGnOUT(5,<< " gap_i is extending previous character if it's start is the same but ends further"<<endl);
	151	if(type == indelCoderOptions::MCIC2)
	152	LOGnOUT(5,<< " gap_i is extending previous character if it's start is included in the previous character but ends further"<<endl);
	153
	154	int i=0;
	155	character* character_p = new character(_gaps[i]->getCoord5(), _gaps[i]->getCoord3(),_sc.numberOfSeqs());
	156	_characters.push_back(character_p);
	157	while( i<_gaps.numOfGaps())
	158	{
	159	// gap_i is included in previous character if it's start(5) & end(3) coord are within the start & end coord of the character
	160	while( _gaps[i]->getCoord5()>=character_p->getCoord5() && _gaps[i]->getCoord3()<=character_p->getCoord3()){
	161	character_p->addGap(_gaps[i],i+1);
	162	i++;
	163	if(i>=_gaps.numOfGaps())
	164	break;
	165	}
	166	if(i>=_gaps.numOfGaps())
	167	break;
	168
	169	bool condition;
	170	if(type == indelCoderOptions::MCIC)
	171	condition = _gaps[i]->getCoord5()==character_p->getCoord5(); // gap_i is extending previous character if it's start is the same but ends(3) further
	172	if(type == indelCoderOptions::MCIC2)
	173	condition = _gaps[i]->getCoord5()<=character_p->getCoord3(); // gap_i is extending previous character if it's start is included in the previous character but ends(3) further
	174	while(condition && _gaps[i]->getCoord3()>character_p->getCoord3() ){
	175	character_p->setCoord3(_gaps[i]->getCoord3());
	176	character_p->addGap(_gaps[i],i+1);
	177	i++;
	178	if(i>=_gaps.numOfGaps())
	179	break;
	180	}
	181
	182	// new character is required for this gap
	183	if(i<_gaps.numOfGaps() && _gaps[i]->getCoord5() > character_p->getCoord5() && _gaps[i]->getCoord3() > character_p->getCoord3()){
	184	character_p = new character(_gaps[i]->getCoord5(), _gaps[i]->getCoord3(),_sc.numberOfSeqs());
	185	_characters.push_back(character_p);
	186	character_p->addGap(_gaps[i],i+1);
	187	i++;
	188	//cout<<" Character "<<i<<" "<< character_p->getCoord5()<<" "<< character_p->getCoord3()<<" " <<endl;
	189	//break;
	190	}
	191	}
	192	LOGnOUT(4,<<endl<< "There were "<<_characters.size()<<" characters"<<endl);
	193	}
	194
	195	/********************************************************************************************
	196	delimitationOfCharactersMCIC2
	197	*********************************************************************************************/
	198	//void indelCoder::delimitationOfCharactersMCIC2(){
	199	// LOGnOUT(4,<<endl<< "Step (2) delimitationOfCharacters... Complex Coding (MCIC2)"<<endl);
	200	// LOGnOUT(5,<< " Finding the required positions(=characters) in the coded sequence"<<endl);
	201	// LOGnOUT(5,<< " The number of characters <= the number of found gaps (each character may include several gaps)"<<endl);
	202	//
	203	// int i=0;
	204	// character* character_p = new character(_gaps[i]->getCoord5(), _gaps[i]->getCoord3(),_sc.numberOfSeqs());
	205	// _characters.push_back(character_p);
	206	// while( i<_gaps.numOfGaps())
	207	// {
	208	// //coord5_c = _gaps[i]->getCoord5();
	209	// //coord3_c = _gaps[i]->getCoord3();
	210	// while( _gaps[i]->getCoord5()>=character_p->getCoord5() && _gaps[i]->getCoord3()<=character_p->getCoord3()){
	211	// character_p->addGap(_gaps[i],i+1);
	212	// i++;
	213	// if(i>=_gaps.numOfGaps())
	214	// break;
	215	// }
	216	// // gap_i is extending previous character it's start is included in the previous character
	217	// while(i<_gaps.numOfGaps()
	218	// && _gaps[i]->getCoord5()<=character_p->getCoord3() && _gaps[i]->getCoord3()>character_p->getCoord3() ){
	219	// character_p->setCoord3(_gaps[i]->getCoord3());
	220	// character_p->addGap(_gaps[i],i+1);
	221	// i++;
	222	// if(i>=_gaps.numOfGaps())
	223	// break;
	224	// }
	225	// // new character is required for this gap
	226	// if(i<_gaps.numOfGaps() && _gaps[i]->getCoord5() > character_p->getCoord5() && _gaps[i]->getCoord3() > character_p->getCoord3()){
	227	// character_p = new character(_gaps[i]->getCoord5(), _gaps[i]->getCoord3(),_sc.numberOfSeqs());
	228	// _characters.push_back(character_p);
	229	// character_p->addGap(_gaps[i],i+1);
	230	// i++;
	231	// //cout<<" Character "<<i<<" "<< character_p->getCoord5()<<" "<< character_p->getCoord3()<<" " <<endl;
	232	// //break;
	233	// }
	234	// }
	235	// LOGnOUT(4,<<endl<< "There were "<<_characters.size()<<" characters"<<endl);
	236	//}
	237
	238
	239	/********************************************************************************************
	240	delimitationOfCharactersSIC
	241	*********************************************************************************************/
	242	void indelCoder::delimitationOfCharactersSIC(){
	243	LOGnOUT(4,<<endl<< "Step (2) delimitationOfCharacters... Simple Coding (SIC)"<<endl);
	244	LOGnOUT(5,<< " Finding the required positions(=characters) in the coded sequence"<<endl);
	245	LOGnOUT(5,<< " The number of characters <= the number of found gaps (each character may include several gaps)"<<endl);
	246
	247	int i=0;
	248	character* character_p=NULL;
	249	if (_gaps.numOfGaps()>0) {
	250	character_p = new character(_gaps[i]->getCoord5(), _gaps[i]->getCoord3(),_sc.numberOfSeqs());
	251	_characters.push_back(character_p);
	252	}
	253	while( i<_gaps.numOfGaps())
	254	{
	255	while( _gaps[i]->getCoord5()==character_p->getCoord5() && _gaps[i]->getCoord3()==character_p->getCoord3()){
	256	character_p->addGap(_gaps[i],i+1);
	257	i++;
	258	if(i>=_gaps.numOfGaps())
	259	break;
	260	}
	261	// new character is required for this gap
	262	if(i<_gaps.numOfGaps() ){
	263	character_p = new character(_gaps[i]->getCoord5(), _gaps[i]->getCoord3(),_sc.numberOfSeqs());
	264	_characters.push_back(character_p);
	265	character_p->addGap(_gaps[i],i+1);
	266	i++;
	267	//cout<<" Character "<<i<<" "<< character_p->getCoord5()<<" "<< character_p->getCoord3()<<" " <<endl;
	268	//break;
	269	}
	270	}
	271	LOGnOUT(4,<<endl<< "There were "<<_characters.size()<<" characters"<<endl);
	272	}
	273
	274
	275	/********************************************************************************************
	276	determinationCharacterState
	277	// 2) determination of the character state of each character.
	278	// Each sequence presenting a different indel pattern at the corresponding character region is coded as a different state.
	279	// state_0 is defined by no-gaps in this region
	280	// Foreach j in character_1:character_M (M where found in previous step)
	281	// Foreach s in seq1:seqS (S taxons in the MSA)
	282	// if s gaps coordinates are equal to one of the gaps coordinates of previous states
	283	// next;
	284	// else
	285	// st++, where state_st is defined by X set of 5'-3' coordinates of the X gaps within s
	286	*********************************************************************************************/
	287	void indelCoder::determinationCharacterState(){
	288	LOGnOUT(4,<<endl<< "Step (3) determinationCharacterState... "<<endl);
	289
	290	// loop over characters
	291	for(int c = 0; c < _characters.size(); ++c)
	292	{
	293	int coord_5p = _characters[c]->getCoord5();
	294	int coord_3p = _characters[c]->getCoord3();
	295	_characters[c]->addZeroState(); // the default state - ones in length of the character
	296	//cout<<" Char "<<" "<<coord_5p<<" "<< coord_3p<<endl;
	297	_characters[c]->resizeSc_states(); // the _sc_states matrix (#species X length) is init with ones
	298
	299	// loop over taxa
	300	for(int s = 0; s < _sc.numberOfSeqs(); ++s){
	301	int seqID = _sc[s].id();
	302	if(seqID != s)
	303	cout<<"error: seqID not eq s";
	304	//cout<<"SeqID vs. s "<<seqID<<" "<<s<<endl; // DEBUG
	305
	306	// loop over gaps - ToDo - highly wasteful! - fix
	307	for(int g = 0; g <_gapsVperSc[seqID].numOfGaps(); ++g){
	308	int coord_5_gap = _gapsVperSc[seqID][g]->getCoord5();
	309	int coord_3_gap = _gapsVperSc[seqID][g]->getCoord3();
	310	if(coord_5_gap>=coord_5p && coord_3_gap<= coord_3p){
	311	// if the gap of the species is included in character - update _sc_state with zeros to designate this gap
	312	_characters[c]->setGapsInSc_states( seqID, coord_5_gap, coord_3_gap);
	313	}
	314	}
	315
	316	bool isNewState = true;
	317	if(_characters[c]->getScStates()[s]==_characters[c]->getStates()[0] ){
	318	isNewState = false;
	319	_matrix[s][c] = 0; // no gaps for species s in character c
	320	}
	321	else{
	322	for(int sq = s-1; sq>=0; --sq){
	323	if(_characters[c]->getScStates()[s] == _characters[c]->getScStates()[sq] ){
	324	isNewState = false; // this state was already found, no need for new
	325	_matrix[s][c] = _matrix[sq][c]; // gap in species s, same state as previously found state in species sq
	326	}
	327	}
	328	}
	329	if(isNewState){ // state was not found in previous species, need new
	330	_characters[c]->addState(_characters[c]->getScStates()[s]);
	331	_matrix[s][c] = _characters[c]->getNumOfStates()-1; // gap in species s, new state type
	332	}
	333	}
	334	}
	335	}
	336
	337	/********************************************************************************************
	338	determinationCharacterState
	339	// 2) determination of the character state of each character.
	340	// Each sequence presenting a different indel pattern at the corresponding character region is coded as a different state.
	341	// state_0 is defined by no-gaps in this region
	342	// state_1 for species with this (exact) gap
	343	// state_? for species with gap overlapping this one
	344	*********************************************************************************************/
	345	void indelCoder::determinationCharacterStateSIC(){
	346	LOGnOUT(4,<<endl<< "Step (3) determinationCharacterState... "<<endl);
	347
	348	resizeMatrix(_matrix,_sc.numberOfSeqs(),_characters.size()); // all zeroes
	349	// loop over characters
	350	for(int c = 0; c < _characters.size(); ++c)
	351	{
	352	int coord_5_char = _characters[c]->getCoord5();
	353	int coord_3_char = _characters[c]->getCoord3();
	354
	355	// loop over all gaps
	356	for(int g = 0; g <_gaps.numOfGaps(); ++g){
	357	int coord_5_gap = _gaps[g]->getCoord5();
	358	int coord_3_gap = _gaps[g]->getCoord3();
	359	int s = _gaps[g]->getSeqID(); // ????
	360	string nameG = _sc.name(_gaps[g]->getSeqID());
	361
	362	if(coord_5_gap==coord_5_char && coord_3_gap==coord_3_char){
	363	_matrix[s][c] = 1;
	364	}
	365	else if( //(coord_5_gap>=coord_5_char && coord_3_gap<= coord_3_char) \|\| // gap (in genome 'g') is within the char (indel=c)
	366	(coord_5_gap<=coord_5_char && coord_3_gap>= coord_3_char ) //\|\| // char (indel=c) is within the gap (in genome 'g')
	367	//(coord_5_gap<=coord_5_char && coord_3_gap>=coord_5_char ) \|\| // 5' of char is within gap (partial overlap)
	368	//(coord_5_gap<=coord_3_char && coord_3_gap>=coord_3_char ) // 3' of char is within gap (partial overlap)
	369	)
	370	_matrix[s][c] = 2; // same as '?', need to find&replace
	371	}
	372	for(int g = 0; g <_unknowns.numOfGaps(); ++g){
	373	int coord_5_gap = _unknowns[g]->getCoord5();
	374	int coord_3_gap = _unknowns[g]->getCoord3();
	375	int s = _unknowns[g]->getSeqID(); // ????
	376	if(coord_5_gap<=coord_5_char && coord_3_gap>= coord_3_char )
	377	_matrix[s][c] = 2; // same as '?', need to find&replace
	378
	379	if(_matrix[s][c] == 1 && (coord_5_char==(coord_3_gap+1) \|\| coord_3_char==(coord_5_gap-1)) ) // the indel is flaked by unKnown, thus it is ?
	380	_matrix[s][c] = 2; // same as '?', need to find&replace
	381	}
	382
	383	}
	384
	385
	386	}
	387
	388
	389
	390	/********************************************************************************************
	391	determinationStepsMatrix
	392	// 3) determination of the number of steps between every 2 character states.
	393	// Each pair of sequences is compared separately for the corresponding character area and the minimum number of steps between every 2 character states is then determined
	394
	395	// cost_c_x_y is initiated with lenght of character c
	396	// Foreach c in character_1:character_M
	397	// Foreach st_x and st_y in state_0:state_c_ST (There are ST states in character c) (go over all state combinations)
	398	// Do A to E steps for the pair st_x and st_y:
	399	// A) translate into 01 to X set of 5'-3' coordinats of the X gaps within st_x and st_y
	400	// B) ignore 0-0 (cost_c_x_y =- #0-0 colomns)
	401	// C) merge adjacent 0-1 and 1-0 ((cost_c_x_y =- #adjacent 0-1 and 1-0 colomns)
	402	// D) ignore 1-1 (cost_c_x_y =- #1-1 colomns)
	403	*********************************************************************************************/
	404	void indelCoder::determinationStepsMatrix(){
	405	LOGnOUT(4,<<endl<< "determinationStepsMatrix..."<<endl);
	406	for(int c = 0; c < _characters.size(); ++c)
	407	{
	408	_characters[c]->determinationStepsMatrix();
	409	}
	410	}
	411	/********************************************************************************************
	412	// print to out file the required data
	413	*********************************************************************************************/
	414	void indelCoder::printGapsInfo(){
	415	string fileGapsString = "gapsInfo.txt";
	416	ofstream fileGapsStream(fileGapsString.c_str());
	417
	418	fileGapsStream<<"# Start coordinate are with the genome as reference (not MSA)."<<endl;
	419	fileGapsStream<<"# Count starts from zero (first position = 1)."<<endl;
	420	fileGapsStream<<"seqID"<<"\t"<<"seqName"<<"\t"<<"start"<<"\t"<<"length"<<endl;
	421	int gapNum = 1;
	422	for(int s = 0; s < _sc.numberOfSeqs(); ++s){
	423	int seqID = _sc[s].id();
	424	string seqName = _sc[s].name();
	425	for(int g = 0; g <_gapsVperSc[seqID].numOfGaps(); ++g){
	426	fileGapsStream<<seqID<<"\t"<<seqName<<"\t"<<_gapsVperSc[seqID][g]->getCoord5Abs()+1<<"\t"<<_gapsVperSc[seqID][g]->getLength()<<endl;
	427	}
	428	}
	429	}
	430
	431
	432	/********************************************************************************************
	433	// print to out file the required data as fasta file
	434	*********************************************************************************************/
	435	void indelCoder::printFasta(){
	436	//string fileString = indelCoderOptions::_outDir + "//" + "outFileCodedSeq.fa";
	437	//ofstream fileStream(fileString.c_str());
	438	ofstream fileStream(indelCoderOptions::_indelOutputFastaFile.c_str());
	439	bool isSIC = indelCoderOptions::_codingType == indelCoderOptions::SIC;
	440	for(int s=0; s<_sc.numberOfSeqs();++s){
	441	fileStream<<">"<<_sc.name(s)<<"\n";
	442	for(int c=0; c<_matrix[0].size(); ++c ){
	443	if(isSIC && _matrix[s][c]==2)
	444	fileStream<<'?';
	445	else
	446	fileStream<<_matrix[s][c];
	447	}
	448	fileStream<<endl; // prev- 2 endl
	449	}
	450	fileStream.close();
	451	}
	452
	453
	454	/********************************************************************************************
	455	// 4) print to out file the required data
	456	// 4.1) MATRIX of S species over M characters
	457	// 4.2) foreach character of more than 2 states print the transition costs stepmatrix
	458	*********************************************************************************************/
	459	void indelCoder::printNexus() {
	460	bool isSIC = indelCoderOptions::_codingType == indelCoderOptions::SIC;
	461	string fileNexusString = indelCoderOptions::_nexusFileName;
	462	ofstream fileNexusStream(fileNexusString.c_str());
	463	fileNexusStream<<"#NEXUS"<<endl<<endl;
	464	fileNexusStream<<"[!matrix with indels coded according to "<<indelCoderOptions::getCodingType(indelCoderOptions::_codingType)<<" coding]"<<endl;
	465	//if(indelCoderOptions::_isMCIC2){fileNexusStream<<"2";}
	466	//fileNexusStream<<"]"<<endl;
	467	fileNexusStream<<"[! "<< PROG_INFO <<" ]"<<endl<<endl;
	468
	469	fileNexusStream<<"BEGIN CHARACTERS;"<<endl;
	470	fileNexusStream<<"DIMENSIONS newtaxa ntax="<<_sc.numberOfSeqs()<<" NCHAR="<<_sc.seqLen()+_characters.size()<<";"<<endl<<endl;
	471
	472	fileNexusStream<<"FORMAT "<<endl;
	473	fileNexusStream<<" DATATYPE = standard"<<endl;
	474	fileNexusStream<<" GAP = - "<<endl;
	475	fileNexusStream<<" MISSING = ?"<<endl;
	476	fileNexusStream<<" SYMBOLS="<<'"'<<"0123456789A#C$EFG.IJ&L%>OPQ/'TU:*X<Z"<<'"'<<endl;
	477	fileNexusStream<<" EQUATE="<<'"'<<"R={AG} Y={CT} M={AC} K={GT} S={CG} W={AT} H={ACT} B={CGT} V={ACG} D={AGT} N={ACGT} r={AG} y={CT} m={AC} k={GT} s={CG} w={AT} h={ACT} b={CGT} v={ACG} d={AGT} n={ACGT}"<<'"'<<endl;
	478	fileNexusStream<<"INTERLEAVE;"<<endl;
	479
	480
	481	//string fileGapString = indelCoderOptions::_outDir + "//" + "outGapInfoCHARSTATELABELS.txt";
	482	//ofstream gapStream(fileGapString.c_str());
	483	//gapStream<<"# each character is listing all included gaps.\n";
	484	//gapStream<<"# date for each gap: seqID(num start from 0), coord5Abs(not by MSA), length.\n";
	485
	486	fileNexusStream<<"CHARSTATELABELS"<<endl;
	487	int seqLeng = _sc.seqLen();
	488	int c=0;
	489	for(c=0; c<_characters.size();++c){
	490	fileNexusStream<<"\t"<<c+1+seqLeng<<" "<<"ind_pos_"<<_characters[c]->getCoord5()+1<<"_to_"<<_characters[c]->getCoord3()+1<<" ";
	491	fileNexusStream<<"/absent ";
	492	// gapStream<<c<<" character\t"<<_characters[c]->getCoord5()+1<<" to "<<_characters[c]->getCoord3()+1<<"\tincluding gaps:";
	493	for(int g=0; g<_characters[c]->getGapsIndices().size(); ++g){
	494	int gapNum = _characters[c]->getGapsIndices()[g];
	495	fileNexusStream<<" indel_"<<gapNum;
	496	//gapStream<<"\tgap "<<gapNum<<": "<<_gaps[gapNum-1]->getSeqID();
	497	//gapStream<<", "<<_gaps[gapNum-1]->getCoord5Abs()<<", "<<_gaps[gapNum-1]->getLength()<<";";
	498	}
	499	fileNexusStream<<endl;
	500	//gapStream<<endl;
	501	}
	502
	503	fileNexusStream<<"MATRIX"<<endl<<endl;
	504	for(int s=0; s<_sc.numberOfSeqs();++s){
	505	fileNexusStream<<""<<_sc.name(s)<<""; //prev- "Species_"
	506	for(c=0; c<_matrix[0].size(); ++c ){
	507	if(isSIC && _matrix[s][c]==2)
	508	fileNexusStream<<'?';
	509	else
	510	fileNexusStream<<_matrix[s][c];
	511	}
	512	fileNexusStream<<endl; // prev- 2 endl
	513	}
	514
	515	fileNexusStream<<";"<<endl<<endl;
	516	fileNexusStream<<"END;"<<endl<<endl<<endl;
	517	fileNexusStream<<"BEGIN ASSUMPTIONS; [below are the cost matrices of character change between the indel character state, these matrix exist only for characters that have more than two states]"
	518	<<endl<<endl<<endl<<endl;
	519
	520	if(!isSIC){
	521	for(c=0; c<_characters.size();++c){
	522	int numOfStates = _characters[c]->getNumOfStates();
	523	if(numOfStates>2){
	524	fileNexusStream<<"[char "<<c+1+seqLeng<<", indel char "<<c+1<<" "<<"("<<_characters[c]->getCoord5()+1<<"-"<<_characters[c]->getCoord3()+1<<"):";
	525	fileNexusStream<<"0 (absent)";
	526	for(int st=1; st<numOfStates; ++st ){
	527	fileNexusStream<<", "<<st<<" (indel_"<<_characters[c]->getGapsIndices()[st] <<")";
	528	}
	529	fileNexusStream<<" ]"<<endl;
	530
	531	fileNexusStream<<"usertype stepmatrix"<<c+1+seqLeng<<" (stepmatrix)="<<numOfStates<<endl<<endl;
	532	if(indelCoderOptions::_isCheckForTriangleInequality){
	533	if(_characters[c]->isTriangleInequalityCorrectionNeeded()){
	534	_characters[c]->printStepsMatrix(fileNexusStream,_characters[c]->isTriangleInequalityCorrectionNeeded());
	535	fileNexusStream<<"\n[prior to adjustment to satisfy triangle inequality:]\n";
	536	fileNexusStream<<"[";
	537	_characters[c]->printStepsMatrix(fileNexusStream);
	538	fileNexusStream<<"]\n";
	539	}
	540	else{
	541	_characters[c]->printStepsMatrix(fileNexusStream);
	542	}
	543	}
	544	else{
	545	_characters[c]->printStepsMatrix(fileNexusStream);
	546	}
	547	fileNexusStream<<";"<<endl<<endl<<endl;
	548	}
	549	}
	550	fileNexusStream<<endl<<endl<<endl;
	551
	552	fileNexusStream<<"[below is the line that says to which indel correspond which matrix]"<<endl<<endl;
	553
	554	fileNexusStream<<"typeset complexIndelCoding = ";
	555	for(c=0; c<_characters.size()-1;++c){
	556	int numOfStates = _characters[c]->getNumOfStates();
	557	if(numOfStates>2){
	558	fileNexusStream<<"stepmatrix"<<c<<" :"<<c<<",";
	559	}
	560	}
	561	fileNexusStream<<"stepmatrix"<<c<<" :"<<c<<";"<<endl<<endl;
	562	fileNexusStream<<";"<<endl<<endl;
	563	fileNexusStream<<"END;"<<endl<<endl<<endl<<endl;
	564	}
	565
	566
	567	fileNexusStream<<"BEGIN SETS;"<<endl<<endl;
	568	fileNexusStream<<"CHARSET indels = 1-"<<_characters.size()<<";"<<endl<<endl;
	569	fileNexusStream<<"END;"<<endl<<endl<<endl;
	570	fileNexusStream<<"BEGIN PAUP;"<<endl<<endl;
	571	fileNexusStream<<"assume typeset=complexIndelCoding;"<<endl<<endl;
	572	fileNexusStream<<"END;"<<endl<<endl;
	573
	574	fileNexusStream<<"[Indels:"<<endl;
	575	fileNexusStream<<"No. extension"<<endl;
	576	//for(int c=0; c<_gaps.numOfGaps();++c){
	577	// fileNexusStream<<c+1<<"\t"<<_gaps[c]->getCoord5()<<"-"<<_gaps[c]->getCoord3()<<endl;
	578	//}
	579	for(c=0; c<_characters.size();++c){
	580	fileNexusStream<<c+1<<"\t"<<_characters[c]->getCoord5()+1<<"-"<<_characters[c]->getCoord3()+1<<endl;
	581	}
	582	fileNexusStream<<"]"<<endl;
	583	}
	584
	585	void indelCoder::printIndelSummary() {
	586	bool isSIC = indelCoderOptions::_codingType == indelCoderOptions::SIC;
	587	//string fileGapString = indelCoderOptions::_outDir + "//" + "outGapInfoCHARSTATELABELS.txt";
	588	//ofstream gapStream(fileGapString.c_str());
	589	ofstream gapStream(indelCoderOptions::_indelOutputInfoFile.c_str());
	590
	591
	592	gapStream<<"# each character is listing all included gaps.\n";
	593	gapStream<<"# Character: Start position relative to MSA (first pos of gap, count from 0); End position relative to MSA (+1); length.\n";
	594	gapStream<<"# Gap: seqID(num start from 0); coord5Abs (relative to genome. not by MSA,first pos of gap, count from 0); length.\n";
	595
	596	int seqLeng = _sc.seqLen();
	597	for(int c=0; c<_characters.size();++c){
	598	gapStream<<"character number: "<<c<<endl;
	599	gapStream<<"Start position relative to MSA: "<<_characters[c]->getCoord5()<<endl;
	600	gapStream<<"End position relative to MSA: "<<_characters[c]->getCoord3()+1<<endl;
	601	gapStream<<"Length: "<<_characters[c]->getCoord3()-_characters[c]->getCoord5()+1<<endl;
	602
	603	vector<bool> isSpeciesWithGap(_sc.numberOfSeqs(),false);
	604
	605	for(int g=0; g<_characters[c]->getGapsIndices().size(); ++g){
	606	int gapNum = _characters[c]->getGapsIndices()[g];
	607	int speciesSeqID = _gaps[gapNum-1]->getSeqID();
	608	isSpeciesWithGap[speciesSeqID] = true;
	609	gapStream<<"Found in species: "<<_sc.name(speciesSeqID);
	610	gapStream<<" Start position relative to genome: "<<_gaps[gapNum-1]->getCoord5Abs();
	611	gapStream<<" Length: "<<_gaps[gapNum-1]->getLength()<<endl;
	612	//gapStream<<"\tgap "<<gapNum<<": "<<_gaps[gapNum-1]->getSeqID();
	613	//gapStream<<", "<<_gaps[gapNum-1]->getCoord5Abs()<<", "<<_gaps[gapNum-1]->getLength()<<";";
	614
	615	}
	616	gapStream<<"NOT FOUND in species: ";
	617	for(int i=0; i<_sc.numberOfSeqs(); ++i){
	618	if(!isSpeciesWithGap[i] && _matrix[i][c]!=2)
	619	gapStream<<_sc.name(i)<<",";
	620	}
	621	gapStream<<"\n";
	622	gapStream<<"ENDCHARACTER"<<endl<<endl;
	623	}
	624	}
	625

+75

-0

programs/indelCoder/indelCoder.h less more

	0	#ifndef ___INDELCODER_
	1	#define ___INDELCODER_
	2
	3	#include "gaps.h"
	4	#include "character.h"
	5	#include "indelCoderOptions.h"
	6
	7	#include "amino.h"
	8	#include "sequenceContainer.h"
	9	#include "recognizeFormat.h"
	10
	11	#include "logFile.h"
	12	#include "talRandom.h"
	13	#include <ctime>
	14	#include <vector>
	15
	16	using namespace std;
	17	// The implementation of IndelCoding scheme MCIC (Muller)
	18	// for "Large-scale parsimony analysis of metazoan indels in protein-coding genes"
	19	// (Parsimony tree reconstruction using indel information - supporting the Ecdysozoa hypothesis - sponges evolutionary close to animals)
	20	// coded as gaps only those gaps in the alignments that were shorter than 50 amino-acids and those which did not start at the N-terminus or end at the C-terminus of the alignment.
	21	//
	22	//
	23	// Simple Indel Coding – SIC (Simmons and Ochoterena 2000)
	24	// each indel receives a separate 2-state character of presence/absence.
	25	// Any overlapping indels that exceed the boundaries of this indel are scored as missing data for that indel character.
	26	//
	27	// Modified Complex Indel Coding – MCIC (Muller 2006).
	28	// MCIC differs from SIC only in the treatment of overlapping indels.
	29	// uses multistate characters to code overlapping indels and assigns a distinct symmetrical step matrix to those gaps.
	30
	31	// Note:
	32	// (*) implemented for Amino Acids seq. (Later, we can add parameter with Alphabet type)
	33
	34
	35	class indelCoder {
	36
	37	public:
	38	explicit indelCoder(){};
	39	virtual ~indelCoder(){};
	40
	41	void startSequenceContainer();
	42	void readSequenceIntoGaps();
	43	//void delimitationOfCharacters();
	44	void delimitationOfCharacters(indelCoderOptions::codingType type);
	45	void delimitationOfCharactersSIC();
	46	//void delimitationOfCharactersMCIC2();
	47
	48	void determinationCharacterState();
	49	void determinationCharacterStateSIC();
	50
	51	void determinationStepsMatrix();
	52	void printCharacters();
	53	void printNexus();
	54	void printFasta();
	55	void printGapsInfo();
	56	void printIndelSummary();
	57	void run();
	58
	59	private:
	60	sequenceContainer _sc;
	61	vector< vector<int> > _matrix;
	62	gaps _gaps;
	63	gaps _unknowns;
	64	vector<gaps> _gapsVperSc;
	65	vector<character*> _characters;
	66	};
	67
	68
	69
	70
	71
	72
	73	#endif
	74

+169

-0

programs/indelCoder/indelCoder.vcproj less more

	0	<?xml version="1.0" encoding="windows-1255"?>
	1	<VisualStudioProject
	2	ProjectType="Visual C++"
	3	Version="7.10"
	4	Name="indelCoder"
	5	ProjectGUID="{EDA6E266-80F6-40B9-AABC-4A6CDBAA0245}"
	6	Keyword="Win32Proj">
	7	<Platforms>
	8	<Platform
	9	Name="Win32"/>
	10	</Platforms>
	11	<Configurations>
	12	<Configuration
	13	Name="Debug\|Win32"
	14	OutputDirectory="Debug"
	15	IntermediateDirectory="Debug"
	16	ConfigurationType="1"
	17	CharacterSet="2">
	18	<Tool
	19	Name="VCCLCompilerTool"
	20	Optimization="0"
	21	AdditionalIncludeDirectories="..\..\libs\phylogeny"
	22	PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE;NDEBUG;"
	23	MinimalRebuild="TRUE"
	24	BasicRuntimeChecks="3"
	25	RuntimeLibrary="5"
	26	RuntimeTypeInfo="TRUE"
	27	UsePrecompiledHeader="0"
	28	WarningLevel="3"
	29	Detect64BitPortabilityProblems="TRUE"
	30	DebugInformationFormat="4"/>
	31	<Tool
	32	Name="VCCustomBuildTool"/>
	33	<Tool
	34	Name="VCLinkerTool"
	35	OutputFile="$(OutDir)/indelCoder.exe"
	36	LinkIncremental="2"
	37	GenerateDebugInformation="TRUE"
	38	ProgramDatabaseFile="$(OutDir)/indelCoder.pdb"
	39	SubSystem="1"
	40	TargetMachine="1"/>
	41	<Tool
	42	Name="VCMIDLTool"/>
	43	<Tool
	44	Name="VCPostBuildEventTool"/>
	45	<Tool
	46	Name="VCPreBuildEventTool"/>
	47	<Tool
	48	Name="VCPreLinkEventTool"/>
	49	<Tool
	50	Name="VCResourceCompilerTool"/>
	51	<Tool
	52	Name="VCWebServiceProxyGeneratorTool"/>
	53	<Tool
	54	Name="VCXMLDataGeneratorTool"/>
	55	<Tool
	56	Name="VCWebDeploymentTool"/>
	57	<Tool
	58	Name="VCManagedWrapperGeneratorTool"/>
	59	<Tool
	60	Name="VCAuxiliaryManagedWrapperGeneratorTool"/>
	61	</Configuration>
	62	<Configuration
	63	Name="Release\|Win32"
	64	OutputDirectory="Release"
	65	IntermediateDirectory="Release"
	66	ConfigurationType="1"
	67	CharacterSet="2">
	68	<Tool
	69	Name="VCCLCompilerTool"
	70	AdditionalIncludeDirectories="..\..\libs\phylogeny"
	71	PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
	72	RuntimeLibrary="4"
	73	RuntimeTypeInfo="TRUE"
	74	UsePrecompiledHeader="0"
	75	WarningLevel="3"
	76	Detect64BitPortabilityProblems="TRUE"
	77	DebugInformationFormat="3"/>
	78	<Tool
	79	Name="VCCustomBuildTool"/>
	80	<Tool
	81	Name="VCLinkerTool"
	82	OutputFile="$(OutDir)/indelCoder.exe"
	83	LinkIncremental="1"
	84	GenerateDebugInformation="TRUE"
	85	SubSystem="1"
	86	OptimizeReferences="2"
	87	EnableCOMDATFolding="2"
	88	TargetMachine="1"/>
	89	<Tool
	90	Name="VCMIDLTool"/>
	91	<Tool
	92	Name="VCPostBuildEventTool"/>
	93	<Tool
	94	Name="VCPreBuildEventTool"/>
	95	<Tool
	96	Name="VCPreLinkEventTool"/>
	97	<Tool
	98	Name="VCResourceCompilerTool"/>
	99	<Tool
	100	Name="VCWebServiceProxyGeneratorTool"/>
	101	<Tool
	102	Name="VCXMLDataGeneratorTool"/>
	103	<Tool
	104	Name="VCWebDeploymentTool"/>
	105	<Tool
	106	Name="VCManagedWrapperGeneratorTool"/>
	107	<Tool
	108	Name="VCAuxiliaryManagedWrapperGeneratorTool"/>
	109	</Configuration>
	110	</Configurations>
	111	<References>
	112	</References>
	113	<Files>
	114	<Filter
	115	Name="Source Files"
	116	Filter="cpp;c;cxx;def;odl;idl;hpj;bat;asm;asmx"
	117	UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}">
	118	<File
	119	RelativePath=".\character.cpp">
	120	</File>
	121	<File
	122	RelativePath=".\gaps.cpp">
	123	</File>
	124	<File
	125	RelativePath=".\indelCoder.cpp">
	126	</File>
	127	<File
	128	RelativePath=".\indelCoderOptions.cpp">
	129	</File>
	130	<File
	131	RelativePath=".\indelCoderProject.cpp">
	132	</File>
	133	<File
	134	RelativePath=".\indelCoderUtils.cpp">
	135	</File>
	136	</Filter>
	137	<Filter
	138	Name="Header Files"
	139	Filter="h;hpp;hxx;hm;inl;inc;xsd"
	140	UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}">
	141	<File
	142	RelativePath=".\character.h">
	143	</File>
	144	<File
	145	RelativePath=".\gaps.h">
	146	</File>
	147	<File
	148	RelativePath=".\indelCoder.h">
	149	</File>
	150	<File
	151	RelativePath=".\indelCoderOptions.h">
	152	</File>
	153	<File
	154	RelativePath=".\indelCoderUtils.h">
	155	</File>
	156	</Filter>
	157	<Filter
	158	Name="Resource Files"
	159	Filter="rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx"
	160	UniqueIdentifier="{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}">
	161	</Filter>
	162	<File
	163	RelativePath=".\test\Debug.params">
	164	</File>
	165	</Files>
	166	<Globals>
	167	</Globals>
	168	</VisualStudioProject>

+169

-0

programs/indelCoder/indelCoderOptions.cpp less more

	0	/********************************************************************************************
	1	indelCoderOptions - a class that contains all the parameters for the indelCoderProject as static
	2	use the 'Parameters' class to read info from txt file.
	3	initDefault. (+Parameters::addParameter)
	4	getParamsFromFile. ->with alterations of defaults for consistency
	5	verifyConsistParams.
	6	*********************************************************************************************/
	7	#include "indelCoderOptions.h"
	8	#include "errorMsg.h"
	9	#include "someUtil.h"
	10	#include "Parameters.h"
	11	#include <iostream>
	12	#include <cmath>
	13
	14	using namespace std;
	15
	16	// recognize all the static members defined at .h
	17	string indelCoderOptions::_seqFile;
	18	string indelCoderOptions::_logFile;
	19	int indelCoderOptions::_logValue;
	20	//string indelCoderOptions::_outDir;
	21	string indelCoderOptions::_indelOutputInfoFile;
	22	string indelCoderOptions::_indelOutputFastaFile;
	23	string indelCoderOptions::_nexusFileName;
	24
	25	indelCoderOptions::codingType indelCoderOptions::_codingType;
	26	//bool indelCoderOptions::_isMCIC2;
	27
	28	bool indelCoderOptions::_isCheckForTriangleInequality;
	29	bool indelCoderOptions::_isOmitLeadingAndEndingGaps;
	30
	31
	32
	33
	34	/********************************************************************************************
	35	*********************************************************************************************/
	36	void indelCoderOptions::initOptions(const string& paramFileName)
	37	{
	38	//getOutDirFromFile(paramFileName); // first set _outDir to be used next
	39	//createDir("", indelCoderOptions::_outDir);
	40	ifstream params(paramFileName.c_str());
	41	if(params.good())
	42	Parameters::readParameters(params);
	43	params.close();
	44	initDefault();
	45	getParamsFromFile(paramFileName);
	46	//verifyConsistParams();
	47	}
	48
	49
	50
	51	/********************************************************************************************
	52	*********************************************************************************************/
	53	//void indelCoderOptions::getOutDirFromFile(const string& paramFileName)
	54	//{
	55	// _outDir = "INDEL_CODER_RES";
	56	// Parameters::addParameter("_outDir", _outDir);
	57	//
	58	// _outDir = Parameters::getString("_outDir");
	59	//}
	60
	61	/********************************************************************************************
	62	initDefault
	63	*********************************************************************************************/
	64	void indelCoderOptions::initDefault()
	65	{
	66	// all the default values are stored in the gainLossOptions:: static members
	67	//################### Basic parameters:
	68	// input (general)
	69	_seqFile = ""; // essential - fasta file with presence(1)/absence(0) for each species over all gene families (positions)
	70	_indelOutputInfoFile= "";
	71	_indelOutputFastaFile="";
	72	_nexusFileName="";
	73	// output
	74	//_outDir = "RESULTS"; // concatenated after current dir location 'pwd'
	75	_logFile = "log.txt"; // print-outs of the running progress including the estimated parameters optimization
	76	_logValue = 4; // verbosity level - ~4 - normal, >7 - load of info
	77	//_isMCIC2 = true;
	78	_codingType =SIC;
	79	_isCheckForTriangleInequality = false;
	80	_isOmitLeadingAndEndingGaps = true; // The typical approach is to omit (SeqState)
	81
	82	Parameters::addParameter("_seqFile", _seqFile);
	83	Parameters::addParameter("_logFile", _logFile);
	84	Parameters::addParameter("_indelOutputInfoFile", _indelOutputInfoFile);
	85	Parameters::addParameter("_indelOutputFastaFile", _indelOutputFastaFile);
	86	Parameters::addParameter("_nexusFileName", _nexusFileName);
	87
	88	Parameters::addParameter("_logValue", _logValue);
	89	Parameters::addParameter("_codingType", getCodingType(_codingType));
	90	//Parameters::addParameter("_isMCIC2", (_isMCIC2 == true) ? 1 : 0);
	91	Parameters::addParameter("_isCheckForTriangleInequality", (_isCheckForTriangleInequality == true) ? 1 : 0);
	92	Parameters::addParameter("_isOmitLeadingAndEndingGaps", (_isOmitLeadingAndEndingGaps == true) ? 1 : 0);
	93
	94	}
	95
	96
	97	/********************************************************************************************
	98	getParamsFromFile
	99	*********************************************************************************************/
	100	void indelCoderOptions::readParameters(const string& paramFileName)
	101	{
	102	ifstream params(paramFileName.c_str());
	103	if(params.good())
	104	Parameters::readParameters(params); // only place where params are read, updateParameter(paramName, param.c_str()) used
	105	params.close();
	106	}
	107	/********************************************************************************************
	108	getParamsFromFile
	109	*********************************************************************************************/
	110	void indelCoderOptions::getParamsFromFile(const string& paramFileName)
	111	{
	112	readParameters(paramFileName);
	113	_logFile = Parameters::getString("_logFile");
	114	_seqFile = Parameters::getString("_seqFile");
	115	_indelOutputFastaFile = Parameters::getString("_indelOutputFastaFile");
	116	_nexusFileName = Parameters::getString("_nexusFileName");
	117	_indelOutputInfoFile = Parameters::getString("_indelOutputInfoFile");
	118	if(_seqFile=="") errorMsg::reportError("_seqFile is needed");
	119	if(_indelOutputFastaFile=="") errorMsg::reportError("_indelOutputFastaFile is needed");
	120	if(_nexusFileName=="") errorMsg::reportError("_nexusFileName is needed");
	121
	122	if(_indelOutputInfoFile=="") errorMsg::reportError("_indelOutputInfoFile is needed");
	123	//_isMCIC2 = (Parameters::getInt("_isMCIC2") == 1) ? true : false;
	124	_codingType = getCodingType(Parameters::getString("_codingType"));
	125
	126	_isCheckForTriangleInequality = (Parameters::getInt("_isCheckForTriangleInequality") == 1) ? true : false;
	127	_isOmitLeadingAndEndingGaps = (Parameters::getInt("_isOmitLeadingAndEndingGaps") == 1) ? true : false;
	128	_logValue = Parameters::getInt("_logValue");
	129	}
	130
	131	/********************************************************************************************
	132	enum distributionType {SIC, MCIC, MCIC2};
	133	*********************************************************************************************/
	134	string indelCoderOptions::getCodingType(codingType type)
	135	{
	136	string res = "";
	137	switch (type)
	138	{
	139	case SIC:
	140	res = "SIC";
	141	break;
	142	case MCIC:
	143	res = "MCIC";
	144	break;
	145	case MCIC2:
	146	res = "MCIC2";
	147	break;
	148	default:
	149	errorMsg::reportError("unknown type in codingType - {SIC, MCIC, MCIC2}");
	150	}
	151	return res;
	152	}
	153	//////////////////////////////////////////////////////////////////////////
	154	indelCoderOptions::codingType indelCoderOptions::getCodingType(const string& str)
	155	{
	156	if (str == "SIC")
	157	return SIC;
	158	if (str == "MCIC")
	159	return MCIC;
	160	if (str == "MCIC2")
	161	return MCIC2;
	162	else
	163	errorMsg::reportError("unknown type in codingType - {SIC, MCIC, MCIC2}");
	164	return SIC;
	165	}
	166
	167
	168

+49

-0

programs/indelCoder/indelCoderOptions.h less more

	0	#ifndef __indelCoderOptionsParams_OPTION
	1	#define __indelCoderOptionsParams_OPTION
	2
	3	#include "definitions.h"
	4	#include <string>
	5	#include <fstream>
	6
	7	using namespace std;
	8
	9
	10	class indelCoderOptions{
	11	public:
	12	enum codingType {SIC, MCIC, MCIC2};
	13
	14	public:
	15	virtual ~indelCoderOptions();
	16
	17	static void initOptions(const string& paramFileName);
	18	static void initDefault();
	19	static void readParameters(const string& paramFileName);
	20	static void getParamsFromFile(const string& paramFileName);
	21	static void getOutDirFromFile(const string& paramFileName);
	22	static void verifyConsistParams();
	23
	24	static string getCodingType(codingType type);
	25	static codingType getCodingType(const string& str);
	26
	27
	28	public:
	29	//################### Basic parameters:
	30	// input (general)
	31	static string _seqFile; // essential - fasta file with presence(1)/absence(0) for each species over all gene families (positions)
	32	static string _indelOutputInfoFile; // a file in which all the indel information is given (not just the 0/1 codes)
	33	static string _indelOutputFastaFile; // a file in which ajust the 0/1 coding is given
	34	static string _nexusFileName; // a file in which the 0/1 coding is given in nexus format
	35	//static string _outDir; // _outDir = "RESULTS", concatenated after current dir location 'pwd'
	36	static string _logFile; // print-outs of the running progress including the estimated parameters optimization
	37	static int _logValue; // verbosity level - ~4 - normal, >7 - load of info
	38
	39	//static bool _isMCIC2;
	40	static codingType _codingType; // SIC, MCIC, MCIC2
	41
	42	static bool _isCheckForTriangleInequality;
	43	static bool _isOmitLeadingAndEndingGaps; // ignore gaps that either start at 5' or end at 3'
	44
	45	private:
	46
	47	};
	48	#endif

+32

-0

programs/indelCoder/indelCoderProject.cpp less more

	0	#include "indelCoder.h"
	1	#include "indelCoderOptions.h"
	2	#include "indelCoderUtils.h"
	3	#include "Parameters.h"
	4
	5
	6
	7	using namespace std;
	8
	9	int main(int argc, char **argv){
	10
	11	//printICProgramInfo();
	12	//time_t t1,t2;
	13	//time(&t1);
	14	if (argc == 1) {printICHelp();// here the -h option will be printed
	15	return 0;
	16	}
	17	string paramStr = argv[1];
	18	indelCoderOptions::initOptions(paramStr);
	19
	20	myLog::setLog(indelCoderOptions::_logFile, indelCoderOptions::_logValue);
	21
	22	//Parameters::dump(cout);
	23
	24	indelCoder gl;
	25	gl.run();
	26
	27	//time(&t2);
	28	//LOGnOUT(4,<<endl<<"TOTAL RUNNING TIME = "<<(t2-t1)/60.0<<" minutes"<<endl);
	29	return 0;
	30	}
	31

+33

-0

programs/indelCoder/indelCoderUtils.cpp less more

	0
	1	#include "indelCoderUtils.h"
	2	#include "indelCoder.h"
	3
	4
	5
	6
	7	void printICHelp(){
	8	cout <<"+-------------------------------------------+"<<endl;
	9	cout <<"*** The indelCoder project. "<<endl;
	10	cout <<"use a parameter file with these options: "<<endl;
	11	cout <<"+-------------------------------------------+"<<endl;
	12	cout <<"_seqFile "<<endl;
	13	cout <<"\|------------------------------------------\|"<<endl;
	14	cout <<"_logFile "<<endl;
	15	cout <<"_logValue "<<endl;
	16	//cout <<"_outDir "<<endl;
	17	cout <<"...(a partial list) "<<endl;
	18	cout <<"+------------------------------------------+"<<endl;
	19	}
	20
	21	void printICProgramInfo(){
	22	LOGnOUT(3,<<"+=================================================================+"<<endl);
	23	LOGnOUT(3,<<"+ The indelCoder project: "<<endl);
	24	LOGnOUT(3,<<"+ Transforming a multiple sequence alignment (MSA) "<<endl);
	25	LOGnOUT(3,<<"+ of amino acids into 0/1 characters "<<endl);
	26	LOGnOUT(3,<<"+ Implementation of Indel Coding scheme SIC (Simmons et al. 2002) "<<endl);
	27	LOGnOUT(3,<<"+ "<<PROG_INFO<<" "<<endl);
	28	LOGnOUT(3,<<"+ Ofir Cohen - ofircohe@tau.ac.il "<<endl);
	29	LOGnOUT(3,<<"+ Tal Pupko - talp@post.tau.ac.il "<<endl);
	30	LOGnOUT(3,<<"+ Dorothee Huchon - huchondp@post.tau.ac.il "<<endl);
	31	LOGnOUT(3,<<"+=================================================================+"<<endl);
	32	}

+15

-0

programs/indelCoder/indelCoderUtils.h less more

	0	#ifndef ___INDELCODER_UTILS__
	1	#define ___INDELCODER_UTILS__
	2
	3
	4	#include "logFile.h"
	5
	6	const string PROG_INFO = static_cast<string>("Version: 1.72 last updated: 03/12/2012");
	7
	8
	9	void printICHelp();
	10	void printICProgramInfo();
	11
	12
	13
	14	#endif

+675

-0

www/bioSequence_scripts_and_constants/BIOSEQUENCE_FUNCTIONS.pm less more

	0	#!/usr/bin/perl
	1
	2	package BIOSEQUENCE_FUNCTIONS; #don't forget: a package must end with a return value (1; in the end)!!!!!
	3
	4	use strict;
	5	use GENERAL_CONSTANTS;
	6
	7	#------------------------------------------------------------------------------------
	8	sub subtract_time_from_now{
	9	# receieves the begin time in format of: HH:MN:SS DD-MO-YEAR
	10	# returns the the time (in hours) passed from the time of calculation to the begin time.
	11	# if an error was found during calculation: returns "no"
	12	# error will be found in case the time that passed is more than 1 month different.
	13
	14	my $begin_time = shift;
	15	$begin_time .= " ".shift;
	16	my %date1;
	17	my %date2;
	18	my $date1_ref;
	19	my $date2_ref;
	20	my @time_difference;
	21	my $dir_counter = 0;
	22
	23	$begin_time =~ m/(\d+):(\d+):(\d+) (\d+)-(\d+)-(\d+)/;
	24	%date1 = (Year => $6, Month => $5, Day => $4, Hour => $1, Minute => $2, Second => $3);
	25	%date2 = (Year => "", Month => "", Day => "", Hour => "", Minute => "", Second => "");
	26	&convert_currentTime(\%date2);
	27
	28	@time_difference = &compare_time(\%date1, \%date2);
	29	#if ($time_difference[0] eq "no") {
	30	# return "no";
	31	#}
	32	if ($time_difference[0] =~ m/error/) {
	33	return $time_difference[0];
	34	}
	35	else{
	36	return $time_difference[1];
	37	}
	38	}
	39	#------------------------------------------------------------------------------------
	40	# the routine converts the "Begin/End" time line from Selecton's log files to a numeric string.
	41	# it insertes the new values to the hash' reference .
	42	sub convertTime
	43	{
	44	my $inputTimeString = $_[0];
	45	my $answer = $_[1]; #reference to hash
	46	my %months =
	47	( Jan => "01", Feb => "02", Mar => "03", Apr => "04", May => "05", Jun => "06",
	48	Jul => "07",Aug => "08", Sep => "09", Oct => "10", Nov => "11", Dec => "12");
	49
	50	if ($inputTimeString =~ m/(\d+):(\d+):(\d+),\s+\w+\s(\w+)\s(\d+),\s(\d+)/)
	51	{
	52	my $HH = &convertNum($1);
	53	my $MN = &convertNum($2);
	54	my $SS = &convertNum($3);
	55	my $MM = $months{$4};
	56	my $DD = &convertNum($5);
	57	my $YYYY = $6;
	58
	59	$answer->{Year} = $YYYY;
	60	$answer->{Month} = $MM;
	61	$answer->{Day} = $DD;
	62	$answer->{Hour} = $HH;
	63	$answer->{Minute} = $MN;
	64	$answer->{Second} = $SS;
	65	}
	66	}#convertTime
	67	#__________________________________________________________
	68	# converts a number from one digit to 2 digits
	69	sub convertNum
	70	{
	71	my $input_num = shift;
	72	if ($input_num < 10)
	73	{return "0".$input_num;}
	74	else
	75	{return $input_num;}
	76	}
	77
	78	#__________________________________________________________
	79	# calculates the time differences by comparing seperately months, days, minutes and seconds.
	80	# this functions assumes that the year is the same year.
	81	# input: references to 2 hashs with time's details
	82	# output: string with time difference, messured by hours:minutes:seconds
	83
	84	sub compare_time()
	85	{
	86	my $time1 = $_[0]; #refernce to the time array
	87	my $time2 = $_[1]; #refernce to the time array
	88	my $time_difference;
	89	my $no_of_Days_passed;
	90	my $no_of_hours_passed;
	91	my %days_each_month = ('01' => '31', '02' => '28', '03' => '31', '04' => '30', '05' => '31', '06' => '30',
	92	'07' => '31', '08' => '31', '09' => '30', '10' => '31', '11' => '30', '12' => '31');
	93
	94	if ($time1->{Month} eq $time2->{Month}) {#same month
	95	if ($time1->{Day} eq $time2->{Day}) {#same day
	96	if ($time2->{Hour} >= $time1->{Hour}) {#compare hour: h2>h1
	97	$time_difference = &calculate_time_difference($time1->{Hour}, $time2->{Hour}, $time1->{Minute}, $time2->{Minute}, $time1->{Second}, $time2->{Second}, 0);
	98	}
	99	else{
	100	#return("no");
	101	return("error: H1 is: $time1->{Hour} H2 is: $time2->{Hour} it is the same day, therefor it is impossible that H1>H2. \n");
	102	}
	103	}
	104	else {# different day
	105	if ($time2->{Day} >= $time1->{Day}){
	106	$no_of_Days_passed = ($time2->{Day}-$time1->{Day});
	107	$time_difference = &calculate_time_difference($time1->{Hour}, $time2->{Hour}, $time1->{Minute}, $time2->{Minute}, $time1->{Second}, $time2->{Second}, $no_of_Days_passed);
	108	}
	109	else{
	110	#return("no");
	111	return("error: D1 is: $time1->{Day} D2 is: $time2->{Day}, it is impossible in the same month that D1>D2.\n");
	112	}
	113	}
	114	}
	115	else {#different month
	116	#if ($time2->{Month} >= $time1->{Month}){
	117	if (($time2->{Month} - $time1->{Month})>1 or ($time2->{Month} - $time1->{Month})<0){
	118	#return("no");
	119	return("error: M1 is: $time1->{Month}, M2 is: $time2->{Month}. The program doesn't allow a difference bigger than 1 month.\n");
	120	}
	121	else {# 1 month difference
	122	$no_of_Days_passed = ($time2->{Day} + $days_each_month{$time1->{Month}} - $time1->{Day}); $time_difference = &calculate_time_difference($time1->{Hour}, $time2->{Hour}, $time1->{Minute}, $time2->{Minute}, $time1->{Second}, $time2->{Second}, $no_of_Days_passed);
	123	}
	124	#}
	125	#else{
	126	#return("no");#, "error: M1 is: $time1->{Month}, M2 is: $time2->{Month}. It is impossible for M1 to be bigger within the same year\n");
	127	#}
	128	}
	129	return ("yes", $time_difference);
	130	} # finish: compare_time()
	131
	132	#__________________________________________________________
	133	# does the part of calculating minutes and seconds difference.
	134	# input: hours difference (just for formating the string output) M1, M2, D1, D2
	135	# output: string output, sent to the compare_time() function for display
	136	sub calculate_time_difference()
	137	{
	138	my $hour1 = $_[0];
	139	my $hour2= $_[1];
	140	my $minute1 = $_[2];
	141	my $minute2 = $_[3];
	142	my $second1 = $_[4];
	143	my $second2 = $_[5];
	144	my $days_passed = $_[6];
	145	my $minutes_passed;
	146	my $seconds_passed;
	147	my $hours_passed;
	148	my $reduce_minute = "no";
	149	my $reduce_hour = "no";
	150	my $reduce_day = "no";
	151
	152	# seconds
	153	if ($second2>=$second1)
	154	{$seconds_passed = $second2-$second1;}
	155	else
	156	{$seconds_passed = 60+$second2-$second1;
	157	$reduce_minute = "yes";}
	158	#minutes
	159	if ($minute2>=$minute1)
	160	{$minutes_passed = $minute2-$minute1;}
	161	else
	162	{$minutes_passed = 60+$minute2-$minute1;
	163	$reduce_hour = "yes";}
	164	if ($reduce_minute eq "yes")
	165	{
	166	if ($minutes_passed == 0)
	167	{$minutes_passed = 59;}
	168	else
	169	{$minutes_passed -=1;}
	170	}
	171	#hours
	172	if ($hour2>=$hour1)
	173	{$hours_passed = $hour2-$hour1;}
	174	else
	175	{$hours_passed = 24+$hour2-$hour1;
	176	$reduce_day = "yes";}
	177	if ($reduce_hour eq "yes")
	178	{
	179	if($hours_passed == 0)
	180	{$hours_passed = 23;}
	181	else
	182	{$hours_passed -=1;}
	183	}
	184	#days
	185	if ($days_passed > 0)
	186	{
	187	if($reduce_day eq "yes")
	188	{$days_passed-=1;}
	189	$hours_passed += 24*$days_passed;
	190	}
	191	$hours_passed = &convertNum($hours_passed);
	192	$minutes_passed = &convertNum($minutes_passed);
	193	$seconds_passed = &convertNum($seconds_passed);
	194	return "$hours_passed:$minutes_passed:$seconds_passed";
	195	}
	196	#------------------------------------------------------------------------------------
	197	sub convert_currentTime {
	198	my $answer = shift; #reference to hash
	199	my ($second, $minute, $hour, $dayOfMonth, $month, $yearOffset, $dayOfWeek, $dayOfYear, $daylightSavings) = localtime();
	200	my $year = 1900 + $yearOffset;
	201	$second = &convertNum($second);
	202	$minute = &convertNum($minute);
	203	$hour = &convertNum($hour);
	204	$month = &convertNum($month+1);
	205	$dayOfMonth = &convertNum($dayOfMonth);
	206
	207	$answer->{Year} = $year;
	208	$answer->{Month} = $month;
	209	$answer->{Day} = $dayOfMonth;
	210	$answer->{Hour} = $hour;
	211	$answer->{Minute} = $minute;
	212	$answer->{Second} = $second;
	213
	214	#print "Current time is: ".$answer->{Hour}.":".$answer->{Minute}.":".$answer->{Second}." ".$answer->{Day}."-".$answer->{Month}."-".$answer->{Year}."\n";
	215
	216	}
	217	#---------------------------------------------
	218	sub check_if_user_is_allowed{
	219
	220	my $server_name = shift;
	221	my $user_ip = shift;
	222	my $user_email = shift;
	223
	224	my $file_to_open;
	225
	226	my %ip_total = ();
	227	my ($ip, $_mail, $redirect_html);
	228
	229	if ($server_name eq "consurf"){
	230	$redirect_html = GENERAL_CONSTANTS::CONSURF_REDIRECT_PAGE;
	231	$file_to_open = GENERAL_CONSTANTS::CONSURF_RUNNING_JOBS;
	232	}
	233	elsif ($server_name eq "selecton"){
	234	$redirect_html = GENERAL_CONSTANTS::SELECTON_REDIRECT_PAGE;
	235	$file_to_open = GENERAL_CONSTANTS::SELECTON_RUNNING_JOBS;
	236	}
	237	elsif ($server_name eq "conseq"){
	238	$redirect_html = GENERAL_CONSTANTS::CONSEQ_REDIRECT_PAGE;
	239	$file_to_open = GENERAL_CONSTANTS::CONSEQ_RUNNING_JOBS;
	240	}
	241	elsif ($server_name eq "pepitope"){
	242	$redirect_html = GENERAL_CONSTANTS::PEPITOPE_REDIRECT_PAGE;
	243	$file_to_open = GENERAL_CONSTANTS::PEPITOPE_RUNNING_JOBS;
	244	}
	245	if (-e $file_to_open and !(-z $file_to_open)){
	246	open RUN_LIST, $file_to_open;
	247	flock RUN_LIST, 2;
	248	while (<RUN_LIST>){
	249	chomp;
	250	if(/^(null_)?\d+ (.+) (.+)$/){
	251	$ip = $2;
	252	$_mail = $3;
	253	if (exists $ip_total{$ip}){
	254	$ip_total{$ip}++;}
	255	else{
	256	$ip_total{$ip} = 1;}
	257	if (exists $ip_total{$_mail}){
	258	$ip_total{$_mail}++;}
	259	else{
	260	$ip_total{$_mail} = 1;}
	261	}
	262	#redirects unwanted visitors to the site
	263	if ($ip =~ /66\.232\.100\.62/ or $ip =~ /83\.97\.\177\.107/ or $ip =~ /91\.74\.160\.18/){
	264	#print "Location: http://www.tau.ac.il/lifesci/\n\n";
	265	exit;
	266	}
	267	}
	268	close RUN_LIST;
	269	if ((exists $ip_total{$user_ip} && $ip_total{$user_ip} >=7) or (exists $ip_total{$user_email} && $ip_total{$user_email} >= 7)){
	270	# output a message to the user that he cannot continue the run
	271	print "Location: $redirect_html\n\n";
	272	exit;
	273	}
	274	}
	275	}
	276	#---------------------------------------------
	277	# the values for this statistics were determined in a statistical test we did on November 2007,
	278	# on Selecton seccsful runs for 3 months on the bioinfo machine
	279	#sub selecton_estimated_run_time1{
	280	# my $seq_times_length = shift;
	281	# my $model = shift;
	282	#
	283	# my ($time_in_minutes, $time_in_hours, $time_in_days);
	284	# # set the time according to each model's parameters
	285	# $time_in_minutes = $seq_times_length*0.0251 + 20.345 if ($model eq "M8");
	286	# $time_in_minutes = $seq_times_length*0.0256 + 17.391 if ($model eq "MEC");
	287	# # to be on the safe side - we add 20% for the time
	288	# $time_in_minutes = int($time_in_minutes*1.2);
	289	# # calculate time in DD:HH:MM:SS format
	290	# $time_in_minutes = int($time_in_minutes); # remove numbers after the "."
	291	#
	292	# return(&time_in_days_from_minutes($time_in_minutes));
	293	#}
	294	#---------------------------------------------
	295	# the values for this statistics were determined in a statistical test we did on October 2009, on Selecton seccsful runs for a few month on biocluster.
	296	# the file can be found at: /bioseq/Selecton/total_models_statistics.csv
	297	sub selecton_estimated_run_time{
	298	my $seq_length = shift;
	299	my $num_of_seq = shift;
	300	my $model = shift;
	301
	302	my ($time_in_minutes, $time_in_hours, $time_in_days);
	303	# set the time according to each model's parameters
	304	if ($model eq "MEC"){
	305	$time_in_minutes = $seq_length$num_of_seq0.0035 + 12.677 ;
	306	}
	307	elsif ($model eq "M8"){
	308	if($num_of_seq<11){
	309	$time_in_minutes = $seq_length$num_of_seq0.022 + 3.5198;
	310	}
	311	elsif($num_of_seq>10 and $num_of_seq<21){
	312	$time_in_minutes = $seq_length$num_of_seq0.0025 + 14.82;
	313	}
	314	elsif($num_of_seq>20 and $num_of_seq<31){
	315	$time_in_minutes = $seq_length$num_of_seq0.0021 + 35.153;
	316	}
	317	elsif($num_of_seq>30 and $num_of_seq<41){
	318	$time_in_minutes = $seq_length$num_of_seq0.0026 + 48.412;
	319	}
	320	elsif($num_of_seq>40 and $num_of_seq<51){
	321	$time_in_minutes = $seq_length$num_of_seq0.0024 + 65.947;
	322	}
	323	else{
	324	$time_in_minutes = $seq_length$num_of_seq0.003 + 91.341;
	325	}
	326	}
	327
	328	# to be on the safe side - we triple the time
	329	$time_in_minutes = int($time_in_minutes*3);
	330	# calculate time in DD:HH:MM:SS format
	331	$time_in_minutes = int($time_in_minutes); # remove numbers after the "."
	332
	333	return(&time_in_days_from_minutes($time_in_minutes));
	334	}
	335	#---------------------------------------------
	336	# input: int represents sum of minutes
	337	# output: time in format: HH:MM:SS (maybe change in the future to time in format: DD:HH:MM:SS)
	338	sub time_in_days_from_minutes{
	339	my $minutes = shift;
	340	my $hours = 0;
	341	my $days = 0;
	342	my $ret = "";
	343
	344	if($minutes <=59){
	345	$ret = $minutes.":00";
	346	}
	347	elsif ($minutes >59){
	348	$hours = int($minutes/60);
	349	$minutes = $minutes%60;
	350	$minutes = new_num($minutes);
	351	# ---- if the format needed inculdes only hours
	352	$hours = new_num($hours);
	353	$ret = $hours.":".$minutes.":00";
	354	## --- if the format needed inculdes days in seperate
	355	#if($hours <= 23){
	356	# $hours = new_num($hours);
	357	# $ret = $hours.":".$minutes.":00";
	358	#}
	359	#else{
	360	# $days = int($hours/24);
	361	# $hours = $hours%24;
	362	# $hours = new_num($hours);
	363	# $days = new_num($days);
	364	# $ret = $days.":".$hours.":".$minutes.":00";
	365	#}
	366	}
	367	return $ret;
	368	}
	369	#---------------------------------------------
	370	# gives the number in minimum 2 digits
	371	sub new_num{
	372	my $num = shift;
	373	($num < 10) ? return "0".$num : return $num;
	374	}
	375	#---------------------------------------------
	376	# returns the time in format hh:mm:ss dd:mn:yyy
	377	sub printTime {
	378	my ($second, $minute, $hour, $dayOfMonth, $month, $yearOffset, $dayOfWeek, $dayOfYear, $daylightSavings) = localtime();
	379	my $year = 1900 + $yearOffset;
	380
	381	$second = &new_num($second);
	382	$minute = &new_num($minute);
	383	$hour = &new_num($hour);
	384	$month = &new_num($month+1);
	385	$dayOfMonth = &new_num($dayOfMonth);
	386
	387	return "$hour:$minute:$second $dayOfMonth-".$month."-$year";
	388	}
	389	#---------------------------------------------
	390	sub printYear {
	391	my ($second, $minute, $hour, $dayOfMonth, $month, $yearOffset, $dayOfWeek, $dayOfYear, $daylightSavings) = localtime();
	392	my $year = 1900 + $yearOffset;
	393	return $year;
	394	}
	395	#---------------------------------------------
	396	sub printMonth {
	397	my @months = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec);
	398	# localtime returns array. in its 5th cell (4 when coutin from 0) is the number denotin the current month minus 1
	399	# example: in December, $time[4] = 11. So in the above @months array, $months[11] = Dec.
	400	my @time = localtime();
	401	return $months[$time[4]];
	402	}
	403	#---------------------------------------------
	404	# input: the server name and run_name
	405	# the routine will remove this run_name from the list of running jobs
	406	# please note: the var $server should be spelled: "Selecton", "ConSurf"
	407	sub remove_job_from_running_log{
	408
	409	my $server = shift;
	410	my $run_name = shift;
	411	my $log;
	412
	413	if($server eq "Selecton") {
	414	$log = GENERAL_CONSTANTS::SELECTON_RUNNING_JOBS;}
	415	elsif($server eq "ConSurf"){
	416	$log = GENERAL_CONSTANTS::CONSURF_RUNNING_JOBS;}
	417	elsif($server eq "ConSeq"){
	418	$log = GENERAL_CONSTANTS::CONSEQ_RUNNING_JOBS;}
	419	elsif($server eq "pepitope"){
	420	$log = GENERAL_CONSTANTS::PEPITOPE_RUNNING_JOBS;}
	421
	422	# remove the job from the running jobs list
	423	open LIST, "+>>".$log;
	424	flock LIST, 2;
	425	seek LIST, 0, 0; #rewind the pointer to the beginning
	426	my @all_lines_in_list = <LIST>; # read the contents into the array
	427	truncate LIST, 0; # remove all the information, The 0 represents the size of the file that we want
	428	foreach (@all_lines_in_list){
	429	chomp;
	430	unless(/$run_name/){
	431	print LIST $_."\n";
	432	}
	433	}
	434	flock LIST, 8;
	435	close LIST;
	436	}
	437	#---------------------------------------------
	438	# prints the job in the queuing jobs list
	439	sub enqueue_job{
	440	my $job_num = shift;
	441	my $server = shift;
	442	my $run_name = shift;
	443	my $ret = "ok";
	444
	445	unless (open LIST, ">>".GENERAL_CONSTANTS::QUEUING_JOBS){
	446	$ret = "Could not open file ".GENERAL_CONSTANTS::QUEUING_JOBS.". Reason: $!\nThe job was not listed in the queuing_jobs list.\n".printTime();
	447	}
	448	else{
	449	flock LIST, 2; # locks the list, so no other process will write to it. On the same time - if the list is currently locked by another process - it waits until the list file is realeased. The "2" and "8" are the operation symbols for "lock" and "unlock".
	450	print LIST "$job_num $server $run_name ".printTime()."\n";
	451	flock LIST, 8;
	452	close LIST;
	453	}
	454	return $ret;
	455	}
	456
	457	#------------------------------------------------------
	458	# prints the job in the bioseq node running jobs list
	459	sub enqueue_job_to_bioseq_node{
	460	my $job_num = shift;
	461	my $server = shift;
	462	my $run_name = shift;
	463	my $ret = "ok";
	464
	465	unless (open LIST, ">>".GENERAL_CONSTANTS::JOBS_ON_BIOSEQ_NODE){
	466	$ret = "Could not open file ".GENERAL_CONSTANTS::JOBS_ON_BIOSEQ_NODE.". Reason: $!\nThe job was not listed in the bioseq node running job list.\n".printTime();
	467	}
	468	else{
	469	flock LIST, 2; # locks the list, so no other process will write to it. On the same time - if the list is currently locked by another process - it waits until the list file is realeased. The "2" and "8" are the operation symbols for "lock" and "unlock".
	470	print LIST "$job_num $server $run_name ".printTime()."\n";
	471	flock LIST, 8;
	472	close LIST;
	473	}
	474	return $ret;
	475	}
	476	#------------------------------------------------------
	477	# prints the job in the bioseq node waiting jobs list
	478	sub waiting_jobs_for_bioseq_node{
	479	my $server = shift;
	480	my $run_name = shift;
	481	my $ret = "ok";
	482
	483	unless (open LIST, ">>".GENERAL_CONSTANTS::JOBS_WAITING_BIOSEQ_NODE){
	484	$ret = "Could not open file ".GENERAL_CONSTANTS::JOBS_WAITING_BIOSEQ_NODE.". Reason: $!\nThe job was not listed in the bioseq node waiting job list.\n".printTime();
	485	}
	486	else{
	487	flock LIST, 2; # locks the list, so no other process will write to it. On the same time - if the list is currently locked by another process - it waits until the list file is realeased. The "2" and "8" are the operation symbols for "lock" and "unlock".
	488	print LIST "$server $run_name ".printTime()."\n";
	489	flock LIST, 8;
	490	close LIST;
	491	}
	492	return $ret;
	493	}
	494	#------------------------------------------------------
	495	# remove the job from the bioseq node waiting jobs list
	496	sub remove_job_from_bioseq_node_waiting_list{
	497	my $server = shift;
	498	my $run_name = shift;
	499	my $ret = "ok";
	500
	501	unless (open LIST, "+>>".GENERAL_CONSTANTS::JOBS_WAITING_BIOSEQ_NODE){
	502	$ret = "Could not open file ".GENERAL_CONSTANTS::JOBS_WAITING_BIOSEQ_NODE.". Reason: $!\nThe job was not listed in the bioseq node waiting job list.\n".printTime();
	503	}
	504	else{
	505	flock LIST, 2;
	506	seek LIST, 0, 0; #rewind the pointer to the beginning
	507	my @all_lines_in_list = <LIST>; # read the contents into the array
	508	truncate LIST, 0; # remove all the information, The 0 represents the size of the file that we want
	509	foreach my $line (@all_lines_in_list){
	510	chomp;
	511	if (($line=~/$run_name/) and ($line=~/$server/))
	512	{
	513	$line = ""; # removing this line from the lines array
	514	}
	515	elsif ($line =~/([A-Za-z0-9])+/)
	516	{
	517	print LIST "$line\n";
	518	}
	519	}
	520	flock LIST, 8;
	521	close LIST;
	522	}
	523	return $ret;
	524	}
	525	#---------------------------------------------
	526	# input: path to pdb file
	527	# output: 3 options:
	528	# 1. --PDB_NOT_OPEN if couldn't open the pdb file
	529	# 2. --NO_CHAINS if no chain was founded in column 22
	530	# 3. string with all the chains founded in this pdb.
	531
	532	sub which_chain_in_pdb_and_seqres{
	533	my $input_pdb = shift;
	534	my $chain_founded;
	535	my %all_chains;
	536	my @ret;
	537	my $seqres_found = "--SEQRES_no";
	538
	539	unless (open PDB, $input_pdb){
	540	@ret = ("--PDB_NOT_OPEN $input_pdb $!");
	541	return \@ret;}
	542	while (<PDB>){
	543	if (/^ATOM/){
	544	$chain_founded = substr $_, 21, 1;
	545	if (!(exists $all_chains{$chain_founded})){
	546	$all_chains{$chain_founded} = 1;
	547	}
	548	}
	549	if ($seqres_found eq "--SEQRES_no" && /^SEQRES/){
	550	$seqres_found = "--SEQRES_yes";
	551	}
	552	}
	553	close PDB;
	554	$chain_founded = "";
	555	foreach my $key (keys %all_chains){
	556	$chain_founded.=$key;
	557	}
	558	if($chain_founded !~ /\S/){
	559	@ret = ("--NO_CHAINS", $seqres_found);}
	560	else{
	561	@ret = ($chain_founded, $seqres_found);}
	562	return \@ret;
	563	}
	564	#---------------------------------------------
	565	# input : 1. path to a pdb file, where there is no chain identifier in the 22 column of ATOM and 12 column of SEQRES
	566	# 2. one letter denotes a chain identifier to add
	567	# output : the same file, in the same path, where the letter given as input is added to the previously empty 22 column.
	568	sub add_chain_to_pdb{
	569	my $input_pdb = shift;
	570	my $chain_id_to_add = shift;
	571
	572	my ($beg_line, $end_line, $line);
	573
	574	open PDB_IN, "+>>".$input_pdb;
	575	seek PDB_IN, 0, 0;
	576	my @all_lines_in_pdb = <PDB_IN>;
	577	truncate PDB_IN, 0;
	578	foreach(@all_lines_in_pdb){
	579	if (/^ATOM/){
	580	$line = $_;
	581	$beg_line = substr $line, 0, 21;
	582	$end_line = substr $line, 22, length($line);
	583	$_ = $beg_line.$chain_id_to_add.$end_line;
	584	}
	585	elsif (/^SEQRES/){
	586	$line = $_;
	587	$beg_line = substr $line, 0, 11;
	588	$end_line = substr $line, 12, length($line);
	589	$_ = $beg_line.$chain_id_to_add.$end_line;
	590	}
	591	print PDB_IN $_;
	592	}
	593	close PDB_IN;
	594	}
	595	#---------------------------------------------
	596	sub convertNewline{
	597	# runs dos2unix, the program that converts plain text files in DOS/MAC format to UNIX format.
	598	my $inputFilePath = shift;
	599	my $WorkingDir = shift;
	600	my $dos2unix="cd $WorkingDir;dos2unix -q $inputFilePath";
	601	system "$dos2unix";
	602	# if the input file was in mac format, the simple dos2unix will not work.
	603	# read the file - if it is only one line, it might mean that the new line characters
	604	# are not read well (for example: ^M). Trying to run dos2unix again, saying the format is mac
	605	$WorkingDir.='/' unless $WorkingDir =~ /\/$/;
	606	if (open FILE, $WorkingDir.$inputFilePath){
	607	my $num_of_lines = 0;
	608	while (<FILE>){
	609	$num_of_lines++;
	610	}
	611	close FILE;
	612	if ($num_of_lines==1){
	613	$dos2unix="cd $WorkingDir;dos2unix -c mac $inputFilePath -q ";
	614	system "$dos2unix";
	615	}
	616	}
	617
	618	}
	619	#---------------------------------------------
	620	sub removeEndLineExtraChars{
	621	# remove extra chars on end of lines (^M,spaces);
	622	my $inputFilePath = shift;
	623	my $WorkingDir = shift;
	624	$WorkingDir.='/' unless $WorkingDir =~ /\/$/;
	625	my @lines;
	626	if (open FILE, $WorkingDir.$inputFilePath){
	627	@lines=<FILE>;
	628	close (FILE);
	629	}
	630	if (open (NEWFILE,">$WorkingDir$inputFilePath")){
	631	my $line;
	632	foreach $line (@lines){
	633	# $line=~s/(\r)$/\n/;
	634	$line=~s/(\s+)$//;
	635	print NEWFILE "$line\n";
	636	}
	637	close NEWFILE;
	638	}
	639	}
	640	#---------------------------------------------
	641	sub check_file_type{
	642
	643	my $FileName=shift;
	644	my $Type="PLAIN_TEXT";
	645	if (-e "$FileName")
	646	{
	647	#$Type="Executable" if (-x $FileName); #Executable
	648	$Type="Binary" if (-c $FileName); #Contains Special Chars;
	649	$Type="Binary" if (-B $FileName); #Binary
	650
	651	if (-T $FileName and $Type ne "BINARY") # Potentially Text File but maybe not: The first block or so of the file is examined for odd characters such as strange control codes or characters with the high bit set. If too many strange characters (>30%) are found, it's a -B file; otherwise it's a -T file...
	652	{
	653	unless (open FILE,$FileName){
	654	return ("ERR", "check_file_type : cannot open the file $FileName for reading $!");
	655	}
	656	my $line=<FILE>;
	657	close (FILE);
	658	if ($line=~/%PDF-/){
	659	$Type="PDF";
	660	}
	661	elsif ($line=~/\\rtf/){
	662	$Type="RTF";
	663	}
	664
	665	}
	666	}
	667	else
	668	{
	669	return ("ERR", "check_file_type : the file $FileName was not found");
	670	}
	671	return ("OK", $Type);
	672	}
	673	#---------------------------------------------
	674	1;

+347

-0

www/bioSequence_scripts_and_constants/GENERAL_CONSTANTS.pm less more

	0	#!/usr/bin/perl
	1
	2	package GENERAL_CONSTANTS; #don't forget: a package must end with a return value (1; in the end)!!!!!
	3
	# ---- e-mail settings used when sending mail from the server admin's address ----
	use constant {
		ADMIN_EMAIL     => "TAU BioSequence \<bioSequence\@tauex.tau.ac.il\>",
		ADMIN_USER_NAME => "",
		ADMIN_PASSWORD  => "",
		SMTP_SERVER     => "",
	};

	# ---- files listing the jobs in each state of the queue ----
	use constant {
		QUEUING_JOBS             => "/bioseq/bioSequence_scripts_and_constants/queuing_jobs.list",
		RUNNING_JOBS             => "/bioseq/bioSequence_scripts_and_constants/running_jobs.list",
		SUBMITTED_JOBS           => "/bioseq/bioSequence_scripts_and_constants/submitted_jobs.list",
		JOBS_ON_BIOSEQ_NODE      => "/bioseq/bioSequence_scripts_and_constants/jobs_on_bioc.01_node.list",
		JOBS_WAITING_BIOSEQ_NODE => "/bioseq/bioSequence_scripts_and_constants/jobs_waiting_bioc.01_node.list",
		CONSURF_RUNNING_JOBS     => "/bioseq/bioSequence_scripts_and_constants/consurf_running_jobs.list",
		SELECTON_RUNNING_JOBS    => "/bioseq/bioSequence_scripts_and_constants/selecton_running_jobs.list",
		CONSEQ_RUNNING_JOBS      => "/bioseq/bioSequence_scripts_and_constants/conseq_running_jobs.list",
		PEPITOPE_RUNNING_JOBS    => "/bioseq/bioSequence_scripts_and_constants/pepitope_running_jobs.list",
	};

	# ---- database URLs (the *_WGET values are command prefixes: the entry name is appended) ----
	use constant {
		PROTEOPEDIA => "http://proteopedia.org/wiki/index.php/",
		PDB_DB      => "http://www.rcsb.org/pdb/explore/explore.do?structureId=",
		RCSB_WGET   => "wget ftp://ftp.wwpdb.org/pub/pdb/data/structures/all/pdb/",
		RCSB        => "http://www.rcsb.org/",
		PISA_WGET   => "wget http://www.ebi.ac.uk/msd-srv/pisa/cgi-bin/multimer.pdb?",
	};

	# ---- CGI paths ----
	use constant CONSURF_CGI_DIR => "/var/www/cgi-bin/ConSurf";

	# ---- general paths ----
	# SEND_EMAIL_DIR used to be "/db1/Local/src/sendEmail" on biocluster.
	use constant {
		SERVERS_RESULTS_DIR      => "/bioseq/data/results/",
		SERVERS_LOGS_DIR         => "/bioseq/data/logs/",
		SEND_EMAIL_DIR           => "/bioseq/bioSequence_scripts_and_constants/sendEmail",
		SEND_EMAIL_DIR_IBIS      => "/bioseq/bioSequence_scripts_and_constants/sendEmail", # path on ibis
		DAEMON_LOG_FILE          => "/bioseq/bioSequence_scripts_and_constants/daemon.log",
		UPDATE_RUN_TIME_LOG_FILE => "/bioseq/bioSequence_scripts_and_constants/update_runTime.log",
		CONSURF_CGI              => "/var/www/cgi-bin/ConSurf", # on ibis
		BIOSEQ_TEMP              => "/bioseq/temp/",
	};

	# ---- server URLs ----
	# Note that SELECTON_URL is the only one without a trailing slash.
	# Former values: FASTML_URL "http://ibis.tau.ac.il/fastml/",
	#                NEW_CONSURF_URL "http://consurftest.tau.ac.il/",
	#                GILAD_SERVER_URL "http://consurftest.tau.ac.il/Gilad/".
	use constant {
		SELECTON_URL        => "http://selecton.tau.ac.il",
		CONSEQ_URL          => "http://conseq.tau.ac.il/",
		CONSURF_URL         => "http://consurf.tau.ac.il/",
		NEW_CONSURF_URL     => "http://consurf.tau.ac.il/",
		EPITOPIA_URL        => "http://epitopia.tau.ac.il/",
		PEPITOPE_URL        => "http://pepitope.tau.ac.il/",
		QMF_URL             => "http://quasimotifinder.tau.ac.il/",
		PATCHFINDER_URL     => "http://patchfinder.tau.ac.il/",
		FASTML_URL          => "http://fastml.tau.ac.il/",
		RECONST_URL         => "http://fastml.tau.ac.il/reconst/",
		GAIN_LOSS_URL       => "http://gloome.tau.ac.il/",
		CONSURF_DB_URL      => "http://consurfdb.tau.ac.il/",
		GILAD_SERVER_URL    => "http://mud.tau.ac.il/",
		MCPep_URL           => "http://bental.tau.ac.il/MCPep/",
		GUIDANCE_URL        => "http://guidance.tau.ac.il/",
		GUIDANCE_INDELS_URL => "http://guidance.tau.ac.il/indels/",
		SPECBOOST_URL       => "http://bental.tau.ac.il/specBoost/",
		PROMAYA_URL         => "http://bental.tau.ac.il/ProMaya/",
		HOMOLOGY_SEARCH_URL => "http://fastml.tau.ac.il/HomologySearch/",
		COPAP_URL           => "http://copap.tau.ac.il/",
	};

	# ---- server log files ----
	use constant {
		CONSURF_LOG         => "/bioseq/ConSurf_old/consurf.log",
		CONSURF_NEW_LOG     => "/bioseq/ConSurf/consurf.log",
		SELECTON_LOG        => "/bioseq/Selecton/selecton.log",
		EPITOPIA_LOG        => "/bioseq/epitopia/epitopia.log",
		CONSEQ_LOG          => "/bioseq/ConSeq/conseq.log",
		PEPITOPE_LOG        => "/bioseq/pepitope/pepitope.log",
		RECONST_LOG         => "/bioseq/ReConst_Server/reconst.log",
		MCPep_LOG           => "/bioseq/MCPep/mcpep.log",
		Guidance_LOG        => "/bioseq/Guidance/guidance.log",
		Guidance_Indels_LOG => "/bioseq/GuidanceIndels/guidance_Indels.log",
		MuD_LOG             => "/bioseq/Gilad_Server/MuD.log",
		FASTML_LOG          => "/bioseq/FastML/fastml.log",
		SPECBOOST_LOG       => "/bioseq/specBoost/specBoost.log",
		GAIN_LOSS_LOG       => "/bioseq/GainLoss/GainLoss.log",
		PROMAYA_LOG         => "/bioseq/ProMaya/ProMaya.log",
		COPAP_LOG           => "/bioseq/CoPAP/CoPAP.log",
	};

	# ---- server results URLs ----
	# Derived from SELECTON_URL, so it has to be declared in a later statement.
	use constant SELECTON_RESULTS_URL => SELECTON_URL."/results/";

	# ---- external databases ----
	# PQS used to be "/bioseq/data/results/PQS/".
	# UNIREF90_DB used to be "/groups/bioseq.home/HAIM/UNIREF90/uniref90".
	# PDB_TO_UNIPROT is planned to become "/biodb/idmapping_PDB_UNIPROTKB.dat".
	use constant {
		PQS                 => "/biodb/PQS/",
		PDB_DIVIDED         => "/biodb/PDB/data/structures/divided/pdb/",
		SWISSPROT_DB        => "/biodb/BLAST/Proteins/swissprot",
		UNIPROT_DB          => "/biodb/BLAST/Proteins/uniprot",
		CLEAN_UNIPROT_DB    => "/biodb/BLAST/Proteins/clean_uniprot",
		UNIREF90_DB         => "/biodb/BLAST/Proteins/uniref90",
		PDBAA_NCBI          => "/biodb/BLAST/Proteins/pdbaa",
		CULLED_PDB          => "/groups/bioseq.home/HAIM/PDBAA/pdbaaent", # TO CHANGE TO: /biodb/BLAST/dunbrack.fccc.edu/Guoli/culledpdb/pdbaaent_dun
		PDB_DUNBRACK        => "/groups/bioseq.home/HAIM/PDBAA/pdbaa",    # TO CHANGE TO: /biodb/BLAST/dunbrack.fccc.edu/Guoli/culledpdb/pdbaa_dun
		NR_PROT_DB          => "/biodb/BLAST/Proteins/nr",
		NR_NUC_DB           => "/biodb/BLAST/Nucleotides/nt",
		UNIPROT_DAT_INDEX   => "/bioseq/data/results/GB_CDS/uniprot.dat.bp_index",
		PDB_TO_UNIPROT      => "/bioseq/data/results/PDB_to_UNIPROT/idmapping_PDB_UNIPROTKB.dat",
		PDB_TO_UNIPROT_test => "/biodb/idmapping_PDB_UNIPROTKB.dat",
	};

	# ---- internal databases ----
	use constant EPITOPIA_DATA => "/bioseq/epitopia/data";

	# ---- external programs: BLAST and multiple sequence aligners ----
	# "lecs" and "biocluster" in the remarks name the machine the path belongs to.
	# BLASTALL / BLASTPGP used to live under "/opt/Bio/ncbi/bin/" on the lecs.
	use constant {
		BLASTALL        => "/opt/bio/ncbi/bin/blastall",
		BLASTPGP        => "blastpgp",
		CS_BLAST        => "/share/apps/csblast-2.1.0-linux64/csblast_static", # on the lecs
		MUSCLE_LECS     => "/share/apps/bin/muscle",   # on the lecs
		MUSCLE          => "/usr/local/bin/muscle",    # on the biocluster
		MUSCLE_3_6      => "/bioseq/Programs/muscle_3.6_from_BIOCLUSTER/muscle3.6/muscle", # for servers that came from biocluster (Selecton?, old ConSurf, ConSeq)
		CLUSTALW_LECS   => "/share/apps/bin/clustalw", # on the lecs
		CLUSTALW        => "/usr/local/bin/clustalw",  # on the biocluster
		CLUSTALW_1_82   => "/bioseq/Programs/ClustalW_1.82/clustalw1.82/clustalw", # for servers that came from biocluster (Selecton?, old ConSurf, ConSeq)
		CLUSTALW_1_81   => "/bioseq/Programs/ClustalW_1.81/clustalw1.81/clustalw", # for servers that came from biocluster (Selecton?, old ConSurf, ConSeq)
		CLUSTALW_2_0_10 => "/bioseq/Programs/ClustalW_2.0.10/clustalw-2.0.10-linux-i386-libcppstatic/clustalw2", # for servers that came from biocluster (Selecton?, old ConSurf, ConSeq)
	};

	# MAFFT builds. Alternatives that were used for the *_GUIDANCE constants:
	#   /groups/pupko/privmane/bin/mafft         (v6.711b)
	#   /bioseq/Programs/MAFFT_6.711b/mafft      (v6.711b)
	#   /bioseq/Programs/MAFFT_6.857/bin/mafft   (v6.857) !!! make sure:
	#       'setenv MAFFT_BINARIES /bioseq/Programs/MAFFT_6.857/mafft-6.857-with-extensions/binaries' BEFORE
	use constant {
		MAFFT_LINSI          => "/usr/local/bin/mafft-linsi", # on the biocluster
		MAFFT                => "/usr/local/bin/mafft",       # on the biocluster
		MAFFT_GUIDANCE       => "/bioseq/Programs/MAFFT_6.833/bin/mafft", # v6.833
		MAFFT_LINSI_GUIDANCE => "/bioseq/Programs/MAFFT_6.833/bin/mafft --localpair --maxiterate 1000", # v6.833
		PRANK_LECS           => "/share/apps/bin/prank",      # on the lecs
		PRANK                => "/usr/local/bin/prank",       # on the biocluster
		T_COFFEE             => "/share/apps/T-COFFEE-8.47/bin/binaries/linux/t_coffee", # requires setenv PATH /share/apps/T-COFFEE-8.47/bin/binaries/linux:$PATH
		PAGAN_LECS           => "/share/apps/pagan-msa/bin/pagan", # requires: "module load gcc/gcc461" before!!
	};

	# ---- external programs: rate4site, PACC, Semphy and friends ----
	# RUBY used to be "/usr/bin/ruby".
	# CD_HIT_DIR used to be "/db1/Local/src/cd-hit_redundency/".
	# SEMPHY used to be "/groups/pupko/privmane/alignment/run/semphy" on Biocluster.
	use constant {
		TREE_VIEWER_DIR         => "/bioseq/ConSurf_old/treeViewer/",
		PACC_path               => "/bioseq/ConSeq/external_scripts/PACC/",
		RATE4SITE_BIOC_VER      => "/bioseq/rate4site/BioCluster_Nov_06_dev/rate4site.exe",
		RATE4SITE_SLOW_BIOC_VER => "/bioseq/rate4site/BioCluster_Nov_06_dev/rate4siteSlow.exe",
		RATE4SITE               => "/db1/Local/src/Rate4SiteSource/r4s_Nov_06_dev/rate4site.exe",
		RATE4SITE_SLOW          => "/db1/Local/src/Rate4SiteSource/r4s_Nov_06_dev/rate4siteSlow.exe",
		RATE4SITE_SLOW_LECS     => "/share/apps/bin/rate4site_slow",
		RATE4SITE_LOCAL         => "/bioseq/rate4site/rate4site",
		RATE4SITE_SLOW_LOCAL    => "/bioseq/rate4site/rate4site.doubleRep",
		RATE4SITE_WITH_LG       => "/bioseq/rate4site/With_LG/rate4site",
		RATE4SITE_WITH_LG_SLOW  => "/bioseq/rate4site/With_LG/rate4site.doubleRep",
		RUBY                    => "/share/apps/bin/ruby",
		CD_HIT_DIR              => "/bioseq/cd_hit/",
		PREDICT_PACC            => "/bioseq/ConSeq/external_scripts/PACC/run.sh",
		MSA_to_HSSP             => "/bioseq/ConSeq/external_scripts/PACC/MSA2hssp.pl",
		SEMPHY                  => "/bioseq/Programs/Semphy/semphy.doubleRep",
	};

	# ---- internal programs ----
	use constant EPITOPIA_EXECUTABLES => "/bioseq/epitopia/executables";

	# ---- constant values ----
	use constant {
		BLAST_MAX_HOMOLOGUES_TO_DISPLAY     => 500,
		BLAST_PDB_MAX_HOMOLOGUES_TO_DISPLAY => 25,
		CONSURF_PIPE_FORM                   => "/bioseq/ConSurf_old/consurf_pipe.form",
		SELECTON_MAX_NUCLEOTIDE             => 15000,
		MAX_WALLTIME                        => "96:00:00",
	};

	# ---- queue details ----
	# MAX_QUEUE_RUNS used to be 60.
	use constant {
		BIOSEQ_NODE    => "bioc01.tau.ac.il", # node on BioCluster dedicated to Bioseq runs (not part of the queue)
		MAX_QUEUE_RUNS => 999,
	};

	# ---- external links ----
	use constant {
		RCSB_WEB              => "http://www.rcsb.org/",
		PYMOL_WEB             => "http://pymol.sourceforge.net/",
		CHIMERA_WEB           => 'http://www.rbvi.ucsf.edu/chimera/',
		CHIMERA_SAVING_FIGURE => 'http://www.cgl.ucsf.edu/chimera/current/docs/UsersGuide/print.html',
		MSA_CONVERT           => 'http://www.ebi.ac.uk/cgi-bin/readseq.cgi',
		MSA_FORMATS           => 'http://www.ebi.ac.uk/help/formats.html',
	};
	# Derived from CHIMERA_WEB, so it has to be declared in a later statement.
	use constant CHIMERA_DOWNLOAD => CHIMERA_WEB."download.html";

	# ---- "too many runs" redirect pages (derived from the server URLs) ----
	use constant CONSURF_REDIRECT_PAGE  => CONSURF_URL."too_many_runs.html";
	use constant SELECTON_REDIRECT_PAGE => SELECTON_URL."/too_many_runs.html";
	use constant CONSEQ_REDIRECT_PAGE   => CONSEQ_URL."too_many_runs.html";
	use constant PEPITOPE_REDIRECT_PAGE => PEPITOPE_URL."too_many_runs.html";

	# ---- FAQ pages ----
	use constant CONSURF_TREE_FAQ => CONSURF_URL.'quick_help.html#note5';

	# ---- file name conventions ----
	use constant {
		TEMPLATES_LIST_FILE => "List_of_Templates",
		PISA_ERRORS_FILE    => "PISA_Errors",
	};
	192
	193
	194	#---------------------------------------------
	sub print_to_output{
		# Report a fatal system error to the user: append an error banner to the
		# run's output page, optionally e-mail the user, and finally strip the
		# auto-refresh lines from the page (via stop_reload).
		#
		# Arguments:
		#   $OutHtmlFile - path of the run's HTML output page
		#   $server_name - name of the server, used in the message text
		#   $run_name    - run identifier shown to the user
		#   $recipient   - user's e-mail address, or "NO" to skip the e-mail
		my $OutHtmlFile = shift;
		my $server_name = shift;
		my $run_name = shift;
		my $recipient = shift;

		if (open(my $output, '>>', $OutHtmlFile)){
			flock $output, 2; # 2 = LOCK_EX: keep other writers out while we append
			print $output "\n<p><font size=+3 color='red'>ERROR! $server_name session has been terminated: </font>\n<br><b>A system error occured during the calculation. Please try to run $server_name again in a few minutes.</b>\n</p>\n";
			print $output "<H3><center>For assistance please <a href=\"mailto:".ADMIN_EMAIL."?subject=".$server_name."%20Run%20No:%20".$run_name."\">contact us</a> and mention this number: $run_name</H3>\n";
			close $output; # flushes the buffer and releases the lock
		}
		else{
			# The page could not be updated; still try to notify the user by mail below.
			warn "print_to_output : cannot open $OutHtmlFile for appending: $!\n";
		}
		&send_mail($server_name, $recipient, $run_name, "error","error") if ($recipient ne "NO");
		&stop_reload($OutHtmlFile);
	}
	210	#---------------------------------------------
	211
	212	# in case the desired mail report on error: the vars $email_subject and $email_message should be 'error'
	sub send_mail { # to user
		# Send an e-mail to the user through sendEmail.pl.
		#
		# Arguments:
		#   $server_name   - server the run belongs to; selects the results URL
		#   $recipient     - destination address
		#   $run_name      - run identifier (part of the results URL)
		#   $email_subject - subject line; the literal "error" selects the standard error subject
		#   $email_message - body; the literal "error" selects the standard error body
		#   $email_attach  - optional attachment argument for sendEmail.pl's -a option
		#   $from_server   - optional; "ibis" switches to SEND_EMAIL_DIR_IBIS
		#
		# Side effect: changes the current working directory of the process to the
		# sendEmail directory. The output of sendEmail.pl is discarded.
		my $server_name = shift;
		my $recipient = shift;
		my $run_name = shift;
		my $email_subject = shift;
		my $email_message = shift;
		my $email_attach = shift;
		my $from_server = shift;

		# The optional arguments may be missing; treat them as empty strings.
		foreach my $optional ($email_subject, $email_message, $email_attach, $from_server){
			$optional = "" unless defined $optional;
		}

		# Base of the results directory URL for every server that has an output page.
		my %results_url = (
			"Selecton"        => SELECTON_URL()."/results/",
			"ConSeq"          => CONSEQ_URL()."results/",
			"Epitopia"        => EPITOPIA_URL()."results/",
			"pepitope"        => PEPITOPE_URL()."results/",
			"ConSurf"         => CONSURF_URL()."results/",
			"QuasiMotiFinder" => QMF_URL()."results/",
			"fastml"          => FASTML_URL()."results/",
		);
		my $OutputURL = exists $results_url{$server_name}
			? $results_url{$server_name}.$run_name."/output.html"
			: ""; # unknown server: the error message simply carries no link

		$email_subject = "Error in $server_name running" if $email_subject eq "error";
		$email_message = "Hello!\n\nUnfortunately there was an error while running the $server_name server.\nPlease click on the following link to see more details\nWe apologize for the inconvenience\n\n$OutputURL\n" if $email_message eq "error";

		chdir(SEND_EMAIL_DIR());
		chdir(SEND_EMAIL_DIR_IBIS()) if ($from_server eq "ibis");

		# Recipient, subject and message can originate from user input, so each is
		# wrapped in single quotes with embedded single quotes escaped ('\'');
		# otherwise a quote in the text would end the argument and let the rest of
		# the text run as shell code.
		my $shell_quote = sub {
			my $text = shift;
			$text =~ s/'/'\\''/g;
			return "'".$text."'";
		};
		my $mail = 'perl sendEmail.pl -f '.$shell_quote->(ADMIN_EMAIL())
			.' -t '.$shell_quote->($recipient)
			.' -u '.$shell_quote->($email_subject)
			.' -s '.SMTP_SERVER()
			.' -m '.$shell_quote->($email_message);
		# With authentication the command would also carry: -xu ADMIN_USER_NAME -xp ADMIN_PASSWORD

		# NOTE(review): the attachment is passed unquoted, exactly as before
		# (presumably so that several space separated files can be given - confirm).
		# It must never be built from user input.
		if ($email_attach ne '') {$mail .= " -a $email_attach";}
		`$mail`;
	}
	242	#---------------------------------------------
	sub stop_reload{
		# Rewrite the output page without its auto-refresh / no-cache lines and
		# without the Selecton "cancel job" form lines and the <!--job_ markers.
		#
		# Argument: $OutHtmlFile - path of the HTML output page.
		# Returns a true value when the page was rewritten, a false value when it
		# could not be read or written (the page is then left untouched).
		my $OutHtmlFile = shift;

		sleep 10;

		# Read the whole page first; if that fails we must not open it for writing,
		# because that would truncate it to nothing.
		open(my $in, '<', $OutHtmlFile) or return 0;
		my @output = <$in>;
		close $in;

		open(my $out, '>', $OutHtmlFile) or return 0;
		foreach my $line (@output){ # we remove the refresh lines and the button which codes for Selecton cancelled job
			next if ($line =~ /REFRESH/i or $line =~ /NO-CACHE/i);
			next if ($line =~ /ACTION=\"\/cgi\/kill_process.cgi/ or $line =~ /VALUE=\"Cancel Selecton Job\"/);
			next if ($line =~ /TYPE=hidden NAME=\"pid\"/ or $line =~ /TYPE=hidden NAME=\"selecton_http\"/
				or $line =~ /TYPE=hidden NAME=\"run_no\"/);
			next if ($line =~ /<!--job_/);
			print $out $line;
		}
		return close $out;
	}
	261	#---------------------------------------------
	sub print_Q_status_in_html{
		# Update the queue information shown on a run's HTML page, in place.
		#
		# Arguments:
		#   $html_file           - path of the HTML page
		#   $_status             - new job status; "" leaves the status line as is
		#   $_time               - time passed since submission; "" or "no" leaves it as is
		#   $_estimated_run_time - estimated run time; "none" leaves the line as is.
		#                          A unit is appended according to the number of
		#                          colon separated fields (4: days, 3: hours, 2: minutes).
		#
		# Lines recognised in the page:
		#   <!--job_stat--><...>Your job status is:</a> STATUS<br>
		#   <!--job_pass-->The time that passed since submitting the query is: TIME<br>
		#   <!--job_time--Estimated run time is: -->   (a commented-out placeholder
		#       which is un-commented and completed with the estimated run time)
		#
		# Returns "OK", or an error message when the file could not be opened.
		my $html_file = shift;
		my $_status = shift;
		my $_time = shift;
		my $_estimated_run_time = shift;

		$_status = "" unless defined $_status;
		$_time = "" if (!defined $_time or $_time eq "no");
		$_estimated_run_time = "" unless defined $_estimated_run_time;

		# Work out the unit once, before the loop, so it cannot be appended twice
		# when more than one placeholder line is present.
		my $run_time_text = $_estimated_run_time;
		if ($run_time_text =~ m/\d+:\d+:\d+:\d+/) {
			$run_time_text .= " days";
		}
		elsif ($run_time_text =~ m/\d+:\d+:\d+/) {
			$run_time_text .= " hours";
		}
		elsif ($run_time_text =~ m/\d+:\d+/) {
			$run_time_text .= " minutes";
		}

		# '+>>' opens for reading and appending and creates the file if needed,
		# which lets us lock the file before its content is touched.
		my $html;
		unless (open($html, '+>>', $html_file)) {
			return "print_Q_status_in_html : Could not open file $html_file to update the status. Status is: $_status reason: $!\n";
		}
		flock $html, 2; # 2 = LOCK_EX
		seek $html, 0, 0; # rewind the pointer to the beginning
		my @html_lines = <$html>; # read the contents into the array
		truncate $html, 0; # remove all the information; 0 is the size we want the file to have

		foreach (@html_lines){
			if (/<!--job_stat--><.+>Your job status is:<\/a> (.+)<br>/){
				# Replace exactly the captured span (@- / @+ hold its offsets). Using
				# the captured text as a pattern would hit its first occurrence
				# anywhere in the line and would break on regex metacharacters.
				substr($_, $-[1], $+[1] - $-[1]) = $_status if ($_status ne "");
			}
			elsif (/<!--job_pass-->The time that passed since submitting the query is: (.+)<br>/){
				substr($_, $-[1], $+[1] - $-[1]) = $_time if ($_time ne "");
			}
			elsif ($_estimated_run_time ne "none"){
				# Close the comment marker right after "job_time" and put the value
				# where the old closing "-->" was.
				s/<!--job_time--Estimated run time is: -->/<!--job_time-->Estimated run time is: $run_time_text<br>/;
			}
		}
		print $html $_ foreach (@html_lines);
		close $html; # flushes the buffer and releases the lock
		return "OK";
	}
	314
	315
	316	# in case the desired mail report on error: the vars $email_subject and $email_message should be 'error'
	sub send_mail2 { # to user
		# Same as send_mail, but the error output of sendEmail.pl is discarded
		# (redirected to /dev/null).
		#
		# Arguments:
		#   $server_name   - server the run belongs to; selects the results URL
		#   $recipient     - destination address
		#   $run_name      - run identifier (part of the results URL)
		#   $email_subject - subject line; the literal "error" selects the standard error subject
		#   $email_message - body; the literal "error" selects the standard error body
		#   $email_attach  - optional attachment argument for sendEmail.pl's -a option
		#   $from_server   - optional; "ibis" switches to SEND_EMAIL_DIR_IBIS
		#
		# Side effect: changes the current working directory of the process to the
		# sendEmail directory.
		my $server_name = shift;
		my $recipient = shift;
		my $run_name = shift;
		my $email_subject = shift;
		my $email_message = shift;
		my $email_attach = shift;
		my $from_server = shift;

		# The optional arguments may be missing; treat them as empty strings.
		foreach my $optional ($email_subject, $email_message, $email_attach, $from_server){
			$optional = "" unless defined $optional;
		}

		my $OutputURL = "";
		if ($server_name eq "Selecton") {$OutputURL = SELECTON_URL()."/results/$run_name"."/output.html";}
		elsif ($server_name eq "ConSeq") {$OutputURL = CONSEQ_URL()."results/$run_name"."/output.html";}
		elsif ($server_name eq "Epitopia") {$OutputURL = EPITOPIA_URL()."results/$run_name"."/output.html";}
		elsif ($server_name eq "pepitope") {$OutputURL = PEPITOPE_URL()."results/$run_name"."/output.html";}
		elsif ($server_name eq "ConSurf") {$OutputURL = CONSURF_URL()."results/$run_name"."/output.html";}
		elsif ($server_name eq "QuasiMotiFinder") {$OutputURL = QMF_URL()."results/$run_name"."/output.html";}
		elsif ($server_name eq "fastml") {$OutputURL = FASTML_URL()."results/$run_name"."/output.html";}

		$email_subject = "Error in $server_name running" if $email_subject eq "error";
		$email_message = "Hello!\n\nUnfortunately there was an error while running the $server_name server.\nPlease click on the following link to see more details\nWe apologize for the inconvenience\n\n$OutputURL\n" if $email_message eq "error";

		chdir(SEND_EMAIL_DIR());
		chdir(SEND_EMAIL_DIR_IBIS()) if ($from_server eq "ibis");

		# Recipient, subject and message can originate from user input: wrap each in
		# single quotes and escape embedded single quotes ('\'') so the text cannot
		# terminate the argument and be executed by the shell.
		my @quoted;
		foreach my $text (ADMIN_EMAIL(), $recipient, $email_subject, $email_message){
			(my $escaped = $text) =~ s/'/'\\''/g;
			push @quoted, "'".$escaped."'";
		}
		my $mail = "perl sendEmail.pl -f $quoted[0] -t $quoted[1] -u $quoted[2] -s ".SMTP_SERVER()." -m $quoted[3]";
		# With authentication the command would also carry: -xu ADMIN_USER_NAME -xp ADMIN_PASSWORD

		# NOTE(review): the attachment is passed unquoted, exactly as before
		# (presumably so that several space separated files can be given - confirm).
		# It must never be built from user input.
		if ($email_attach ne '') {$mail .= " -a $email_attach";}

		# Backticks already run the command through the shell, so the redirection is
		# simply appended. The previous single-quoted 'sh -c ...' wrapper never
		# interpolated $mail, so the mail command was not executed at all.
		$mail .= " 2>/dev/null";
		`$mail`;
	}
	346	1;

+169

-0

www/fastml/BuildRaxMLTree.pl less more

	0	use strict;
	1	use FileHandle;
	2	use Bio::SeqIO;
	3	use Bio::AlignIO;
	4
	# Build a phylogenetic tree with RAxML from a FASTA alignment.
	# Steps: replace the sequence names by running numbers, convert the coded
	# alignment to Phylip, run raxmlHPC in the working directory, and finally put
	# the original names back into the best tree.
	#
	# Command line: <MSA> <OutTree> <WorkingDir> <Model>
	my $MSA=shift;        # input alignment (FASTA)
	my $OutTree=shift;    # output tree; only its file name is used, the tree is written into WorkingDir
	my $WorkingDir=shift; # directory for the intermediate files and the result

	my $Model=shift; #Available AA substitution models: DAYHOFF, DCMUT, JTT, MTREV, WAG, RTREV, CPREV, VT, BLOSUM62, MTMAM, LG, MTART, MTZOA, PMB, HIVB, HIVW, JTTDCMUT, FLU, GTR
	#NUC: GTRCAT

	die "USAGE: BuildRaxMLTree.pl <MSA> <OutTree> <WorkingDir> <Model>\n"
		unless (defined $MSA and defined $OutTree and defined $WorkingDir and defined $Model);

	my $MSA_Name=$MSA; # IF WITHOUT PATH
	if ($MSA=~/([^\/]+)$/){$MSA_Name=$1;} # NAME WITHOUT PATH

	my $OutTree_Suffix=$OutTree; # IF WITHOUT PATH
	if ($OutTree=~/([^\/]+)$/){$OutTree_Suffix=$1;} # NAME WITHOUT PATH

	# Make sure the directory ends with a slash. The test has to be anchored at the
	# end: a slash anywhere else in the path says nothing about the last character.
	$WorkingDir=$WorkingDir."/" if ($WorkingDir!~/\/$/);
	my $Codes2NameIndex=$WorkingDir."$MSA_Name"."Codes2NamesIndex.txt";
	my $CodedMSA=$WorkingDir."$MSA_Name".".coded.aln";
	my $CodedMSAPhylip=$WorkingDir."$MSA_Name".".coded.Phylip";

	# Convert Names to numbers
	my $ans=name2codeFastaFrom1("$MSA",$Codes2NameIndex,$CodedMSA);
	die "ERROR: name2codeFastaFrom1 failed: $ans\n" if ($ans ne "ok");

	# Convert To Phylip
	convertMsaFormat($CodedMSA,$CodedMSAPhylip,"fasta","phylip");

	# Run RaxML
	$Model="PROTCAT".$Model if ($Model ne "GTRCAT");
	# '&&' instead of ';': do not start raxmlHPC in the wrong directory when the cd fails
	my $RaxML_cmd="cd $WorkingDir && raxmlHPC -s $CodedMSAPhylip -n $OutTree_Suffix"." -m $Model";
	print "$RaxML_cmd\n";
	system ($RaxML_cmd) == 0 or die "ERROR: the RAxML command failed (wait status $?): $RaxML_cmd\n";

	# Bring Back names to tree
	my $RaxMLTree="RAxML_bestTree.$OutTree_Suffix";
	code2nameTree($Codes2NameIndex,$WorkingDir.$RaxMLTree,$WorkingDir."$OutTree_Suffix");
	37
	38
	sub name2codeFastaFrom1 {
		####################################################################################################################
		# Replace the sequence names of a FASTA file by running numbers.
		# Two files are written: the coded FASTA file, and a tab separated index file
		# holding "<original name>\t<number>" for every sequence.
		#
		# Arguments: input FASTA file, index (code) file, output FASTA file and,
		# optionally, the first number to use (default 1; 0 is treated as 1).
		# Returns "ok", or an error message when an output file cannot be created.
		####################################################################################################################
		my ($in_fileName, $code_fileName, $out_fileName, $counter_offset) = @_;

		my $in_file = Bio::SeqIO->new(-file => $in_fileName , '-format' => 'Fasta');
		my $code_file = FileHandle->new(">$code_fileName") or return ("Can't write to $code_fileName $!");
		my $out_file = FileHandle->new(">$out_fileName") or return ("Can't write to $out_fileName");

		if (!defined $counter_offset or $counter_offset==0) {
			$counter_offset = 1;
		}

		my $counter = $counter_offset;
		while ( my $seqObj = $in_file->next_seq() ) {
			# the full name is the id plus, when present, the description
			my $name = $seqObj->display_id();
			if ($seqObj->desc()) {
				$name .= " ".$seqObj->desc();
			}
			print $code_file "$name\t$counter\n";

			# the sequence is written in rows of at most 60 characters
			my $seq = $seqObj->seq();
			print $out_file ">$counter\n";
			for (my $pos = 0; $pos < length($seq); $pos += 60) {
				print $out_file substr($seq, $pos, 60) . "\n";
			}
			print $out_file "\n"; # every record ends with an empty line
			$counter++;
		}

		$out_file->close();
		$in_file->close();
		$code_file->close();
		return "ok";
	}
	76
	sub code2nameTree
	{
		###############################################################################################################
		# Works together with (or rather after) name2codeFastaFrom1. Takes a tree that
		# was built from a FASTA file with numeric codes and puts the original names
		# back. The code file is the index written by name2codeFastaFrom1
		# ("<name>\t<code>" per line).
		# ** very useful for programs that chop the names to 10 chars
		#
		# Arguments:
		#   $code2nameFile - the index file
		#   $treeFile      - the tree with codes as leaf names
		#   $newFile       - the file to write the tree with the names to
		#   $nameLength    - optional, names are cut to this length (default 30)
		# Prints "<code> <name>" to STDOUT for every index line read.
		# Dies when one of the files cannot be opened. Returns 1.
		###############################################################################################################
		my $code2nameFile = shift;
		my $treeFile = shift;
		my $newFile = shift;
		my $nameLength = shift;
		$nameLength = 30 if (!defined $nameLength);

		my %code2name;
		open(my $codes_fh, '<', $code2nameFile)
			or die "code2nameTree: cannot open the code file '$code2nameFile' for reading: $!\n";
		while (my $line = <$codes_fh>){
			# skip lines that are not "<name>TAB<code>" instead of reusing stale captures
			next unless ($line =~ /(.+)\t(\d+)/);
			my ($name, $code) = ($1, $2);
			$name =~ s/[\[\]\,\:\;]/_/g; #remove characters that are newick format associated
			# keep everything up to the last backslash if there is one, otherwise
			# cut the name to its first $nameLength characters
			if ($name =~ m/(.*\\|.{$nameLength})/) {
				$name = $1;
			}
			$code2name{$code} = $name;
			print "$code $name\n";
		}
		close($codes_fh);

		# read the whole tree into a single string
		open(my $tree_fh, '<', $treeFile)
			or die "code2nameTree: cannot open the tree file '$treeFile' for reading: $!\n";
		my $full_tree = "";
		while (my $tree_line = <$tree_fh>){
			chomp $tree_line;
			$full_tree .= $tree_line;
		}
		close($tree_fh);

		open(my $new_fh, '>', $newFile)
			or die "code2nameTree: cannot open '$newFile' for writing: $!\n";
		# A leaf is a number that comes right after a "(" or a "," and right before
		# a ":"; splitting on ":" therefore leaves every leaf code at the end of a field.
		foreach my $field (split(/:/, $full_tree)) {
			# a code that is missing from the index is kept as it is
			$field =~ s/(?<=[\,\(])(\d+)$/exists $code2name{$1} ? $code2name{$1} : $1/e;

			if ($field !~ /;$/) {print $new_fh "$field:";}
			else {print $new_fh "$field";} # Last One
		}
		print $new_fh "\n";
		close($new_fh) or die "code2nameTree: cannot write '$newFile': $!\n";
		return 1;
	}
	143
	sub convertMsaFormat
	{
		# Convert an alignment file from one format to another with Bio::AlignIO.
		#
		# Arguments: input file, output file, input format, output format
		# (format names as understood by Bio::AlignIO, e.g. "fasta", "phylip").
		# The four arguments are echoed to STDOUT before the conversion starts.
		my ($inFile, $outFile, $inFormat, $outFormat) = @_;

		my @echo = (
			[inFile => $inFile], [outFile => $outFile],
			[inFormat => $inFormat], [outFormat => $outFormat],
		);
		foreach my $pair (@echo) {
			print "$pair->[0] = '$pair->[1]'\n";
		}

		my $reader = Bio::AlignIO->new( '-format' => $inFormat , -file => $inFile);
		my $writer = Bio::AlignIO->new( '-format' => $outFormat , -file => ">$outFile");

		while (my $alignment = $reader->next_aln()) {
			$alignment->verbose(1);
			# Otherwise, bioperl adds sequence start/stop values, causing problems
			# with clustal/bali_score
			$alignment->set_displayname_flat();
			$writer->write_aln($alignment);
		}
	}

+2185

-0

www/fastml/FastML_Wrapper.pl less more

	0	use strict;
	1
	2	use Getopt::Long;
	3
	4
	5	use FindBin qw($Bin); # www/FastML_2012/
	6	use lib "$Bin/../bioSequence_scripts_and_constants/";
	7	#use lib "/bioseq/bioSequence_scripts_and_constants";
	8	use GENERAL_CONSTANTS;
	9	use BIOSEQUENCE_FUNCTIONS;
	10	use POSIX;
	11	use FindBin qw($Bin);
	12	use File::Copy;
	13	use File::Basename;
	14
	15	die "USAGE:FastML_Wrapper.pl --MSA_File <MSA_File> --seqType <aa\|nuc\|codon> --outDir <FULL_PATH_outDir>
	16	Optional parameters:
	17	--Tree <phylogenetic tree>
	18	--TreeAlg <NJ \| RAxML> - How to builed tree when tree not provided by user; default=NJ
	19	--SubMatrix <JTT \| LG \| mtREV \| cpREV \| WAG \| DAYHOFF > amino acid options, the default is JTT.
	20	<JC_Nuc \| T92 \| HKY \| GTR> nucleotide options, the default is JC_Nuc.
	21	<yang \| empiriCodon> codon options, the default is yang.
	22	--OptimizeBL <yes \| no> default: yes
	23	--UseGamma <yes \| no> default: yes
	24	# --OptAlpha <yes \| no> default: no (relevant only when UseGamma==yes)
	25	--Alpha <User provide alpha> (relevant only when UseGamma==yes)
	26	user alpha parameter of the gamma distribution [if alpha is not given, alpha and branches will be evaluated from the data]
	27	--jointReconstruction <yes \| no> default: yes
	28	--indelReconstruction <PARSIMONY\|ML\|BOTH> - which method is used for indel reconstruction
	29	--indelCutOff <Cutoff for indel vs Char> deafult =0.5
	30	" unless (@ARGV >= 1);
	31	my @ARGV_forPrint=@ARGV;
	32	my %VARS=(); # FOR PROGRAM VARS
	33	my %FORM=(); # FOR USER INPUTS
	34
	35	# Assign default
	36	$FORM{MSA_File}="";
	37	$FORM{outDir}="";
	38	$FORM{TreeAlg}="NA";
	39	$FORM{Tree}="NA";
	40	$FORM{OptimizeBL}="YES";
	41	$FORM{UseGamma}="YES";
	42	#$FORM{OptAlpha}="NO";
	43	$FORM{Alpha}="";
	44	$VARS{RunNumber}="NA";
	45	$VARS{isServer}="NO";
	46	$FORM{JointReconstruction}="YES";
	47
	48	$FORM{IndelReconstructionMethod}="BOTH";
	49	$FORM{IndelsCutoff}=0.5;
	50	$FORM{DEBUG}="NO";
	# Parse the command line into %FORM (user inputs) and %VARS (program variables).
	# Option specifiers: "=s" - the option needs a string value; ":s" / ":i" / ":f" -
	# the value is optional and is a string / an integer / a real number.
	# The defaults assigned above stay in place for options that are not given.
	my $getoptResult = GetOptions ("MSA_File=s"=>\$FORM{MSA_File},
		"outDir=s"=>\$FORM{outDir},
		"seqType=s"=>\$FORM{seqType},
		"Tree:s"=>\$FORM{Tree},
		"TreeAlg:s"=>\$FORM{TreeAlg}, # NJ | RaxML
		"SubMatrix:s"=>\$FORM{SubMatrix},
		"OptimizeBL:s"=>\$FORM{OptimizeBL},
		"UseGamma:s"=>\$FORM{UseGamma},
		# "OptAlpha:s"=>\$FORM{OptAlpha},
		# the alpha parameter of the gamma distribution is a real number (e.g. 0.5),
		# so it must be parsed with ":f"; ":i" only accepts integers
		"Alpha:f"=>\$FORM{Alpha},
		"jointReconstruction:s"=>\$FORM{JointReconstruction},
		"indelReconstruction:s"=>\$FORM{IndelReconstructionMethod}, # Parsimony | ML
		"RunNum:i"=>\$VARS{RunNumber}, # RELEVANT FOR SERVER ONLY
		"isServer:s"=>\$VARS{isServer}, # RELEVANT FOR SERVER ONLY
		"indelCutOff:f"=>\$FORM{IndelsCutoff},
		"DEBUG:s"=>\$FORM{DEBUG} # YES | NO
	);
	68
	69	$FORM{JointReconstruction}=uc($FORM{JointReconstruction});
	70	$FORM{UseGamma}=uc($FORM{UseGamma});
	71	$FORM{OptimizeBL}=uc($FORM{OptimizeBL});
	72	$FORM{TreeAlg}=uc($FORM{TreeAlg});
	73	$FORM{DEBUG}=uc($FORM{DEBUG});
	74	$FORM{IndelReconstructionMethod}=uc($FORM{IndelReconstructionMethod});
	75
	76	die "ERROR: No path for output\n" if ($FORM{outDir} eq "");
	77	die "ERROR: MSA_File is requiered\n" if ($FORM{MSA_File} eq "");
	78
	79	$FORM{seqType}=lc ($FORM{seqType});
	80	die "ERROR: seqType must be aa or nuc or codon - NOT $FORM{seqType}\n" if (($FORM{seqType} ne "aa") and ($FORM{seqType} ne "codon") and ($FORM{seqType} ne "nuc"));
	81	unless ($FORM{outDir} =~ m/\/$/) {
	82	$FORM{outDir} .= "/";
	83	}
	84	print "outDir: $FORM{outDir}\n";
	85	unless (-e $FORM{outDir}) {
	86	mkdir ($FORM{outDir});
	87	}
	88
	89	if (!defined $FORM{SubMatrix}) # assign default
	90	{
	91	if ($FORM{seqType} eq "aa") {$FORM{SubMatrix}="JTT"; print "SubMatrix=JTT (default)\n";}
	92	elsif ($FORM{seqType} eq "nuc") {$FORM{SubMatrix}="JC_Nuc"; print "SubMatrix=JC_Nuc (default)\n";}
	93	elsif ($FORM{seqType} eq "codon") {$FORM{SubMatrix}="yang"; print "SubMatrix=yang (default)\n";}
	94	}
	95
	96	if (($FORM{Tree} ne "NA") and ($FORM{TreeAlg} ne "NA"))
	97	{
	98	die "ERROR: Notice, only --Tree or --TreeAlg should be provided, not both...\n";
	99	}
	100	if (($FORM{Tree} ne "NA") and (!-e $FORM{Tree}))
	101	{
	102	die "ERROR: The tree file '$FORM{Tree}' does not exists...\n";
	103	}
	104
	105	if (($FORM{IndelsCutoff}<0) or ($FORM{IndelsCutoff}>1))
	106	{
	107	die "ERROR: The --indelCutOff must be between 0 and 1...\n";
	108	}
	109	if (($FORM{IndelReconstructionMethod} ne "BOTH") and ($FORM{IndelReconstructionMethod} ne "PARSIMONY") and ($FORM{IndelReconstructionMethod} ne "ML"))
	110	{
	111	die "ERROR: The --indelReconstruction must be ML or PARSIMONY or BOTH Only...\n";
	112	}
	113	# Assign other defaults
	114	$VARS{Aln_format}="FASTA";
	115	$FORM{TreeAlg}="NJ" if ($FORM{TreeAlg} eq "NA");
	116	###### here are the name of the result files.
	117
	118	###### tree file output in Newick format:
	119	$VARS{tree_newick} = "tree.newick.txt";
	120
	121	###### ree file output in ANCESTOR format:
	122	$VARS{tree_ancestor} = "tree.ancestor.txt";
	123
	124	###### joint sequences output file:
	125	$VARS{seq_joint} = "seq.joint.txt";
	126	###### marginal sequences output file:
	127	$VARS{seq_marginal} = "seq.marginal.txt";
	128	###### joint probabilities output file:
	129	$VARS{prob_joint} = "prob.joint.txt";
	130
	131	###### marginal probabilities output file:
	132	$VARS{prob_marginal} = "prob.marginal.txt";
	133	$VARS{prob_marginal_csv} = "prob.marginal.csv";
	134	$VARS{log_likelihood_prob_marginal_csv}="LogLikelihood_prob.margianl.csv";
	135
	136	# Indel Reconstructions
	137	# Likelihood
	138	$VARS{marginal_seq_chars_and_indel}="seq.marginal_IndelAndChars.txt";
	139	$VARS{marginal_prob_chars_and_indel}="Ancestral_MaxMarginalProb_Char_Indel.txt";
	140	$VARS{marginal_indel_prob}="IndelsMarginalProb.txt";
	141	# Parsimony
	142	$VARS{marginal_prob_chars_and_parsimony_indels}="Ancestral_MaxProb_Marginal_Char_Parsimony_Indel.txt";
	143	$VARS{marginal_seq_chars_and_parsimony_indels}="seq.marginal_Chars_ParsimonyIndels.txt";
	144	$VARS{parsimony_indels}="Indels.parsimony.txt";
	145	###### JalView Outputs
	146	$VARS{JalViewMarginalFeaturesFile}="JalView_Features_Marginal_Prob";
	147	$VARS{seq_marginal_JalView}="seq.marginal_NO_IndelReconstruction_JalView.$VARS{Aln_format}".".aln";
	148	$VARS{Tree_JalView}="tree.JalView.newick";
	149	$VARS{JalView_Marginal_Reconstruction}="JalViewMarginal_Seq_Reconstruction_NO_IndelReconstruction.html" if ($VARS{isServer} eq "YES");
	150	$VARS{JalView_Marginal_Reconstruction}="JalViewMarginal_Seq_Reconstruction_NO_IndelReconstruction.jnlp" if ($VARS{isServer} eq "NO");
	151	##Chars and Indels
	152	# ML BASED
	153	$VARS{JalViewMarginal_Chars_and_Indels_FeaturesFile}="JalView_Features_CharsAndIndels_Marginal_Prob";
	154	$VARS{seq_marginal_Chars_and_Indels_JalView}="seq.marginal_CharsAndIndels_JalView.$VARS{Aln_format}".".aln";
	155	if ($VARS{isServer} eq "YES")
	156	{
	157	$VARS{JalView_Marginal_Chars_and_Indel_Reconstruction}="JalViewMarginal_CharsAndIndels_Reconstruction.html";
	158	}
	159	else
	160	{
	161	$VARS{JalView_Marginal_Chars_and_Indel_Reconstruction}="JalViewMarginal_CharsAndIndels_Reconstruction.jnlp";
	162	}
	163	# ML CHARS PARSIMONY INDELS
	164	$VARS{seq_marginal_chars_and_parsimony_indels_JalView}="seq.marginal_Chars_ParsimonyIndels_JalView.$VARS{Aln_format}".".aln";
	165	$VARS{JalViewMarginal_Chars_and_Parsimony_Indels_FeaturesFile}="JalView_Features_Marginal_Prob_Chars_And_Parsimony_Indels";
	166	if ($VARS{isServer} eq "YES")
	167	{
	168	$VARS{JalView_Marginal_Chars_and_Parsimony_Indel_Reconstruction}="JalViewMarginal_Chars_And_Parsimony_Indels_Reconstruction.html";
	169	}
	170	else
	171	{
	172	$VARS{JalView_Marginal_Chars_and_Parsimony_Indel_Reconstruction}="JalViewMarginal_Chars_And_Parsimony_Indels_Reconstruction.jnlp";
	173	}
	174	# Joint reconstruction
	175	$VARS{JalViewJointAnnotationGraphFile}="JalView_Annotation_Graph_Joint_Prob";
	176	$VARS{seq_joint_JalView}="seq.joint_JalView.$VARS{Aln_format}".".aln";
	177	if ($VARS{isServer} eq "YES")
	178	{
	179	$VARS{JalView_Joint_Reconstruction}="JalViewJoint_Reconstruction.html";
	180	}
	181	else
	182	{
	183	$VARS{JalView_Joint_Reconstruction}="JalViewJoint_Reconstruction.jnlp";
	184	}
	185
	186	###### here we set the html output file (where links to all files will be)
	187	if ($VARS{isServer} eq "NO")
	188	{
	189	$VARS{OutHtmlFile} = "output.html";
	190	}
	191	else
	192	{
	193	$VARS{OutHtmlFile} = "output.php";
	194	}
	195
	196	#TO DO
	197
	198	# Convert sequence names to num to avoid problems with RAxML and LIB
	199
	200	if ($VARS{isServer} eq "NO")
	201	# Copy input files to the running dir and work on them from now on
	202	{
	203	copy ($FORM{MSA_File},$FORM{outDir});
	204	my ($MSA_FileName,$MSA_dir)=fileparse($FORM{MSA_File});
	205	$FORM{MSA_File}=$FORM{outDir}.$MSA_FileName;
	206	print "Copy and analyse MSA: $FORM{MSA_File}\n";
	207	if (-e $FORM{Tree})
	208	{
	209	copy ($FORM{Tree},$FORM{outDir});
	210	my ($Tree_FileName,$Tree_dir)=fileparse($FORM{Tree});
	211	$FORM{Tree}=$FORM{outDir}.$Tree_FileName;
	212	print "Copy and analyse tree: $FORM{Tree}\n";
	213	}
	214	}
	215	my %SeqNamesToCode=();
	216	my %CodeToSeqName=();
	217	my ($SeqNamesToCode,$CodeToSeqName)=MSASeqNamesToCode($FORM{MSA_File},$FORM{outDir});
	218	TreeNamesToCodes ($FORM{Tree},$SeqNamesToCode) if (-e $FORM{Tree});
	219	%CodeToSeqName=%$CodeToSeqName;
	220	%SeqNamesToCode=%$SeqNamesToCode;
	221
	222	################
	223
	224	if ($FORM{Tree} ne "NA")
	225	{
	226	$VARS{UserProvideTree}="YES";
	227	}
	228	else
	229	{
	230	$VARS{UserProvideTree}="NO";
	231	if ($FORM{TreeAlg} eq "RAXML")
	232	{
	233	$VARS{RAxML_Tree}="RAxML_tree.newick";
	234	}
	235	}
	236	if ($VARS{isServer} eq "YES")
	237	{
	238	$VARS{All_Outputs_Zip}="FASTML_run_".$VARS{RunNumber}.".zip"; # All Outputs ZIP
	239	$VARS{logs_dir} = GENERAL_CONSTANTS::SERVERS_LOGS_DIR."fastml/" if ($VARS{isServer} eq "YES");
	240	$VARS{OutLogFile} = $VARS{logs_dir}.$VARS{RunNumber}.".log";
	241	###### WWWdir is where the web=page is.
	242	$VARS{WWWdir} = GENERAL_CONSTANTS::FASTML_URL."results/" .$VARS{RunNumber}. "/"; #XMXMXMXMX
	243	$VARS{run_url} = $VARS{WWWdir}.$VARS{OutHtmlFile};
	244	###### here we set the reload interval (in seconds).
	245	$VARS{reload_interval} = 30;
	246	###### here we set the email of the server - for problems...
	247	$VARS{DEVELOPER_MAIL} = GENERAL_CONSTANTS::ADMIN_EMAIL;
	248	$VARS{UserMailFile}=$FORM{outDir}."user_email.txt";
	249	$VARS{DevMail} = "\"mailto:$VARS{DEVELOPER_MAIL}?subject=Fastml%20Run%20No.:%20$VARS{RunNumber}\"";
	250	$VARS{ContactDef} = "\n<H3><center>For assistance please <a href=$VARS{DevMail}>contact us</a> and mention this number: $VARS{RunNumber}</H3>\n";
	251	###### these are the names of the programs.
	252	# $VARS{fastml} = "/bioseq/pupkoSVN/tags/fastml.v2.05/programs/fastml/fastml"; # TO DO
	253	# $VARS{fastml} = "/groups/pupko/haim/pupkoSVN/trunk/programs/fastml/fastml"; # TO DO
	254	$VARS{fastml} = "/bioseq/FastML/fastml";
	255	$VARS{Indel_Reconstruction} = "/bioseq/FastML/IndelReconstruction/IndelReconstruct.pl"; # TO DO
	256	$VARS{RAxML} = "/bioseq/FastML/BuildRaxMLTree.pl"; # TO DO
	257	###### Send mail Global VARS
	258	$VARS{send_email_dir} = GENERAL_CONSTANTS::SEND_EMAIL_DIR_IBIS;
	259	$VARS{smtp_server} = GENERAL_CONSTANTS::SMTP_SERVER;
	260	$VARS{userName} = GENERAL_CONSTANTS::ADMIN_USER_NAME;
	261	$VARS{userPass} = GENERAL_CONSTANTS::ADMIN_PASSWORD;
	262	my $estimated_run_time=estimate_run_time($FORM{MSA_File},$FORM{seqType},$VARS{UserProvideTree},$FORM{UseGamma});
	263
	264	# UPDATE STATE
	265	open OUTPUT, "$FORM{outDir}$VARS{OutHtmlFile}" \|\| exit_on_error("sys_error","Can't open output page: '$FORM{outDir}$VARS{OutHtmlFile}' $!");
	266	my @OUTPUT=<OUTPUT>;
	267	close (OUTPUT);
	268	my $currentTime=time;
	269	print "CURRENT TIME:$currentTime\n";#<STDIN>;
	270	open (SUBMITING_TIME,">$FORM{outDir}SUBMISSION_TIME");
	271	print SUBMITING_TIME $currentTime;
	272	close (SUBMITING_TIME);
	273	open (STATUS,">$FORM{outDir}QUEUE_STATUS");
	274	print STATUS "Running";
	275	close (STATUS);
	276	open (OUTPUT, ">$FORM{outDir}$VARS{OutHtmlFile}") \|\| exit_on_error("sys_error","Can't open output page: '$FORM{outDir}$VARS{OutHtmlFile}' $!");
	277	foreach my $line (@OUTPUT)
	278	{
	279	if ($line=~/QUEUED/)
	280	{
	281	$line=~s/QUEUED/RUNNING/;
	282	print OUTPUT $line;
	283	}
	284	elsif ($line=~/The time that passed since submitting the query is:/)
	285	{
	286	$line=~s/The time that passed since submitting the query is:/Running time is:/;
	287	print OUTPUT "$line";
	288	}
	289	elsif ($line=~/\<!-- HERE WILL COME ESTIMATED TIME --\>/)
	290	{
	291	print OUTPUT "<font size=\"4\">Estimated running time:<b> $estimated_run_time</b></font><br>\n";
	292	}
	293	else
	294	{
	295	print OUTPUT $line;
	296	}
	297	}
	298	close (OUTPUT);
	299	}
	300	else
	301	{
	302	$VARS{logs_dir} = $FORM{outDir};
	303	$VARS{OutLogFile} = $FORM{outDir}."FastML_log.log";
	304	###### these are the names of the programs
	305	# $VARS{fastml} = "/bioseq/pupkoSVN/tags/fastml.v2.05/programs/fastml/fastml";
	306	$VARS{fastml} = "$Bin/../../programs/fastml/fastml";
	307	# $VARS{fastml} = "/groups/pupko/haim/pupkoSVN/trunk/programs/fastml/fastml";
	308	$VARS{Indel_Reconstruction} = "$Bin/IndelReconstruction_Wrapper.pl";
	309	# $VARS{Indel_Reconstruction} = "/bioseq/FastML/IndelReconstruction/IndelReconstruct.pl";
	310	$VARS{RAxML} = "$Bin/BuildRaxMLTree.pl";
	311	# $VARS{RAxML} = "/bioseq/FastML/BuildRaxMLTree.pl";
	312	$VARS{DEVELOPER_MAIL} = GENERAL_CONSTANTS::ADMIN_EMAIL;
	313	$VARS{DevMail} = "\"mailto:$VARS{DEVELOPER_MAIL}?subject=FastML\"";
	314
	315	# VALIDATION FOR COMMAND LINE
	316	removeEndLineExtraChars($FORM{MSA_File});
	317	## TO DO - ADD MORE FROM THE CGI
	318	}
	319
	320	###### here we set the error definitions.
	321
	322	$VARS{ErrorDef} = "<font size=+3 color='red'>ERROR! FASTML session has been terminated: </font>";
	323	$VARS{SysErrorDef} = "<p><font size=+3 color='red'>SYSTEM ERROR - FASTML session has been terminated!</font><br><b>Please wait for a while and try to run FASTML again</b></p>\n";
	324	print "LOG: $VARS{OutLogFile}\n";
	325	open (LOG,">>$VARS{OutLogFile}") \|\| exit_on_error('sys_error', "Can't open Log File: $VARS{OutLogFile} $!");
	326	print LOG "\n\n========================================= NEW FASTML RUN STARTED ===========================================\n";
	327	print LOG "COMMAND: perl $0 "."@ARGV_forPrint"."\n";
	328	print LOG "FULL RUNNING PARAMETERS (INCLUDING DEFAULTS):\n--MSA_File $FORM{MSA_File} --outDir $FORM{outDir} --seqType $FORM{seqType} --Tree $FORM{Tree} --TreeAlg $FORM{TreeAlg} --SubMatrix $FORM{SubMatrix} --OptimizeBL $FORM{OptimizeBL} --UseGamma $FORM{UseGamma} --Alpha $FORM{Alpha} --jointReconstruction $FORM{JointReconstruction} --indelReconstruction $FORM{IndelReconstructionMethod} --indelCutOff $FORM{IndelsCutoff}\n";
	329
	330	open OUTPUT, ">>$FORM{outDir}$VARS{OutHtmlFile}" \|\| exit_on_error("sys_error","Can't open output page: '$FORM{outDir}$VARS{OutHtmlFile}' $!");
	331	print OUTPUT "<h4><font face=Verdana><u>Running Messages:</u></h4></font>\n";
	332	close OUTPUT;
	333	if (($VARS{UserProvideTree} eq "NO") and ($FORM{TreeAlg} eq "RAXML"))
	334	{
	335	if ($VARS{isServer} eq "YES")
	336	{
	337	open OUTPUT, ">>$FORM{outDir}$VARS{OutHtmlFile}" \|\| exit_on_error("sys_error","Can't open output page: '$FORM{outDir}$VARS{OutHtmlFile}' $!");
	338	print_message_to_output("Generating the phylogenetic tree using RAxML");
	339	close (OUTPUT);
	340	}
	341	RAxML();
	342	$FORM{Tree}="$FORM{outDir}$VARS{RAxML_Tree}";
	343	}
	344	RunFastML();
	345
	346	$FORM{Tree}="$FORM{outDir}$VARS{tree_newick}" if ($FORM{Tree} eq "NA");
	347	# Check if there are indels to reconstruct
	348	$VARS{AreThereIndels}=AreThereIndels($FORM{MSA_File});
	349	if ($VARS{AreThereIndels} eq "YES")
	350	{
	351	open OUTPUT, ">>$FORM{outDir}$VARS{OutHtmlFile}" \|\| exit_on_error("sys_error","Can't open output page: '$FORM{outDir}$VARS{OutHtmlFile}' $!");
	352	print_message_to_output("Ancestral reconstruction of indels");
	353	close (OUTPUT);
	354	IndelReconstruction($FORM{MSA_File},$FORM{Tree},$FORM{outDir},$FORM{IndelsCutoff});
	355	}
	356
	357
	358	#### BRING BACK NAMES
	359	#TreeCodesToNames($FORM{Tree},$CodeToSeqName);
	360	#TreeCodesToNames("$FORM{outDir}/$VARS{tree_newick}",$CodeToSeqName);
	361	TreeCodesToNamesShort($FORM{Tree},$CodeToSeqName);
	362	TreeCodesToNamesShort("$FORM{outDir}/$VARS{tree_newick}",$CodeToSeqName);
	363	MSACodesToNames($FORM{MSA_File},$CodeToSeqName);
	364
	365	MSACodesToNames("$FORM{outDir}/$VARS{seq_marginal}",$CodeToSeqName);
	366	MSACodesToNames("$FORM{outDir}/$VARS{marginal_seq_chars_and_indel}",$CodeToSeqName) if ($VARS{AreThereIndels} eq "YES");
	367	if((($FORM{IndelReconstructionMethod} eq "PARSIMONY") or ($FORM{IndelReconstructionMethod} eq "BOTH")) and ($VARS{AreThereIndels} eq "YES"))
	368	{
	369	# print "MSACodesToNames($FORM{outDir}/$VARS{marginal_seq_chars_and_parsimony_indels},$CodeToSeqName);\n";<STDIN>;
	370	MSACodesToNames("$FORM{outDir}/$VARS{marginal_seq_chars_and_parsimony_indels}",$CodeToSeqName);
	371	TabDelFileCodesToNames("$FORM{outDir}/$VARS{marginal_prob_chars_and_parsimony_indels}",1,$CodeToSeqName);
	372	TabDelFileCodesToNames("$FORM{outDir}/$VARS{parsimony_indels}",1,$CodeToSeqName);
	373	}
	374
	375	#TabDelFileCodesToNames();
	376	TabDelFileCodesToNames("$FORM{outDir}/$VARS{marginal_prob_chars_and_indel}",1,$CodeToSeqName) if ((($FORM{IndelReconstructionMethod} eq "ML") or ($FORM{IndelReconstructionMethod} eq "BOTH")) and ($VARS{AreThereIndels} eq "YES"));
	377
	378	#CommaDelFileCodesToNames("$FORM{outDir}/$VARS{prob_marginal_csv}",0,$CodeToSeqName);
	379	TabDelFileCodesToNames("$FORM{outDir}/$VARS{marginal_indel_prob}",1,$CodeToSeqName) if ((($FORM{IndelReconstructionMethod} eq "ML") \|\| ($FORM{IndelReconstructionMethod} eq "BOTH")) and ($VARS{AreThereIndels} eq "YES"));
	380
	381	# print_message_to_output ("<A HREF= $VARS{marginal_prob_chars_and_indel} TARGET=_blank> The probabilities of the marginal reconstruction (including ancestral reconstruction of indels)</A>") if (($FORM{IndelReconstructionMethod} eq "ML") and ($VARS{AreThereIndels} eq "YES"));
	382	# print_message_to_output ("<A HREF= $VARS{prob_marginal} TARGET=_blank> The probabilities of the marginal reconstruction (without ancestral reconstruction of indels)</A>");
	383	# print_message_to_output ("<A HREF= $VARS{marginal_indel_prob} TARGET=_blank> The probabilities of the marginal reconstruction for indels</A>") if (($FORM{IndelReconstructionMethod} eq "ML") and ($VARS{AreThereIndels} eq "YES"));
	384
	385	if ($FORM{JointReconstruction} eq "YES"){
	386	MSACodesToNames("$FORM{outDir}/$VARS{seq_joint}",$CodeToSeqName);
	387	}
	388
	389	AncestorFileCodesToNames("$FORM{outDir}/$VARS{tree_ancestor}",$CodeToSeqName);
	390	# print_message_to_output ("<A HREF= $VARS{tree_ancestor} TARGET=_blank> Tree in Ancestor format</A>");
	391
	392
	393
	394
	395	#### OUTPUTS
	396	MakeJalViewOutputs();
	397	ExtractAncestralProbPerNodePerSite("$FORM{outDir}$VARS{prob_marginal}","$FORM{outDir}$VARS{prob_marginal_csv}",$FORM{seqType});
	398	ExtractAncestralLogLikelihoodPerNodePerSite("$FORM{outDir}$VARS{prob_marginal}","$FORM{outDir}$VARS{log_likelihood_prob_marginal_csv}",$FORM{seqType});
	399	ZipAllOutputs() if ($VARS{isServer} eq "YES");
	400	OrganizeOutputs();
	401	#print to output
	402	open OUTPUT, ">>$FORM{outDir}$VARS{OutHtmlFile}" \|\| exit_on_error("sys_error","Can't open output page: '$FORM{outDir}$VARS{OutHtmlFile}' $!");
	403	print OUTPUT "\n<H1><center><a name=finish> FastML has finished.</a></center></H1><br><br>\n";
	404	print OUTPUT "<p><A HREF= $VARS{JalView_Marginal_Chars_and_Indel_Reconstruction} TARGET=_blank> The sequences of the marginal reconstruction colored by probabilities with tree (with reconstruction of indels)</A></br>\n" if ((($FORM{IndelReconstructionMethod} eq "ML") \|\| ($FORM{IndelReconstructionMethod} eq "BOTH"))and ($VARS{AreThereIndels} eq "YES"));
	405	print OUTPUT "<p><A HREF= $VARS{JalView_Marginal_Chars_and_Parsimony_Indel_Reconstruction} TARGET=_blank> The sequences when using marginal probabilities to reconstruct characters and maximum parsimony to reconstruct indels colored by probabilities with tree (with reconstruction of indels)</A></br>\n" if ((($FORM{IndelReconstructionMethod} eq "PARSIMONY") \|\|($FORM{IndelReconstructionMethod} eq "BOTH")) and ($VARS{AreThereIndels} eq "YES"));
	406	print OUTPUT "<p><A HREF= $VARS{JalView_Marginal_Reconstruction} TARGET=_blank> The sequences of the marginal reconstruction colored by probabilities with Tree (without reconstruction of indels)</A></br>\n";
	407	print OUTPUT "<p><A HREF= $VARS{JalView_Joint_Reconstruction} TARGET=_blank> The sequences of the joint reconstruction with probabilities and Tree (without reconstruction of indels)</A></br>\n" if ($FORM{JointReconstruction} eq "YES");
	408
	409	print OUTPUT "<br><br>\n";
	410	print OUTPUT "<br><br><h4><u>Output Files:</u></h4>\n";
	411	print_message_to_output ("<B>Download all FastML outputs in a <A HREF='$VARS{All_Outputs_Zip}'>click!</A><br><br></B>") if ($VARS{isServer} eq "YES");
	412	print OUTPUT "<span class=\"PrintOutH\"><div id=\"PrintOutH\">Marginal reconstruction</span></div>\n";
	413	print OUTPUT "<br><span class=\"PrintOutH_3\"><div id=\"PrintOutH_3\">Sequences<br></span></div>\n";
	414	print_message_to_output ("<A HREF= $VARS{marginal_seq_chars_and_indel} TARGET=_blank> The sequences of the marginal reconstruction (including ancestral reconstruction of indels)</A>") if ((($FORM{IndelReconstructionMethod} eq "ML") or ($FORM{IndelReconstructionMethod} eq "BOTH")) and ($VARS{AreThereIndels} eq "YES"));
	415	print_message_to_output ("<A HREF= $VARS{marginal_seq_chars_and_parsimony_indels} TARGET=_blank> The sequences when using marginal probability to reconstruct characters and maximum parsimony to reconstruct indels</A>") if((($FORM{IndelReconstructionMethod} eq "PARSIMONY") or ($FORM{IndelReconstructionMethod} eq "BOTH")) and ($VARS{AreThereIndels} eq "YES"));
	416	print_message_to_output ("<A HREF= $VARS{seq_marginal} TARGET=_blank> The sequences of the marginal reconstruction (without ancestral reconstruction of indels)</A>");
	417	if ($VARS{isServer} eq "YES")
	418	{
	419	print_message_to_output ("<form enctype=\"multipart/form-data\" action=\"http://guidance.tau.ac.il/make_logo.php\" method=\"post\">\n Create a logo of the posterior probability at ancestral node ".print_make_logo_selection_box("$FORM{outDir}$VARS{prob_marginal_csv}",$FORM{seqType}));
	420	print_message_to_output ("<form enctype=\"multipart/form-data\" action=\"http://guidance.tau.ac.il/generate_kMostProbSeq.php\" method=\"post\">\nGenerate <select name=\"k\" id=\"k\"><option value=\"10\">10</option><option value=\"25\">25</option><option value=\"50\">50</option><option value=\"100\">100</option><option value=\"250\">250</option><option value=\"500\">500</option><option value=\"1000\">1000</option></select> most likely ancestral sequences for ancestral node ".print_make_kMostProbSeq_selection_box("$FORM{outDir}$VARS{log_likelihood_prob_marginal_csv}",$FORM{seqType}));
	421	print_message_to_output ("<form enctype=\"multipart/form-data\" action=\"http://guidance.tau.ac.il/SampleSeqFromProb.php\" method=\"post\">\n Sample <select name=\"NumOfSeq\" id=\"NumOfSeq\"><option value=\"10\">10</option><option value=\"25\">25</option><option value=\"50\">50</option><option value=\"100\">100</option><option value=\"250\">250</option><option value=\"500\">500</option><option value=\"1000\">1000</option><option value=\"2000\">2000</option><option value=\"3000\">3000</option><option value=\"4000\">4000</option><option value=\"5000\">5000</option></select> sequences from the posterior distribution for ancestral node ".print_SampleSeq_selection_box("$FORM{outDir}$VARS{prob_marginal_csv}",$FORM{seqType}));
	422	}
	423
	424	print OUTPUT "<br><span class=\"PrintOutH_3\"><div id=\"PrintOutH_3\">Probabilities<br></span></div>\n";
	425	print_message_to_output ("<A HREF= $VARS{marginal_prob_chars_and_indel} TARGET=_blank> The probabilities of the marginal reconstruction (including ancestral reconstruction of indels)</A>") if ((($FORM{IndelReconstructionMethod} eq "ML") or ($FORM{IndelReconstructionMethod} eq "BOTH")) and ($VARS{AreThereIndels} eq "YES"));
	426	print_message_to_output ("<A HREF= $VARS{prob_marginal_csv} TARGET=_blank> The probabilities of the marginal reconstruction (without ancestral reconstruction of indels) - csv format</A>");
	427	print_message_to_output ("<A HREF= $VARS{marginal_indel_prob} TARGET=_blank> The probabilities of the marginal reconstruction for indels</A>") if ((($FORM{IndelReconstructionMethod} eq "ML") or ($FORM{IndelReconstructionMethod} eq "BOTH")) and ($VARS{AreThereIndels} eq "YES"));
	428
	429	if ($FORM{JointReconstruction} eq "YES"){
	430	print OUTPUT "<span class=\"PrintOutH\"><div id=\"PrintOutH\">Joint reconstruction</span></div>\n";
	431	print_message_to_output ("<A HREF= $VARS{seq_joint} TARGET=_blank> The sequences of the joint reconstruction</A>");
	432	print_message_to_output ("<A HREF= $VARS{prob_joint} TARGET=_blank> The probabilities of the joint reconstruction</A>");
	433	}
	434
	435	print OUTPUT "<span class=\"PrintOutH\"><div id=\"PrintOutH\">Phylogenetic tree</span></div>\n";
	436	print_message_to_output ("<A HREF= $VARS{tree_newick} TARGET=_blank> Tree in Newick format</A>");
	437	print_message_to_output ("<A HREF= $VARS{tree_ancestor} TARGET=_blank> Tree in Ancestor format</A>");
	438
	439	print OUTPUT "<br><br>\n";
	440
	441	print OUTPUT "\n<br><br><p><center>Please <a href= $VARS{DevMail}>report any problem</a> in case of need.</center></p>\n";
	442	open (END_OK,">$FORM{outDir}"."FASTML_$VARS{RunNumber}".".END_OK");
	443	close (END_OK);
	444	close LOG;
	445	close OUTPUT;
	446
	447	if ($VARS{isServer} eq "YES")
	448	{
	449	sleep(25);
	450
	451	# UPDATE HEADER that the job has finished
	452	open OUTPUT, "<$FORM{outDir}$VARS{OutHtmlFile}";
	453	my @output = <OUTPUT>;
	454	close OUTPUT;
	455	open OUTPUT, ">$FORM{outDir}$VARS{OutHtmlFile}";
	456	foreach my $line (@output){
	457	if ($line =~ /FastML job status page/i){ #finds the phrase "FATML" job status page, case-insensitive
	458	print OUTPUT "<H1 align=center>FastML Job Status Page - <font color='red'>FINISHED</font></h1>\n";
	459	}
	460	elsif ($line =~/Queued/)
	461	{
	462	$line=~s/Queued/Finished/;
	463	print OUTPUT $line;
	464	}
	465	elsif ($line =~ /REFRESH/ or $line =~ /NO-CACHE/){next;}
	466	else {
	467	print OUTPUT $line;
	468	}
	469	}
	470	close OUTPUT;
	471	unlink ("$FORM{outDir}QUEUE_STATUS");
	472	&send_finish_email_to_user();
	473	}
	474
	475	################################################ SUBROUTINES #############################################
	476
	477	#---------------------------------------------
	478	sub OrganizeOutputs
	479	{
	480	my ($Tree_FileName,$Tree_dir)=fileparse($FORM{Tree});
	481	my ($MSA_FileName,$MSA_dir)=fileparse($FORM{MSA_File});
	482	my @DebugFiles=($Tree_FileName.".ORIG_NAMES",$Tree_FileName.".ForIndelReconstruction",$Tree_FileName.".CODES",$VARS{tree_newick}.".CODES",$MSA_FileName.".ORIG_NAMES",$MSA_FileName.".CODES","SeqCodes",$VARS{seq_marginal}.".CODES",$VARS{marginal_seq_chars_and_indel}.".CODES",$VARS{marginal_prob_chars_and_indel}.".CODES",$VARS{marginal_indel_prob}.".CODES",$VARS{seq_joint}.".CODES",$VARS{tree_ancestor}.".CODES",$VARS{seq_marginal_JalView}.".CODES",$VARS{seq_marginal_Chars_and_Indels_JalView}.".CODES",$VARS{Tree_JalView}.".CODES",$VARS{seq_joint_JalView}.".CODES",$VARS{marginal_prob_chars_and_parsimony_indels}.".CODES",$VARS{marginal_seq_chars_and_parsimony_indels}.".CODES",$VARS{parsimony_indels}.".CODES",$VARS{seq_marginal_chars_and_parsimony_indels_JalView}.".CODES");
	483	my @JalViewFiles=($VARS{JalViewMarginalFeaturesFile},$VARS{seq_marginal_JalView},$VARS{Tree_JalView},$VARS{JalViewMarginal_Chars_and_Indels_FeaturesFile},$VARS{seq_marginal_Chars_and_Indels_JalView},$VARS{JalViewJointAnnotationGraphFile},$VARS{seq_joint_JalView},$VARS{JalView_Marginal_Chars_and_Indel_Reconstruction},$VARS{JalView_Joint_Reconstruction},$VARS{JalView_Marginal_Reconstruction},$VARS{seq_marginal_chars_and_parsimony_indels_JalView},$VARS{JalViewMarginal_Chars_and_Parsimony_Indels_FeaturesFile},$VARS{JalView_Marginal_Chars_and_Parsimony_Indel_Reconstruction});
	484
	485	my @FilesWithParsimonyIndels=($VARS{marginal_prob_chars_and_parsimony_indels},$VARS{marginal_seq_chars_and_parsimony_indels},$VARS{parsimony_indels});
	486	my @FilesWithMLIndels=($VARS{marginal_seq_chars_and_indel},$VARS{marginal_prob_chars_and_indel},$VARS{marginal_indel_prob},$VARS{JalViewMarginal_Chars_and_Indels_FeaturesFile},$VARS{seq_marginal_Chars_and_Indels_JalView},$VARS{JalView_Marginal_Chars_and_Indel_Reconstruction});
	487	my @Logs_files=("IndelReconstruction.log","log.txt");
	488
	489	if ($FORM{IndelReconstructionMethod} eq "PARSIMONY")
	490	{
	491	foreach my $file (@FilesWithMLIndels)
	492	{
	493	unlink ("$FORM{outDir}$file");
	494	}
	495	}
	496	if ($FORM{IndelReconstructionMethod} eq "ML")
	497	{
	498	foreach my $file (@FilesWithParsimonyIndels)
	499	{
	500	unlink ("$FORM{outDir}$file");
	501	}
	502	}
	503	if ($FORM{DEBUG} eq "YES")
	504	{
	505	my $ForDebugDir="$FORM{outDir}/FilesForDebug/";
	506	mkdir ("$ForDebugDir") if (!-e "$ForDebugDir");
	507	foreach my $file (@DebugFiles)
	508	{
	509	move ("$FORM{outDir}$file",$ForDebugDir) if (-e "$FORM{outDir}$file");
	510	}
	511	}
	512	else # delete files
	513	{
	514	foreach my $file (@DebugFiles)
	515	{
	516	unlink ("$FORM{outDir}$file");
	517	}
	518	}
	519	if ($VARS{isServer} eq "NO")
	520	{
	521	# JALVEIW RELATED FILES
	522	my $ForJalViewDir="$FORM{outDir}/FilesForJalView/";
	523	mkdir ($ForJalViewDir) if (!-e $ForJalViewDir);
	524	foreach my $file (@JalViewFiles)
	525	{
	526	move ("$FORM{outDir}$file",$ForJalViewDir) if (-e "$FORM{outDir}$file");
	527	}
	528	}
	529	my $test_gzip=`which gzip`;
	530	if (($test_gzip !~/not found/) and ($test_gzip!~/^\s+$/))
	531	{
	532	foreach my $file (@Logs_files)
	533	{
	534	system ("gzip $FORM{outDir}$file");
	535	}
	536	}
	537	}
	538	#---------------------------------------------
	539	sub ZipAllOutputs
	540	{
	541
	542	#Marginal reconstruction
	543	system ("cp $VARS{marginal_seq_chars_and_indel} $FORM{outDir}sequences_of_the_marginal_reconstruction_including_indels.fas");
	544	system ("zip -r $FORM{outDir}$VARS{All_Outputs_Zip} sequences_of_the_marginal_reconstruction_including_indels.fas; rm -f $FORM{outDir}sequences_of_the_marginal_reconstruction_including_indels.fas");
	545
	546	if(($FORM{IndelReconstructionMethod} eq "PARSIMONY") and ($VARS{AreThereIndels} eq "YES"))
	547	{
	548	system ("cp $VARS{marginal_prob_chars_and_parsimony_indels} $FORM{outDir}sequences_of_marginal_reconstruction_characters_and_parsimony_indels.fas");
	549	system ("zip -r $FORM{outDir}$VARS{All_Outputs_Zip} sequences_of_marginal_reconstruction_characters_and_parsimony_indels.fas; rm -f $FORM{outDir}sequences_of_marginal_reconstruction_characters_and_parsimony_indels.fas");
	550	}
	551
	552	system ("cp $VARS{seq_marginal} $FORM{outDir}sequences_of_the_marginal_reconstruction_without_reconstruction_of_indels.fas");
	553	system ("zip -r $FORM{outDir}$VARS{All_Outputs_Zip} sequences_of_the_marginal_reconstruction_without_reconstruction_of_indels.fas; rm -f $FORM{outDir}sequences_of_the_marginal_reconstruction_without_reconstruction_of_indels.fas");
	554
	555
	556	#Probabilities
	557	if (($FORM{IndelReconstructionMethod} eq "ML") and ($VARS{AreThereIndels} eq "YES"))
	558	{
	559	system ("cp $VARS{marginal_prob_chars_and_indel} $FORM{outDir}Max_probabilities_of_marginal_reconstruction_with_indels.txt");
	560	system ("zip -r $FORM{outDir}$VARS{All_Outputs_Zip} Max_probabilities_of_marginal_reconstruction_with_indels.txt; rm -f $FORM{outDir}Max_probabilities_of_marginal_reconstruction_with_indels.txt");
	561	}
	562
	563	system ("cp $VARS{prob_marginal} $FORM{outDir}probabilities_of_the_marginal_reconstruction_without_indels.txt");
	564	system ("zip -r $FORM{outDir}$VARS{All_Outputs_Zip} probabilities_of_the_marginal_reconstruction_without_indels.txt; rm -f $FORM{outDir}probabilities_of_the_marginal_reconstruction_without_indels.txt");
	565
	566	if (($FORM{IndelReconstructionMethod} eq "ML") and ($VARS{AreThereIndels} eq "YES"))
	567	{
	568	system ("cp $VARS{marginal_indel_prob} $FORM{outDir}probabilities_of_the_marginal_reconstruction_for_indels.txt");
	569	system ("zip -r $FORM{outDir}$VARS{All_Outputs_Zip} probabilities_of_the_marginal_reconstruction_for_indels.txt; rm -f $FORM{outDir}probabilities_of_the_marginal_reconstruction_for_indels.txt");
	570	}
	571
	572	# Joint reconstruction
	573	if ($FORM{JointReconstruction} eq "YES")
	574	{
	575	system ("cp $VARS{seq_joint} $FORM{outDir}sequences_of_the_joint_reconstruction.fas");
	576	system ("zip -r $FORM{outDir}$VARS{All_Outputs_Zip} sequences_of_the_joint_reconstruction.fas; rm -f $FORM{outDir}sequences_of_the_joint_reconstruction.fas");
	577
	578	system ("cp $VARS{prob_joint} $FORM{outDir}probabilities_of_the_joint_reconstruction.txt");
	579	system ("zip -r $FORM{outDir}$VARS{All_Outputs_Zip} probabilities_of_the_joint_reconstruction.txt; rm -f $FORM{outDir}probabilities_of_the_joint_reconstruction.fas");
	580	}
	581
	582	#Phylogenetic tree
	583	system ("cp $VARS{tree_newick} $FORM{outDir}Tree.txt");
	584	system ("zip -r $FORM{outDir}$VARS{All_Outputs_Zip} Tree.txt; rm -f $FORM{outDir}Tree.txt");
	585
	586	system ("cp $VARS{tree_ancestor} $FORM{outDir}Tree_in_Ancestor_format.txt");
	587	system ("zip -r $FORM{outDir}$VARS{All_Outputs_Zip} Tree_in_Ancestor_format.txt; rm -f $FORM{outDir}Tree_in_Ancestor_format.txt");
	588
	589	}
	590
	591	#---------------------------------------------
	592	sub print_message_to_output{
	593	#---------------------------------------------
	594	my $msg = shift;
	595	print OUTPUT "\n<ul><li>$msg</li></ul>\n";
	596	}
	597
	598	sub print_make_logo_selection_box
	599	{
	600	my $Marginal_ProbFile_CSV=shift;
	601	my $SeqType=shift;
	602	open (MARGINAL_PROB,$Marginal_ProbFile_CSV) \|\| exit_on_error("sys_error","print_make_logo_selection_box:Can't open MARGINAL_PROB: '$Marginal_ProbFile_CSV' $!");
	603	my $return_str="";
	604	$return_str.="<input type=\"hidden\" name=\"run_num\" value=\"$VARS{RunNumber}\">\n";
	605	$return_str.="<input type=\"hidden\" name=\"MarginalProbFile\" value=\"$Marginal_ProbFile_CSV\"/>\n";
	606	$return_str.="<input type=\"hidden\" name=\"SeqType\" value=\"$SeqType\"/>\n";
	607	$return_str.="<select name=\"Node\" id=\"Node\">\n";
	608
	609	my %Nodes=();
	610	while (my $line=<MARGINAL_PROB>)
	611	{
	612	if ($line=~/N([0-9]+)/)
	613	{
	614	$Nodes{$1}=1;
	615	}
	616	}
	617	for my $Node ( sort {$a<=>$b} keys %Nodes) {
	618	$return_str.="<option value=\"N$Node\">N$Node</option>";
	619	}
	620	$return_str.="<input type=\"submit\" value=\"Make Logo\"/>\n";
	621	$return_str.="</form>\n";
	622	close (MARGINAL_PROB);
	623	return $return_str;
	624	}
	625
	626	sub print_make_kMostProbSeq_selection_box
	627	{
	628	my $LLMarginal_ProbFile_CSV=shift;
	629	my $SeqType=shift;
	630	open (MARGINAL_PROB,$LLMarginal_ProbFile_CSV) \|\| exit_on_error("sys_error","print_make_kMostProbSeq_selection_box:Can't open MARGINAL_PROB: '$LLMarginal_ProbFile_CSV' $!");
	631	my $return_str="";
	632	$return_str.="<input type=\"hidden\" name=\"run_num\" value=\"$VARS{RunNumber}\">\n";
	633	$return_str.="<input type=\"hidden\" name=\"LLMarginalProbFile\" value=\"$LLMarginal_ProbFile_CSV\"/>\n";
	634	$return_str.="<input type=\"hidden\" name=\"SeqType\" value=\"$SeqType\"/>\n";
	635	$return_str.="<select name=\"Node\" id=\"Node\">\n";
	636
	637	my %Nodes=();
	638	while (my $line=<MARGINAL_PROB>)
	639	{
	640	if ($line=~/N([0-9]+)/)
	641	{
	642	$Nodes{$1}=1;
	643	}
	644	}
	645	for my $Node ( sort {$a<=>$b} keys %Nodes) {
	646	$return_str.="<option value=\"N$Node\">N$Node</option>";
	647	}
	648	$return_str.="<input type=\"submit\" value=\"Generate Sequences\"/>\n";
	649	$return_str.="</form>\n";
	650	close (MARGINAL_PROB);
	651	return $return_str;
	652	}
	# Notify the submitting user by e-mail that the FastML results are ready.
	#
	# Does nothing unless $VARS{UserMailFile} exists and is non-empty; its first
	# line is used as the recipient address. When "$FORM{outDir}JOB_TITLE" exists,
	# its first line is added to the message as the job title. The mail is sent by
	# running ./sendEmail.pl from $VARS{send_email_dir}; the message, the command
	# line and any failure are appended to $VARS{OutLogFile}.
	#
	# Side effect: changes the working directory to $VARS{send_email_dir}.
	sub send_finish_email_to_user
	{
		return unless (-s $VARS{UserMailFile});

		open LOG, ">>$VARS{OutLogFile}";

		unless (open (USR_MAIL,$VARS{UserMailFile}))
		{
			print LOG "send_finish_email_to_user: Can't open '$VARS{UserMailFile}' $!\n";
			close (LOG);
			return;
		}
		my $recipient=<USR_MAIL>;
		close USR_MAIL;
		chomp ($recipient);
		# The address is placed inside a single-quoted shell word below. Close and
		# reopen the quote around every embedded single quote so the value cannot
		# end the word early and inject shell syntax.
		$recipient =~ s/'/'\\''/g;

		my $HttpPath=$VARS{run_url};
		my $email_subject = "'Your FastML results for run number $VARS{RunNumber} are ready'";
		# "\\n" puts a literal backslash-n into the argument handed to sendEmail.pl.
		my $email_message = "'Hello,\\n\\nThe results for your FastML run are ready at:\\n".$HttpPath."\\n\\nRunning Parameters:\\n";
		if (-e "$FORM{outDir}JOB_TITLE")
		{
			if (open (JOB_TITLE,"$FORM{outDir}JOB_TITLE"))
			{
				my $JOB_TITLE_STR=<JOB_TITLE>;
				close (JOB_TITLE);
				$JOB_TITLE_STR="" unless (defined $JOB_TITLE_STR); # empty title file
				chomp ($JOB_TITLE_STR);
				$JOB_TITLE_STR =~ s/'/'\\''/g; # same quoting rule as the recipient
				$email_message.="Job Title:$JOB_TITLE_STR\\n";
			}
		}
		$email_message.="\\nPlease note: the results will be kept on the server for three months.\\n\\nThanks\\nFastML Team'";

		# If the cluster nodes cannot reach the network, the same sendEmail.pl call
		# can instead be wrapped in: ssh bioseq@lecs "cd $VARS{send_email_dir}; perl sendEmail.pl ..."
		my $msg = './sendEmail.pl -f \''.GENERAL_CONSTANTS::ADMIN_EMAIL.'\' -t \''.$recipient.'\' -u '.$email_subject.' -xu '.$VARS{userName}.' -xp '.$VARS{userPass}.' -s '.$VARS{smtp_server}.' -m '.$email_message;

		# NOTE(review): the logged command line includes the -xp password argument.
		print LOG "MESSAGE:$email_message\nCOMMAND:$msg\n";
		chdir $VARS{send_email_dir};
		my $email_system_return = `$msg`;
		unless ($email_system_return =~ /successfully/) {
			print LOG "send_mail: The message was not sent successfully. system returned: $email_system_return\n";
		}
		close (LOG);
	}
	688
	# Input-validation hook. Currently a stub: it performs no checks and hands
	# nothing back to the caller.
	sub ValidateInput
	{
		return;
	}
	# Run the RAxML wrapper script ($VARS{RAxML}) on $FORM{MSA_File} from inside
	# $FORM{outDir} and make sure the tree file $VARS{RAxML_Tree} was produced.
	#
	# The substitution matrix chosen in $FORM{SubMatrix} is passed upper-cased,
	# except for the matrices listed below, which are all passed as GTRCAT.
	# The command is written to LOG (not opened here) and echoed to STDOUT.
	# A missing tree file ends the run through exit_on_error("sys_error", ...).
	sub RAxML
	{
		my %passed_as_gtrcat = map { $_ => 1 } qw(yang goldman_yang empiriCodon JC_Nuc T92 HKY GTR);
		my $SubModelRaxML = $passed_as_gtrcat{$FORM{SubMatrix}} ? "GTRCAT" : uc($FORM{SubMatrix});

		my $RAxML_cmd = join(" ",
			"cd $FORM{outDir};perl",
			$VARS{RAxML},
			$FORM{MSA_File},
			$VARS{RAxML_Tree},
			$FORM{outDir},
			$SubModelRaxML);
		print LOG "RAxML: running: $RAxML_cmd\n";
		print "$RAxML_cmd\n";
		system($RAxML_cmd);

		# The exit status is not inspected; success is judged by the tree file alone.
		my $expected_tree = $FORM{outDir}.$VARS{RAxML_Tree};
		if (!-e $expected_tree)
		{
			exit_on_error("sys_error","RAxML: failed to create the output file: '$expected_tree'");
		}
	}
	# Build and run the fastml command line for the ancestral reconstruction of
	# characters, then verify that it succeeded.
	#
	# Reads its settings from %FORM / %VARS:
	#   SubMatrix            -> model flag (see %MatrixHash)
	#   UserProvideTree/Tree -> "-t <tree>" (user tree, or the RAxML tree)
	#   OptimizeBL, TreeAlg  -> "-b"
	#   UseGamma, Alpha      -> "-g" and optionally "-p <alpha>"
	#   JointReconstruction  -> "-f" when "NO"
	# STDOUT of fastml is redirected to "$FORM{outDir}fastml.std". The run is
	# aborted through exit_on_error("sys_error", ...) when that file contains the
	# word "error" (any case) or when the marginal sequence/probability files are
	# missing or empty. LOG is written to but never opened here.
	sub RunFastML
	{
		# NOTE(review): goldman_yang and GTR both map to '-mg' - confirm against
		# the fastml option list.
		my %MatrixHash = (JTT => '-mj',
			LG => '-ml',
			mtREV => '-mr',
			cpREV => '-mc',
			WAG => '-mw',
			Dayhoff => '-md',
			yang => '-my',
			goldman_yang => '-mg',
			empiriCodon => '-me',
			JC_Nuc => '-mn',
			JC_AA => '-ma',
			T92 => '-mt',
			HKY => '-mh',
			GTR => '-mg'
		);

		# "or" (not "||") so the handler is tied to open's result: with "||" the
		# alternative was bound to the always-true file-name string and never ran.
		open (OUTPUT, ">>$FORM{outDir}$VARS{OutHtmlFile}") or exit_on_error("sys_error","Can't open output page: '$FORM{outDir}$VARS{OutHtmlFile}' $!");
		print_message_to_output("Ancestral reconstruction of characters");
		close (OUTPUT);

		my $tree_built_by_raxml = (($VARS{UserProvideTree} eq "NO") and ($FORM{TreeAlg} eq "RAXML"));

		my $fastml_comm ="cd $FORM{outDir}; ".$VARS{fastml}." -s $FORM{MSA_File} $MatrixHash{$FORM{SubMatrix}} -qf";
		if ($VARS{UserProvideTree} eq "YES") {
			$fastml_comm .= " -t $FORM{Tree}";
		}
		elsif ($tree_built_by_raxml) {
			$fastml_comm .= " -t $FORM{outDir}$VARS{RAxML_Tree}";
		}
		# "-b" is requested both by OptimizeBL=NO and by a RAxML-built tree (no
		# branch-length optimization in that case); emit the flag only once.
		if (($FORM{OptimizeBL} eq "NO") or $tree_built_by_raxml) {
			$fastml_comm .= " -b";
		}
		if ($FORM{UseGamma} eq "YES") {
			$fastml_comm .= " -g";
			if ($FORM{Alpha} ne "") {
				$fastml_comm .= " -p $FORM{Alpha}";
			}
		}
		if ($FORM{JointReconstruction} eq "NO") {
			$fastml_comm .= " -f";
		}

		my $std_file = $FORM{outDir}."fastml.std";
		$fastml_comm .= " > $std_file";
		print LOG "\nRunFastML: running $fastml_comm\n";
		print "$fastml_comm\n";
		system ($fastml_comm);

		# CHECK FOR ERRORS - TO DO VALIDATE FILE NAME
		if ((-e $std_file) and ((-s $std_file) > 0)){
			if (open (STD, $std_file)) {
				# lexical loop variable: do not clobber the caller's $_
				while (my $std_line = <STD>){
					if ($std_line =~ /error/i){
						close STD;
						print LOG "\nAn error was found in file \"$FORM{outDir}fastml.std\":$std_line \n";
						exit_on_error("sys_error","An error was found in file \"$FORM{outDir}fastml.std\"");
					}
				}
				close STD;
			}
			else {
				print LOG "\nRunFastML: Can't open '$std_file' to check for errors: $!\n";
			}
		}
		if(!-e $FORM{outDir}.$VARS{seq_marginal} or -z $FORM{outDir}.$VARS{seq_marginal} or !-e $FORM{outDir}.$VARS{prob_marginal} or -z $FORM{outDir}.$VARS{prob_marginal}){
			print LOG "\nThe file $VARS{seq_marginal} or $VARS{prob_marginal} was not created.\n";
			exit_on_error("sys_error","FASTML failed to run, one of the files $VARS{seq_marginal} or $VARS{prob_marginal} were not created...\n");
		}
	}
	778
	# Run the indel reconstruction script ($VARS{Indel_Reconstruction}) from
	# inside the output directory.
	#
	# Arguments (positional): MSA file, tree file, output directory, indel cutoff.
	# The sequence type comes from $FORM{seqType}; "--Debug" is added when
	# $FORM{DEBUG} is "YES". STDOUT of the script goes to
	# "<outDir>/IndelReconstruction.log". The command is written to LOG (not
	# opened here) and echoed to STDOUT.
	# Returns whatever system() returns; the status is not checked here.
	sub IndelReconstruction
	{
		my ($MSA, $Tree, $OutDir, $IndelsCutoff) = @_;

		## TO DO: TO THINK IF WE WANT TO RECONSTRUCT INDELS ALWAYS WITH USER/RaxML provided tree or better to do it with FASTML output and than need to change it from $VARS{treePathOnServer}
		my $IndelReconstruction_cmd = join(" ",
			"cd $OutDir; perl $VARS{Indel_Reconstruction}",
			"--MSA_File $MSA",
			"--Tree_File $Tree",
			"--outDir $OutDir",
			"--seqType $FORM{seqType}",
			"--indelCutOff $IndelsCutoff ");
		if ($FORM{DEBUG} eq "YES")
		{
			$IndelReconstruction_cmd .= " --Debug";
		}
		$IndelReconstruction_cmd .= " > $OutDir/IndelReconstruction.log";

		print LOG "IndelReconstruction: $IndelReconstruction_cmd\n";
		print "$IndelReconstruction_cmd\n";
		# TO DO: check for errors
		system($IndelReconstruction_cmd);
	}
	798
	# Prepare the JalView inputs and launch pages for every reconstruction that
	# was produced, in this order:
	#   1. marginal reconstruction of characters (always);
	#   2. characters + ML indels, when $VARS{AreThereIndels} is "YES" and
	#      $FORM{IndelReconstructionMethod} is "ML" or "BOTH";
	#   3. characters + parsimony indels, when the method is "PARSIMONY" or "BOTH";
	#   4. joint reconstruction, when $FORM{JointReconstruction} is "YES".
	# Each helper is expected to return "OK"; any other value ends the run through
	# exit_on_error("sys_error", ...) with the helper name and its arguments.
	sub MakeJalViewOutputs # TO DO
	{
		my $dir        = $FORM{outDir};
		my $codes_tree = "$dir/$VARS{tree_newick}.CODES";
		my $jv_tree    = "$dir/$VARS{Tree_JalView}";

		# Call a helper and abort unless it answers "OK". $verdict is the word
		# used in the error text ("FAILED" / "Failed").
		my $checked = sub {
			my ($callee, $callee_name, $verdict, @callee_args) = @_;
			my $ans = $callee->(@callee_args);
			if ($ans ne "OK")
			{
				exit_on_error("sys_error", $callee_name."(".join(",", @callee_args)."): $verdict: $ans");
			}
		};

		# Write N-less copies of an alignment and of the tree, then map the
		# sequence codes back to names in both. With $keep_converted_tree set,
		# the tree names are converted only if "<tree>.CODES" does not exist yet.
		my $decode = sub {
			my ($aln_codes, $aln_jalview, $verdict, $keep_converted_tree) = @_;
			$checked->(\&RemoveN_FormAncestralNames, "RemoveN_FormAncestralNames", $verdict,
				$aln_codes, $aln_jalview, $VARS{Aln_format}, $codes_tree, $jv_tree);
			MSACodesToNamesShort($aln_jalview, $CodeToSeqName);
			return if ($keep_converted_tree and -e "$jv_tree.CODES");
			TreeCodesToNamesShort($jv_tree, $CodeToSeqName);
		};

		# Write the launch page: applet page on the server, JNLP otherwise.
		# With $jnlp_needs_explicit_no the JNLP page is written only when
		# $VARS{isServer} is exactly "NO"; without it, for any value but "YES".
		my $publish = sub {
			my ($jnlp_needs_explicit_no, $aln_name, $page, $annotations, $features) = @_;
			if ($VARS{isServer} eq "YES")
			{
				$checked->(\&PrepareJalView, "PrepareJalView", "FAILED",
					$VARS{Tree_JalView}, $aln_name, $VARS{WWWdir}, $page, $annotations, $features);
			}
			elsif (!$jnlp_needs_explicit_no or ($VARS{isServer} eq "NO"))
			{
				$checked->(\&PrepareJalViewJNLP, "PrepareJalViewJNLP", "FAILED",
					$VARS{Tree_JalView}, $aln_name, $page, $annotations, $features);
			}
		};

		# 1. Marginal reconstruction of characters
		$decode->("$dir/$VARS{seq_marginal}.CODES", "$dir/$VARS{seq_marginal_JalView}", "FAILED", 0);
		$checked->(\&make_Jalview_Features_MarginalProb, "make_Jalview_Features_MarginalProb", "FAILED",
			"$dir/$VARS{prob_marginal}", "$dir/$VARS{JalViewMarginalFeaturesFile}", "$dir/$VARS{seq_marginal_JalView}");
		$publish->(1, $VARS{seq_marginal_JalView}, "$dir/$VARS{JalView_Marginal_Reconstruction}",
			"NA", $VARS{JalViewMarginalFeaturesFile});

		if ($VARS{AreThereIndels} eq "YES")
		{
			my $method = $FORM{IndelReconstructionMethod};

			# 2. United indels (ML) and characters reconstruction
			if (($method eq "ML") or ($method eq "BOTH"))
			{
				$decode->("$dir/$VARS{marginal_seq_chars_and_indel}.CODES",
					"$dir/$VARS{seq_marginal_Chars_and_Indels_JalView}", "FAILED", 0);
				$checked->(\&make_Jalview_Features_MarginalProb_IndelsChar, "make_Jalview_Features_MarginalProb_IndelsChar", "FAILED",
					"$dir/$VARS{marginal_prob_chars_and_indel}",
					"$dir/$VARS{JalViewMarginal_Chars_and_Indels_FeaturesFile}",
					"$dir/$VARS{seq_marginal_Chars_and_Indels_JalView}");
				$publish->(0, $VARS{seq_marginal_Chars_and_Indels_JalView},
					"$dir/$VARS{JalView_Marginal_Chars_and_Indel_Reconstruction}",
					"NA", $VARS{JalViewMarginal_Chars_and_Indels_FeaturesFile});
			}

			# 3. ML characters and parsimony indels reconstruction
			if (($method eq "PARSIMONY") or ($method eq "BOTH"))
			{
				$decode->("$dir/$VARS{marginal_seq_chars_and_parsimony_indels}.CODES",
					"$dir/$VARS{seq_marginal_chars_and_parsimony_indels_JalView}", "FAILED", 1);
				$checked->(\&make_Jalview_Features_MarginalProb_IndelsChar, "make_Jalview_Features_MarginalProb_IndelsChar", "FAILED",
					"$dir/$VARS{marginal_prob_chars_and_parsimony_indels}",
					"$dir/$VARS{JalViewMarginal_Chars_and_Parsimony_Indels_FeaturesFile}",
					"$dir/$VARS{seq_marginal_chars_and_parsimony_indels_JalView}");
				$publish->(0, $VARS{seq_marginal_chars_and_parsimony_indels_JalView},
					"$dir/$VARS{JalView_Marginal_Chars_and_Parsimony_Indel_Reconstruction}",
					"NA", $VARS{JalViewMarginal_Chars_and_Parsimony_Indels_FeaturesFile});
			}
		}

		# 4. Joint reconstruction
		if ($FORM{JointReconstruction} eq "YES")
		{
			$checked->(\&Make_JalView_AnnotGraph_JointProb, "Make_JalView_AnnotGraph_JointProb", "FAILED",
				"$dir/$VARS{prob_joint}", "$dir/$VARS{JalViewJointAnnotationGraphFile}", "Joint log likelihood");
			$decode->("$dir/$VARS{seq_joint}.CODES", "$dir/$VARS{seq_joint_JalView}", "Failed", 0);
			$publish->(1, $VARS{seq_joint_JalView}, "$dir/$VARS{JalView_Joint_Reconstruction}",
				$VARS{JalViewJointAnnotationGraphFile}, "NA");
		}
	}
	900
	# Prefix purely numeric leaf names in a Newick tree file with "ID_",
	# rewriting the file in place.
	#
	# Argument: path of the tree file. Only the first line of the file is read
	# and written back. A leaf is a run of digits that directly follows "(" or
	# "," and is directly followed by "," or ")" (tree without distances) or by
	# ":" (tree with distances), so labels that follow ")" and the branch lengths
	# themselves are left alone. Dies when the file cannot be opened; an empty
	# file is left untouched.
	sub FixTreeNamesForJalView
	{
		my $TreeFile=shift;
		open (TREE,$TreeFile) or die "FixTreeNamesForJalView: Can't open tree for reading: '$TreeFile' $!";
		my $Tree=<TREE>;
		close (TREE);
		return unless (defined $Tree);
		chomp ($Tree); # exactly one newline is written back below

		if ($Tree !~/:/) # With No distances, e.g. ((1,2),3);
		{
			# The look-ahead leaves the closing delimiter in place, so neighbouring
			# leaves such as "1,2,3" are all renamed and no ")" is lost.
			$Tree=~s/([(,])([0-9]+)(?=[,)])/${1}ID_$2/g;
		}
		else # (A:0.1,B:0.2,(C:0.3,D:0.4):0.5);
		{
			$Tree=~s/([(,])([0-9]+):/${1}ID_$2:/g;
		}

		# Open for writing only after the content is known to be valid, so a
		# failed read can never truncate the tree.
		open (TREE,">$TreeFile") or die "FixTreeNamesForJalView: Can't open tree for writing: '$TreeFile' $!";
		print TREE $Tree,"\n";
		close (TREE);
	}
	# Prefix purely numeric sequence names in a FASTA alignment with "ID_",
	# rewriting the file in place.
	#
	# Argument: path of the alignment. Every header line (starting with ">") is
	# written back as ">name" plus a newline, where an all-digit name becomes
	# "ID_<digits>"; all other lines are copied unchanged. Dies when the file
	# cannot be opened for reading or for writing.
	sub FixAlnNamesForJalView
	{
		my $MSA=shift;

		open (MSA,$MSA) || die "FixAlnNamesForJalView: Can't open MSA for reading: '$MSA' $!";
		my @records=<MSA>;
		close (MSA);

		open (MSA,">$MSA") || die "FixAlnNamesForJalView: Can't open MSA for writing: '$MSA' $!";
		for my $record (@records)
		{
			if ($record =~ /^>(.*)/)
			{
				my $name = $1;
				$name = "ID_".$name if ($name =~ /^[0-9]+$/);
				$record = ">$name\n";
			}
		}
		print MSA join("", @records);
		close (MSA);
	}
	954	## Handle ERRORS
	955	# HANDLE EXIT
	# Report a fatal error and terminate the program.
	#
	# Arguments:
	#   $which_error - 'user_error' (the message is shown to the user) or
	#                  'sys_error'  (the user gets a generic apology, the real
	#                  message goes to the log and, on the server, to the
	#                  administrator by mail)
	#   $error_msg   - the error text
	#
	# On the server ($VARS{isServer} eq "YES") the text is appended to the result
	# page "$FORM{outDir}$VARS{OutHtmlFile}" when that page exists; otherwise a
	# minimal HTML page is printed to STDOUT. When $VARS{UserMailFile} exists the
	# user is notified by mail and the result page is marked as failed. The exit
	# time is appended to $VARS{OutLogFile}. Never returns.
	sub exit_on_error{
		my $which_error = shift;
		my $error_msg = shift;

		my $error_definition = "<font size=+2 color='red'>ERROR! FastML session has been terminated:</font><br />\n";
		my $syserror = "<font size=+1 color='red'>A SYSTEM ERROR OCCURRED!</font><br />Please try to run FastML again in a few minutes.<br />We apologize for the inconvenience.<br />\n";

		my $on_server = ($VARS{isServer} eq "YES");
		my $output_page = "$FORM{outDir}$VARS{OutHtmlFile}";

		# Used when the result page was never created: emit a bare HTML page
		# (with its CGI header) carrying $body on STDOUT.
		my $print_bare_error_page = sub {
			my $body = shift;
			print "Content-type: text/html\n\n";
			print "<html>\n";
			print "<head>\n";
			print "<title>ERROR has occurred</title>\n";
			print "</head>\n";
			print "<body>\n";
			print $body;
		};

		if ($which_error eq 'user_error'){
			open LOG, ">>$VARS{OutLogFile}";
			print LOG "\n\t EXIT on error:\n$error_msg\n";
			close LOG;
			if ($on_server)
			{
				if (-e $output_page) # OUTPUT IS OPEN
				{
					open (OUTPUT,">>$output_page");
					print OUTPUT $error_definition."$error_msg";
					close (OUTPUT);
					# concatenate explicitly: inside one quoted string the "."
					# used to be printed literally between the two parts
					print $error_definition.$error_msg."\n";
				}
				else # OUTPUT WAS NOT CREATED
				{
					$print_bare_error_page->($error_definition."$error_msg");
				}
			}
			else
			{
				# Stand-alone run: tell the user as well, not only the log file.
				print STDERR "\n\tFASTML EXIT ON ERROR:\n$error_msg\n";
			}
		}
		elsif ($which_error eq 'sys_error')
		{
			# LOG stays open across this branch: send_administrator_mail_on_error
			# writes to it.
			open LOG, ">>$VARS{OutLogFile}";
			if ($on_server)
			{
				send_administrator_mail_on_error ($error_msg);
				if (-e $output_page) # OUTPUT IS OPEN
				{
					print LOG "\n$error_msg\n";
					open (OUTPUT,">>$output_page");
					print OUTPUT $syserror;
					close OUTPUT;
					print "\n$error_msg\n";
				}
				else # Output not open
				{
					$print_bare_error_page->($syserror);
				}
			}
			else
			{
				print STDERR "\n\tFASTML EXIT ON ERROR:\n$error_msg\n";
				print "\n\tFASTML EXIT ON ERROR:\n$error_msg\n";
				print LOG "\n\tFASTML EXIT ON ERROR:\n$error_msg\n";
			}
			close LOG;
		}
		close OUTPUT;

		# NOTE(review): the result page is marked as failed only when a user mail
		# file exists - confirm this coupling is intended.
		if ($on_server and (-e $VARS{UserMailFile}))
		{
			open (EMAIL,$VARS{UserMailFile});
			$VARS{user_email}=<EMAIL>;
			close (EMAIL);
			chomp($VARS{user_email}) if (defined $VARS{user_email});
			send_mail_on_error();
			update_output_that_run_failed();
		}
		open LOG, ">>$VARS{OutLogFile}";
		print LOG "\nExit Time: ".(BIOSEQUENCE_FUNCTIONS::printTime)."\n";
		close LOG;
		# NOTE(review): a bare exit reports status 0 even though the run failed;
		# confirm no caller relies on that before changing it.
		exit;
	}
	1039
	1040	########################################################################################
	# Tell the user by e-mail that the run failed.
	#
	# Uses $VARS{user_email} as the recipient, $VARS{RunNumber} and
	# $VARS{run_url} in the text, and the first line of "$FORM{outDir}JOB_TITLE"
	# (when that file exists) as the job title. The mail is sent by running
	# ./sendEmail.pl in $VARS{send_email_dir} through "ssh bioseq@lecs"; the
	# message, the command line and any failure are appended to
	# $VARS{OutLogFile}.
	#
	# Side effect: changes the working directory to $VARS{send_email_dir}.
	sub send_mail_on_error
	{
		# Values supplied by the submitter end up inside a single-quoted word
		# (parsed by the remote shell) that itself sits inside the double-quoted
		# ssh argument (parsed by the local shell). Escape for both levels so the
		# value can neither end a quoted section nor trigger expansion.
		my $shell_safe = sub {
			my $text = shift;
			$text = "" unless (defined $text);
			$text =~ s/'/'\\''/g;         # remote level: ' becomes '\''
			$text =~ s/([\\"\$`])/\\$1/g; # local level: protect \ " $ and `
			return $text;
		};

		my $HttpPath=$VARS{run_url};
		my $email_subject = "'Your FastML run $VARS{RunNumber} FAILED'";
		my $JOB_TITLE_STR="";
		if (-e "$FORM{outDir}JOB_TITLE")
		{
			if (open (JOB_TITLE,"$FORM{outDir}JOB_TITLE"))
			{
				$JOB_TITLE_STR=<JOB_TITLE>;
				close (JOB_TITLE);
				$JOB_TITLE_STR="" unless (defined $JOB_TITLE_STR); # empty title file
				chomp ($JOB_TITLE_STR);
			}
		}
		# "\\n" puts a literal backslash-n into the argument handed to sendEmail.pl.
		my $email_message = "'Hello,\\n\\nUnfortunately your FastML run (number ".$VARS{RunNumber}.") has failed.\\n";
		if ($JOB_TITLE_STR ne "")
		{
			$email_message.="Job Title:".$shell_safe->($JOB_TITLE_STR)."\\n";
		}
		$email_message.="Please have a look at ".$HttpPath." for further details\\n\\nSorry for the inconvenience\\nFastML Team'";

		my $msg = "ssh bioseq\@lecs \"cd $VARS{send_email_dir}; ".'./sendEmail.pl -f \'TAU BioSequence <bioSequence@tauex.tau.ac.il>\' -t \''.$shell_safe->($VARS{user_email}).'\' -u '.$email_subject.' -xu '.$VARS{userName}.' -xp '.$VARS{userPass}.' -s '.$VARS{smtp_server}.' -m '.$email_message.'"';

		# NOTE(review): the logged command line includes the -xp password argument.
		open LOG, ">>$VARS{OutLogFile}";
		print LOG "MESSAGE:$email_message\nCOMMAND:$msg\n";
		chdir $VARS{send_email_dir};
		my $email_system_return = `$msg`;
		unless ($email_system_return =~ /successfully/) {
			print LOG "send_mail: The message was not sent successfully. system returned: $email_system_return\n";
		}
		close LOG;
	}
	1072
	1073	####################################################################################
	# Mail the administrator address the details of a system error.
	#
	# Argument: the error text. The mail is sent by running ./sendEmail.pl in
	# $VARS{send_email_dir} through "ssh bioseq@lecs". The message, the command
	# line and the command output are written to LOG, which this sub neither
	# opens nor closes.
	#
	# Side effect: changes the working directory to $VARS{send_email_dir}.
	sub send_administrator_mail_on_error
	{
		my $message=shift;
		$message="" unless (defined $message);
		chomp ($message);

		# Error texts routinely contain ' and " (quoted file names). The text is
		# placed in a single-quoted word (parsed by the remote shell) inside the
		# double-quoted ssh argument (parsed by the local shell), so escape it for
		# both levels; unescaped, such a message broke the command.
		$message =~ s/'/'\\''/g;         # remote level: ' becomes '\''
		$message =~ s/([\\"\$`])/\\$1/g; # local level: protect \ " $ and `

		my $email_subject = "'System ERROR has occurred on FastML: $VARS{run_url}'";
		my $email_message = "'Hello,\\n\\nUnfortunately a system System ERROR has occurred on FastML: $VARS{run_url}.\\nERROR: $message.'";
		my $msg = "ssh bioseq\@lecs \"cd $VARS{send_email_dir}; ".'./sendEmail.pl -f \'bioSequence@tauex.tau.ac.il\' -t \''."bioSequence\@tauex.tau.ac.il".'\' -u '.$email_subject.' -xu '.$VARS{userName}.' -xp '.$VARS{userPass}.' -s '.$VARS{smtp_server}.' -m '.$email_message.'"';

		# NOTE(review): the logged command line includes the -xp password argument.
		print LOG "MESSAGE:$email_message\nCOMMAND:$msg\n";
		chdir $VARS{send_email_dir};
		my $email_system_return = `$msg`;
		print LOG "RESULT: $email_system_return\n";
	}
	# Turn the result page "$FORM{outDir}$VARS{OutHtmlFile}" into its final
	# "failed" form: drop the auto-refresh / no-cache lines, replace RUNNING by
	# FAILED (or, on lines without RUNNING, Queued by Failed) and append the
	# contact footer plus the closing </body></html>.
	#
	# "Failed" is written to "$FORM{outDir}QUEUE_STATUS" first and that file is
	# removed again at the end.
	# NOTE(review): writing and then deleting QUEUE_STATUS inside the same call
	# looks odd - confirm it is intended.
	# Returns the result of the final unlink.
	sub update_output_that_run_failed
	{
		my $status_file = "$FORM{outDir}QUEUE_STATUS";
		my $output_page = "$FORM{outDir}$VARS{OutHtmlFile}";

		if (open (STATUS,">$status_file"))
		{
			print STATUS "Failed";
			close (STATUS);
		}
		close OUTPUT;

		# Rewrite the page only if it could be read; otherwise a failed read
		# would end in the page being truncated to just the footer.
		if (open (OUTPUT, $output_page))
		{
			my @output = <OUTPUT>;
			close OUTPUT;
			if (open (OUTPUT, ">$output_page"))
			{
				foreach my $line (@output){
					# remove the refresh commands from the output page
					next if (($line=~/TTP-EQUIV="REFRESH"/) or ($line=~/CONTENT="NO-CACHE"/));
					# Substitute in place so the rest of the line and its newline
					# survive; Queued is only looked at when RUNNING was absent.
					$line=~s/RUNNING/FAILED/ or $line=~s/Queued/Failed/;
					print OUTPUT $line;
				}
				print OUTPUT "<h4 class=footer align=\"center\">Questions and comments are welcome! Please <span class=\"admin_link\"><a href=\"mailto:bioSequence\@tauex.tau.ac.il\?subject=FastML\%20Run\%20Number\%20$VARS{RunNumber}\">contact us</a></span></h4>";
				print OUTPUT "</body>\n";
				print OUTPUT "</html>\n";
				close OUTPUT;
			}
		}
		unlink ($status_file);
	}
	# Stop the result page from reloading itself once the run is over.
	#
	# Appends the end time to the log, sleeps $VARS{reload_interval} seconds and
	# then rewrites "$FORM{outDir}$VARS{OutHtmlFile}" without the lines that
	# contain REFRESH or NO-CACHE.
	# NOTE(review): the log is opened here as "$FORM{outDir}$VARS{OutLogFile}"
	# while other subs in this file open "$VARS{OutLogFile}" - confirm which one
	# is right.
	sub stop_reload
	{
		my $log_file = "$FORM{outDir}$VARS{OutLogFile}";
		my $output_page = "$FORM{outDir}$VARS{OutHtmlFile}";

		open LOG, ">>$log_file";
		print LOG "\nEnd time: ".BIOSEQUENCE_FUNCTIONS::printTime();
		close LOG;

		sleep ($VARS{reload_interval});

		# Read first and rewrite only on success: rewriting after a failed read
		# would leave an empty result page.
		unless (open (OUTPUT, "<$output_page"))
		{
			open LOG, ">>$log_file";
			print LOG "\nstop_reload: Can't open '$output_page' for reading: $!\n";
			close LOG;
			return;
		}
		my @output = <OUTPUT>;
		close OUTPUT;

		unless (open (OUTPUT, ">$output_page"))
		{
			open LOG, ">>$log_file";
			print LOG "\nstop_reload: Can't open '$output_page' for writing: $!\n";
			close LOG;
			return;
		}
		foreach my $line (@output){
			unless ($line =~ /REFRESH/ or $line =~ /NO-CACHE/){
				print OUTPUT $line;
			}
		}
		close OUTPUT;
	}
	1145
	1146
	1147
	1148	### MAKE JalView
	1149
	1150
	# Write a JalView annotation file holding one bar-graph row built from the
	# per-position joint log likelihoods.
	#
	# Arguments:
	#   $JointProb_File      - input; its first line is skipped as a header, and
	#                          every later line of the form
	#                          "Joint log likelihood of position <n>: <score>"
	#                          contributes one bar
	#   $OutJalviewAnnotFile - output annotation file (overwritten)
	#   $Y_label             - label of the graph row
	# Returns "OK" on success, otherwise an error message. Callers compare the
	# result with "OK", so an unreadable input is reported the same way instead
	# of killing the program.
	sub Make_JalView_AnnotGraph_JointProb
	{
		my $JointProb_File=shift;
		my $OutJalviewAnnotFile=shift;
		my $Y_label=shift; # The Y label

		# Input first, so a missing data file does not leave a half-written
		# annotation file behind.
		open (DATA_FILE,$JointProb_File) || return ("make_Jalview_AnnotationGraph: Can't open data file '$JointProb_File' $!");
		unless (open (OUT,">$OutJalviewAnnotFile"))
		{
			my $reason = $!;
			close (DATA_FILE);
			return ("Can't open outAnnotationsFile '$OutJalviewAnnotFile': $reason");
		}

		print OUT "JALVIEW_ANNOTATION\n";
		print OUT "BAR_GRAPH\t$Y_label\t";
		my $line=<DATA_FILE>; # header
		while ($line=<DATA_FILE>)
		{
			#Joint log likelihood of position 147: -24.7398
			if ($line=~/Joint log likelihood of position ([0-9]+): ([0-9\-e\.]+)/)
			{
				my $Score=$2;
				# the score is written three times per bar
				print OUT "$Score,$Score,$Score|";
			}
		}
		print OUT "\n";
		close (DATA_FILE);
		close (OUT);
		return ("OK");
	}
	1177
	1178
	# Write a JalView features file that colours the ancestral residues by their
	# marginal probability (characters and indels table).
	#
	# Arguments:
	#   $MarginalProb_Chars_and_Indels_File - tab separated table: position on
	#       the MSA, node, character, probability; the first line is a header
	#   $outJalviewFeaturesFile             - features file to write
	#   $aln                                - FASTA alignment; features refer to
	#       a sequence by its 0-based index in this file (JalView's
	#       ID_NOT_SPECIFIED form), not by its name
	# Only rows whose node starts with "N<digits>" are written; the node is
	# looked up in the alignment without the leading N. The colour class is
	# "Score" followed by the probability times ten, rounded by sprintf "%.0f".
	# Returns "OK" on success, otherwise an error message.
	# See http://www.jalview.org/help/html/features/featuresFormat.html
	sub make_Jalview_Features_MarginalProb_IndelsChar
	{
		my ($MarginalProb_Chars_and_Indels_File, $outJalviewFeaturesFile, $aln) = @_;

		# sequence name -> index of the sequence in the alignment
		my %NodeId_To_Seq_Num=();
		open (ALN,$aln) or return ("Can't open ALN for reading: '$aln': $!");
		my $next_index=0;
		while (my $record=<ALN>)
		{
			chomp ($record);
			next unless ($record=~/^>(.*)/);
			$NodeId_To_Seq_Num{$1}=$next_index++;
		}
		close (ALN);

		open JALVIEW_FEATURES, ">$outJalviewFeaturesFile" or return ("Can't open outFeaturesFile '$outJalviewFeaturesFile': $!");
		# Colour of Score0 .. Score10; Score0 (reached when rounding down)
		# shares the colour of Score1.
		my @palette = qw(8E0152 8E0152 C51B7D DE77AE F1B6DA FDE0EF E6F5D0 B8E186 7FBC41 4D9221 276419);
		for my $class (0 .. $#palette)
		{
			print JALVIEW_FEATURES "Score$class\t$palette[$class]\n";
		}
		print JALVIEW_FEATURES "STARTGROUP\tMarginalProb\n";

		open (MARGINAL_PROB_CHAR_AND_INDELS,$MarginalProb_Chars_and_Indels_File) || return "make_Jalview_Color_MSA: Could Not Open the MarginalProb_Chars_and_Indels_File: '$MarginalProb_Chars_and_Indels_File' $!";
		my $header=<MARGINAL_PROB_CHAR_AND_INDELS>; # skipped

		while (my $row=<MARGINAL_PROB_CHAR_AND_INDELS>)
		{
			chomp ($row);
			my ($Pos_on_MSA,$Node,$Char,$CharProb)=split(/\t/,$row);
			next unless ($Node=~/^N[0-9]+/);
			$Node=~s/^N([0-9]+)$/$1/;
			my $Color_Class="Score".sprintf("%.0f",$CharProb*10);
			print JALVIEW_FEATURES join("\t",
				$CharProb, "ID_NOT_SPECIFIED", $NodeId_To_Seq_Num{$Node},
				$Pos_on_MSA, $Pos_on_MSA, $Color_Class, $CharProb)."\n";
		}
		print JALVIEW_FEATURES "ENDGROUP\tMarginalProb\n";
		close (JALVIEW_FEATURES);
		close (MARGINAL_PROB_CHAR_AND_INDELS);
		return ("OK");
	}
	1241
	# Write a JalView features file that colours the ancestral residues by their
	# marginal probability (characters only).
	#
	# Arguments:
	#   $MarginalProb_File      - text report; a line containing
	#       "marginal probabilities at position: <n>" sets the current position
	#       and a line containing "of node: <name>: p(<letter>)=<prob>" gives
	#       the probability of one node at that position
	#   $outJalviewFeaturesFile - features file to write
	#   $aln                    - FASTA alignment; features refer to a sequence
	#       by its 0-based index in this file (JalView's ID_NOT_SPECIFIED form),
	#       not by its name
	# Only nodes whose name starts with "N" are written; an "N<digits>" name is
	# looked up in the alignment without the leading N. The colour class is
	# "Score" followed by the probability times ten, rounded by sprintf "%.0f".
	# A node missing from the alignment is reported on STDOUT and still written,
	# with an empty sequence index.
	# Returns "OK" on success, otherwise an error message.
	sub make_Jalview_Features_MarginalProb
	{
		my $MarginalProb_File=shift;
		my $outJalviewFeaturesFile=shift;
		my $aln=shift; # The naming of the seq in the features file is according to their place in the alignment

		my %NodeId_To_Seq_Num=();
		open (ALN,$aln) or return ("Can't open ALN for reading '$aln': $!");
		my $seq_number=-1;
		while (my $line=<ALN>)
		{
			chomp ($line);
			if ($line=~/^>(.*)/)
			{
				$seq_number++;
				$NodeId_To_Seq_Num{$1}=$seq_number;
			}
		}
		close (ALN);

		open JALVIEW_FEATURES, ">$outJalviewFeaturesFile" or return ("Can't open outFeaturesFile '$outJalviewFeaturesFile': $!");
		print JALVIEW_FEATURES "Score0\t8E0152\n"; # When rounding down
		print JALVIEW_FEATURES "Score1\t8E0152\n";
		print JALVIEW_FEATURES "Score2\tC51B7D\n";
		print JALVIEW_FEATURES "Score3\tDE77AE\n";
		print JALVIEW_FEATURES "Score4\tF1B6DA\n";
		print JALVIEW_FEATURES "Score5\tFDE0EF\n";
		print JALVIEW_FEATURES "Score6\tE6F5D0\n";
		print JALVIEW_FEATURES "Score7\tB8E186\n";
		print JALVIEW_FEATURES "Score8\t7FBC41\n";
		print JALVIEW_FEATURES "Score9\t4D9221\n";
		print JALVIEW_FEATURES "Score10\t276419\n";

		print JALVIEW_FEATURES "STARTGROUP\tMarginalProb\n";
		my $pos=0;
		unless (open (MARGINAL_PROB,$MarginalProb_File))
		{
			my $reason = $!;
			close (JALVIEW_FEATURES); # do not leave the output handle open
			return "make_Jalview_Color_MSA: Could Not Open the MarginalProb_File: '$MarginalProb_File' $reason";
		}
		while (my $line=<MARGINAL_PROB>)
		{
			if ($line=~/marginal probabilities at position: ([0-9]+)/)
			{
				$pos=$1;
			}
			# The parentheses around the character are literal and must be
			# escaped, otherwise "p(A)=0.99" never matches.
			elsif ($line=~/of node: (.*?): p\([A-Z]\)=([0-9.]+)/)
			{
				my $Seq_ID=$1;
				my $prob=$2;
				if ($Seq_ID=~/^N/) # prob only for the ancestral nodes
				{
					$Seq_ID=~s/^N([0-9]+)$/$1/;
					my $Color_Class="Score".sprintf("%.0f",$prob*10);
					print "[WARNNING] MISSING: NodeId_To_Seq_Num{$Seq_ID}\n" if (!defined $NodeId_To_Seq_Num{$Seq_ID});
					print JALVIEW_FEATURES "$prob\tID_NOT_SPECIFIED\t$NodeId_To_Seq_Num{$Seq_ID}\t",$pos,"\t",$pos,"\t$Color_Class\t$prob\n"; #FEATURE ACCORDING TO SEQ NUMBER
				}
			}
		}
		print JALVIEW_FEATURES "ENDGROUP\tMarginalProb\n";
		close (JALVIEW_FEATURES);
		close (MARGINAL_PROB);
		return ("OK");
	}
	1311
	1312
	# Write copies of an alignment and of a tree in which the ancestral node
	# names have lost their leading "N".
	#
	# Arguments (positional): input alignment, output alignment, alignment format
	# ("FASTA" or "CLUSTALW"), input tree, output tree.
	#   FASTA:    ">N<digits>" becomes "><digits>"; an all-digit name
	#             ">digits" becomes ">ID_<digits>"; other lines are copied.
	#   CLUSTALW: a line starting with "N<digits>" loses the N and gets a space
	#             after the digits instead; other lines are copied.
	#   Any other format leaves the output alignment empty.
	# In the tree, only the first line is processed: every ")N<digits>" becomes
	# ")<digits>".
	# Returns "OK" on success, otherwise an error message.
	sub RemoveN_FormAncestralNames
	{
		my ($Aln, $Aln_No_N, $Aln_Format, $Tree, $Tree_No_N) = @_;

		open (ALN,$Aln) || return ("RemoveN_FormAncestralNames: Could not open '$Aln' $!");
		open (ALN_NO_N,">$Aln_No_N") || return ("RemoveN_FormAncestralNames: Could not open '$Aln_No_N' $!");

		my $is_fasta   = ($Aln_Format eq "FASTA");
		my $is_clustal = ($Aln_Format eq "CLUSTALW");
		if ($is_fasta or $is_clustal)
		{
			while (my $row=<ALN>)
			{
				if ($is_fasta)
				{
					if    ($row=~/^>N([0-9]+)$/) { $row=">$1\n"; }
					elsif ($row=~/^>([0-9]+)$/)  { $row=">ID_$1\n"; }
				}
				elsif ($row=~/^N([0-9]+)(.*)/)
				{
					$row="$1 $2\n";
				}
				print ALN_NO_N $row;
			}
		}
		close (ALN);
		close (ALN_NO_N);

		open (TREE,$Tree) || return "RemoveN_FormAncestralNames: Could not open file: '$Tree' $!";
		open (TREE_NO_N,">$Tree_No_N") || return "RemoveN_FormAncestralNames: Could not open '$Tree_No_N' $!";

		my $newick=<TREE>;
		$newick=~s/\)N([0-9]+)/\)$1/g;
		print TREE_NO_N $newick;
		close TREE;
		close TREE_NO_N;
		return ("OK");
	}
	1370	sub PrepareJalView
	1371	{
	1372	my $tree=shift;
	1373	my $MSA=shift;
	1374	my $http=shift;
	1375	my $JalViewPage=shift;
	1376	my $Jalview_AnnotFile=shift; # Optional, otherwise NA
	1377	my $JalviewFeaturesFile=shift; # Optional Otherwise NA
	1378	#print "$Jalview_AnnotFile\n$JalviewFeaturesFile\n";
	1379	open (JALVIEW,">$JalViewPage") \|\| return ("PrepareJalView: Can't open $JalViewPage for writing $!");
	1380
	1381	print JALVIEW "<HTML>\n";
	1382	print JALVIEW "<applet CODEBASE=\"http://fastml.tau.ac.il/\"\n";
	1383	print JALVIEW "CODE=\"jalview.bin.JalviewLite\" width=100% height=100%\n";
	1384	print JALVIEW "ARCHIVE=\"jalviewApplet.jar\">\n";
	1385	print JALVIEW "<param name=\"file\" value=\"$http"."$MSA\">\n";
	1386	print JALVIEW "<param name=\"tree\" value=\"$http"."$tree\">\n";
	1387	print JALVIEW "<param name=\"features\" value=\"$http".$JalviewFeaturesFile."\">\n" if ($JalviewFeaturesFile ne "NA");
	1388	print JALVIEW "<param name=\"annotations\" value=\"$http".$Jalview_AnnotFile."\">\n" if ($Jalview_AnnotFile ne "NA");
	1389	print JALVIEW "<param name=\"application_url\" value=\"http://www.jalview.org/services/launchApp\">\n";
	1390	print JALVIEW "<param name=\"showbutton\" VALUE=\"false\">\n";
	1391	print JALVIEW "<param name=\"showConservation\" VALUE=\"false\">\n";
	1392	print JALVIEW "<param name=\"showQuality\" VALUE=\"false\">\n";
	1393	print JALVIEW "<param name=\"showConsensus\" VALUE=\"false\">\n";
	1394
	1395	print JALVIEW "<param name=\"showTreeBootstraps\" VALUE=\"true\">\n";
	1396	print JALVIEW "<param name=\"showTreeDistances\" VALUE=\"false\">\n";
	1397
	1398	print JALVIEW "</APPLET>\n";
	1399	print JALVIEW "</HTML>\n";
	1400	close (JALVIEW);
	1401	return ("OK");
	1402
	1403	}
	1404
	1405	sub ExtractAncestralLogLikelihoodPerNodePerSite
	1406	{
	1407	my $FastMLProbFile=shift;
	1408	my $OutLL=shift;
	1409	my $SeqType=shift;
	1410	my @AB=();
	1411	if ($SeqType eq "nuc")
	1412	{
	1413	@AB=qw(A C G T);
	1414	}
	1415	if ($SeqType eq "aa")
	1416	{
	1417	@AB=qw(A C D E F G H I K L M N P Q R S T V W Y);
	1418	}
	1419	if ($SeqType eq "codon")
	1420	{
	1421	@AB=qw(AAA AAC AAG AAT ACA ACC ACG ACT AGA AGC AGG AGT ATA ATC ATG ATT CAA CAC CAG CAT CCA CCC CCG CCT CGA CGC CGG CGT CTA CTC CTG CTT GAA GAC GAG GAT GCA GCC GCG GCT GGA GGC GGG GGT GTA GTC GTG GTT TAC TAT TCA TCC TCG TCT TGC TGG TGT TTA TTC TTG TTT);
	1422	}
	1423
	1424	open (FASTML,$FastMLProbFile) \|\| return ("ExtractAncestralProbPerNodePerSite: Failed to open FastML Prob File: '$FastMLProbFile' $!");
	1425	open (OUT,">$OutLL") \|\| return ("ExtractAncestralProbPerNodePerSite: Failed to open OutLL File: '$OutLL' $!");
	1426	print OUT "Ancestral Node,Pos,",join (",",@AB),"\n";
	1427	my @observervedChars=();
	1428	my $AB_Size=0;
	1429	my %Data=();
	1430	my $Positions=1;
	1431	my $line=<FASTML>;
	1432	while (($line!~/([\+]+) marginal log likelihood ([\+]+)/) and ($line)){$line=<FASTML>;} # GO TILL marginal LL SECTION
	1433	while ($line=<FASTML>)
	1434	{
	1435	if ($line=~/^node/)
	1436	{
	1437	chomp ($line);
	1438	@observervedChars=split(",",$line);
	1439	$AB_Size=@observervedChars;
	1440	$AB_Size=$AB_Size-2; # For Node and site
	1441	}
	1442	else
	1443	{
	1444	if ($line=~/([0-9]+)/)
	1445	{
	1446	chomp ($line);
	1447	my @line=split(",",$line);
	1448	for (my $i=2;$i<$AB_Size+2;$i++) # Array start with node and pos
	1449	{
	1450	# print "Data{$line[0]}{$line[1]}{$observervedChars[$i]}=$line[$i]\n";<STDIN>;
	1451	$Data{$line[0]}{$line[1]}{$observervedChars[$i]}=$line[$i]; #Key1:Ancestral Node,key2:pos,key3:char value: prob
	1452	}
	1453	$Positions=$line[1];
	1454	}
	1455	}
	1456	}
	1457	# print "Pos:$Positions\n";
	1458	foreach my $node (sort keys(%Data))
	1459	{
	1460	for (my $pos=1;$pos<=$Positions;$pos++)
	1461	{
	1462	print OUT "$node,$pos,";
	1463	foreach my $Char (@AB)
	1464	{
	1465	if ($Char ne $AB[-1]) # NOT THE LAST POSITION
	1466	{
	1467	if (defined $Data{$node}{$pos}{$Char})
	1468	{
	1469	print OUT "$Data{$node}{$pos}{$Char},";
	1470	}
	1471	else
	1472	{
	1473	print OUT "0,"
	1474	}
	1475	}
	1476	else
	1477	{
	1478	if (defined $Data{$node}{$pos}{$Char})
	1479	{
	1480	print OUT "$Data{$node}{$pos}{$Char}";
	1481	}
	1482	else
	1483	{
	1484	print OUT "0";
	1485	}
	1486	}
	1487	}
	1488	print OUT "\n";
	1489	}
	1490	}
	1491	}
	1492
	1493	sub ExtractAncestralProbPerNodePerSite
	1494	{
	1495	my $FastMLProbFile=shift;
	1496	my $OutProb=shift;
	1497	my $SeqType=shift;
	1498	my @AB=();
	1499	if ($SeqType eq "nuc")
	1500	{
	1501	@AB=qw(A C G T);
	1502	}
	1503	if ($SeqType eq "aa")
	1504	{
	1505	@AB=qw(A C D E F G H I K L M N P Q R S T V W Y);
	1506	}
	1507	if ($SeqType eq "codon")
	1508	{
	1509	@AB=qw(AAA AAC AAG AAT ACA ACC ACG ACT AGA AGC AGG AGT ATA ATC ATG ATT CAA CAC CAG CAT CCA CCC CCG CCT CGA CGC CGG CGT CTA CTC CTG CTT GAA GAC GAG GAT GCA GCC GCG GCT GGA GGC GGG GGT GTA GTC GTG GTT TAC TAT TCA TCC TCG TCT TGC TGG TGT TTA TTC TTG TTT);
	1510	}
	1511
	1512	open (FASTML,$FastMLProbFile) \|\| return ("ExtractAncestralProbPerNodePerSite: Failed to open FastML Prob File: '$FastMLProbFile' $!");
	1513	open (OUT,">$OutProb") \|\| return ("ExtractAncestralProbPerNodePerSite: Failed to open OutProb File: '$OutProb' $!");
	1514	print OUT "Ancestral Node,Pos,".join(",",@AB),"\n";
	1515	my @observervedChars=();
	1516	my $AB_Size=0;
	1517	my %Data=();
	1518	my $Positions=1;
	1519	my $line=<FASTML>;
	1520	while (($line!~/([\+]+) marginal probs ([\+]+)/) and ($line)){$line=<FASTML>;} # GO TILL marginal probs SECTION
	1521	while ($line=<FASTML>)
	1522	{
	1523	if ($line=~/^node/)
	1524	{
	1525	chomp ($line);
	1526	@observervedChars=split(",",$line);
	1527	$AB_Size=@observervedChars;
	1528	$AB_Size=$AB_Size-2; # For Node and site
	1529	}
	1530	elsif ($line=~/([\+]+) marginal log likelihood ([\+]+)/){last;} # finished
	1531	else
	1532	{
	1533	if ($line=~/([0-9]+)/)
	1534	{
	1535	chomp ($line);
	1536	my @line=split(",",$line);
	1537	for (my $i=2;$i<$AB_Size+2;$i++) # Array start with node and pos
	1538	{
	1539	# print "Data{$line[0]}{$line[1]}{$observervedChars[$i]}=$line[$i]\n";#<STDIN>;
	1540	$Data{$line[0]}{$line[1]}{$observervedChars[$i]}=$line[$i]; #Key1:Ancestral Node,key2:pos,key3:char value: prob
	1541	}
	1542	$Positions=$line[1];
	1543	}
	1544	}
	1545	}
	1546	# print "Pos:$Positions\n";
	1547	foreach my $node (sort keys(%Data))
	1548	{
	1549	for (my $pos=1;$pos<=$Positions;$pos++)
	1550	{
	1551	print OUT "$node,$pos,";
	1552	foreach my $Char (@AB)
	1553	{
	1554	if ($Char ne $AB[-1]) # NOT THE LAST POSITION
	1555	{
	1556	if ((defined $Data{$node}{$pos}{$Char}) and ($Data{$node}{$pos}{$Char}>0.0001))
	1557	{
	1558	print OUT "$Data{$node}{$pos}{$Char},";
	1559	}
	1560	else
	1561	{
	1562	print OUT "0,"
	1563	}
	1564	}
	1565	else
	1566	{
	1567	if ((defined $Data{$node}{$pos}{$Char}) and ($Data{$node}{$pos}{$Char}>0.0001))
	1568	{
	1569	print OUT "$Data{$node}{$pos}{$Char}";
	1570	}
	1571	else
	1572	{
	1573	print OUT "0";
	1574	}
	1575	}
	1576	}
	1577	print OUT "\n";
	1578	}
	1579	}
	1580	}
	1581
	1582	sub print_SampleSeq_selection_box
	1583	{
	1584	my $Marginal_ProbFile_CSV=shift;
	1585	my $SeqType=shift;
	1586	open (MARGINAL_PROB,$Marginal_ProbFile_CSV) or exit_on_error ("sys_error',print_SampleSeq_selection_box: Can't open MARGINAL_PROB: '$Marginal_ProbFile_CSV' $!");
	1587	my $return_str="";
	1588	$return_str.="<input type=\"hidden\" name=\"run_num\" value=\"$VARS{RunNumber}\">\n";
	1589	$return_str.="<input type=\"hidden\" name=\"MarginalProbFile\" value=\"$Marginal_ProbFile_CSV\"/>\n";
	1590	$return_str.="<input type=\"hidden\" name=\"SeqType\" value=\"$SeqType\"/>\n";
	1591	$return_str.="<select name=\"Node\" id=\"Node\">\n";
	1592
	1593	my %Nodes=();
	1594	while (my $line=<MARGINAL_PROB>)
	1595	{
	1596	if ($line=~/N([0-9]+)/)
	1597	{
	1598	$Nodes{$1}=1;
	1599	}
	1600	}
	1601	for my $Node ( sort {$a<=>$b} keys %Nodes) {
	1602	$return_str.="<option value=\"N$Node\">N$Node</option>";
	1603	}
	1604	$return_str.="<input type=\"submit\" value=\"Sample sequences\"/>\n";
	1605	$return_str.="</form>\n";
	1606	close (MARGINAL_PROB);
	1607	return $return_str;
	1608	}
	1609
	1610	sub AreThereIndels
	1611	{
	1612	my $MSA=shift; # Fasta Format
	1613	my $AreThereIndels="NO";
	1614	open (MSA,$MSA) \|\| exit_on_error ("sys_error", "AreThereIndels: Can't open the MSA file: '$MSA' $!");
	1615	while (my $line=<MSA>)
	1616	{
	1617	if ($line!~/^>/)
	1618	{
	1619	if ($line=~/-/)
	1620	{
	1621	$AreThereIndels="YES";
	1622	close (MSA);
	1623	return ($AreThereIndels);
	1624	}
	1625	}
	1626	}
	1627	close (MSA);
	1628	return ($AreThereIndels);
	1629
	1630	}
	1631
	1632
	1633	sub removeEndLineExtraChars{
	1634	# remove extra chars on end of lines (^M,spaces);
	1635	my $inputFile = shift;
	1636	my @lines;
	1637	if (open FILE, $inputFile){
	1638	@lines=<FILE>;
	1639	close (FILE);
	1640	}
	1641	if (open (NEWFILE,">$inputFile")){
	1642	my $line;
	1643	foreach $line (@lines){
	1644	# $line=~s/(\r)$/\n/;
	1645	$line=~s/(\s+)$//;
	1646	print NEWFILE "$line\n";
	1647	}
	1648	close NEWFILE;
	1649	}
	1650	}
	1651
	1652	sub estimate_run_time
	1653	{
	1654	my $MSA=shift;
	1655	my $SeqType=shift; # aa\|nuc\|codon
	1656	my $IsTree=shift;
	1657	my $IsGamma=shift;
	1658	my %CODONS_350=(
	1659	50 => '120',
	1660	100 => '330',
	1661	150 => '870',
	1662	200 => '1230',
	1663	250 => '1920',
	1664	300 => '2700'
	1665	);
	1666	my %NUC_350=(
	1667	50 => '2',
	1668	100 => '5',
	1669	150 => '15',
	1670	200 => '35',
	1671	250 => '80',
	1672	300 => '130'
	1673	);
	1674	my %AA_350=(
	1675	50 => '2',
	1676	100 => '6',
	1677	150 => '25',
	1678	200 => '60',
	1679	250 => '180',
	1680	300 => '300'
	1681	);
	1682	my $MSA_Length=0;
	1683	my $NumOfSeq=0;
	1684	open (MSA,$MSA);
	1685	my $Seq="";
	1686	while (my $line=<MSA>)
	1687	{
	1688	chomp ($line);
	1689	if ($line=~/^>/)
	1690	{
	1691	$NumOfSeq++;
	1692	$MSA_Length=length($Seq);
	1693	$Seq=$Seq.$line;
	1694	}
	1695	else
	1696	{
	1697	$Seq=$line;
	1698	}
	1699	}
	1700	close (MSA);
	1701	my $Time=0;
	1702	if ($NumOfSeq<=50)
	1703	{
	1704	if ($SeqType eq "aa"){$Time=$AA_350{50};}
	1705	elsif ($SeqType eq "nuc"){$Time=$NUC_350{50};}
	1706	elsif ($SeqType eq "codon"){$Time=$CODONS_350{50};}
	1707	}
	1708	if (($NumOfSeq>50)and($NumOfSeq<=100))
	1709	{
	1710	if ($SeqType eq "aa"){$Time=$AA_350{100};}
	1711	elsif ($SeqType eq "nuc"){$Time=$NUC_350{100};}
	1712	elsif ($SeqType eq "codon"){$Time=$CODONS_350{100};}
	1713	}
	1714	if (($NumOfSeq>100)and($NumOfSeq<=150))
	1715	{
	1716	if ($SeqType eq "aa"){$Time=$AA_350{150};}
	1717	elsif ($SeqType eq "nuc"){$Time=$NUC_350{150};}
	1718	elsif ($SeqType eq "codon"){$Time=$CODONS_350{150};}
	1719	}
	1720	if (($NumOfSeq>150)and($NumOfSeq<=200))
	1721	{
	1722	if ($SeqType eq "aa"){$Time=$AA_350{200};}
	1723	elsif ($SeqType eq "nuc"){$Time=$NUC_350{200};}
	1724	elsif ($SeqType eq "codon"){$Time=$CODONS_350{200};}
	1725	}
	1726	if (($NumOfSeq>200)and($NumOfSeq<=250))
	1727	{
	1728	if ($SeqType eq "aa"){$Time=$AA_350{2500};}
	1729	elsif ($SeqType eq "nuc"){$Time=$NUC_350{250};}
	1730	elsif ($SeqType eq "codon"){$Time=$CODONS_350{250};}
	1731	}
	1732	if (($NumOfSeq>250)and($NumOfSeq<=300))
	1733	{
	1734	if ($SeqType eq "aa"){$Time=$AA_350{300};}
	1735	elsif ($SeqType eq "nuc"){$Time=$NUC_350{300};}
	1736	elsif ($SeqType eq "codon"){$Time=$CODONS_350{300};}
	1737	}
	1738	if ($NumOfSeq>300)
	1739	{
	1740	if ($SeqType eq "aa"){$Time=$AA_350{300}*2;}
	1741	elsif ($SeqType eq "nuc"){$Time=$NUC_350{300}*2;}
	1742	elsif ($SeqType eq "codon"){$Time=$CODONS_350{300}*3;}
	1743	}
	1744	my $Seq_Factor=$MSA_Length/350;
	1745	print "SeqFactor:$Seq_Factor\n";
	1746	$Time=$Time*$Seq_Factor;
	1747
	1748	if ($IsTree eq "NO")
	1749	{
	1750	$Time=$Time*3;
	1751	}
	1752	if ($IsGamma eq "YES")
	1753	{
	1754	$Time=$Time*18;
	1755	}
	1756	$Time=int($Time);
	1757	print "TIME:$Time\n";#<STDIN>;
	1758	my $RetTime=BIOSEQUENCE_FUNCTIONS::time_in_days_from_minutes($Time);
	1759
	1760
	1761	my @Time=split(/:/,$RetTime);
	1762	my $ElementInTime=@Time;
	1763	if ($ElementInTime==2){$RetTime=$RetTime." minutes";}
	1764	if ($ElementInTime==3){$RetTime=$RetTime." hours";}
	1765	print "TIME:$Time\tRet:$RetTime\n";#<STDIN>;
	1766	return ($RetTime);
	1767
	1768	}
	1769
	1770	sub MSASeqNamesToCode
	1771	{
	1772	my $MSA=shift;
	1773	my $OutDir=shift;
	1774
	1775	my %SeqNamesToCode=();
	1776	my %CodeToSeqName=();
	1777	copy($MSA,$MSA.".ORIG_NAMES");
	1778	open (MSA,$MSA) \|\| exit_on_error("sys_error","MSASeqNamesToCode: Can't open MSA: '$MSA' $!");
	1779	my @MSA=<MSA>;
	1780	close (MSA);
	1781	open (NEW_MSA,">$MSA") \|\| exit_on_error("sys_error","MSASeqNamesToCode: Can't open NEW MSA: '$MSA' for writing $!");
	1782	open (CODES,">$OutDir/SeqCodes") \|\| exit_on_error("sys_error","MSASeqNamesToCode: Can't open SeqCodes '$OutDir/SeqCodes' $!");
	1783	my $SeqCounter=0;
	1784	foreach my $line (@MSA)
	1785	{
	1786	chomp ($line);
	1787	if ($line=~/^>(.*)/)
	1788	{
	1789	my $SeqName=$1;
	1790	my $SeqName_Short=$SeqName;
	1791	my $SeqNameOrig=$SeqName;
	1792	if ((length ($SeqName)>50) and ($SeqName=~/\\|/))
	1793	{
	1794	my @SeqName=split(/\s/,$SeqName);
	1795	$SeqName_Short=$SeqName[0];
	1796	}
	1797	else
	1798	{
	1799	$SeqName_Short=~s/[_\[\]\)\(\s\?\:\-\/\=]+/_/g;
	1800	}
	1801	$SeqCounter++;
	1802	my $SeqCode="S".$SeqCounter;
	1803	$SeqNamesToCode{$SeqNameOrig}=$SeqCode; # Take the orig name a key
	1804	$CodeToSeqName{$SeqCode}{'short'}=$SeqName_Short;
	1805	$CodeToSeqName{$SeqCode}{'full'}=$SeqNameOrig;
	1806	print NEW_MSA ">$SeqCode\n";
	1807	print CODES "$SeqCode\t$CodeToSeqName{$SeqCode}{'short'}\t$CodeToSeqName{$SeqCode}{'full'}\n";
	1808	}
	1809	else
	1810	{
	1811	print NEW_MSA "$line\n";
	1812	}
	1813	}
	1814	close (NEW_MSA);
	1815	return (\%SeqNamesToCode,\%CodeToSeqName);
	1816	}
	1817
	1818	sub TreeNamesToCodes
	1819	{
	1820	my $Tree=shift;
	1821	my $SeqNamesToCode_HashRef=shift;
	1822
	1823	copy($Tree,"$Tree".".ORIG_NAMES");
	1824	open (TREE,$Tree) \|\| exit_on_error("sys_error","TreeNamesToCodes: Can't open Tree: '$Tree' $!");
	1825	my @Tree=<TREE>;
	1826	close (TREE);
	1827	open (TREE,">$Tree") \|\| exit_on_error("sys_error","TreeNamesToCodes: Can't open Tree for writing: '$Tree' $!");
	1828	foreach my $line (@Tree)
	1829	{
	1830	chomp $line;
	1831	my @TreeSplit=split(/,/,$line);
	1832	foreach my $elem (@TreeSplit)
	1833	{
	1834	my @elem=split(/:/,$elem);
	1835	foreach my $elem1 (@elem)
	1836	{
	1837	if ($elem1 =~/([^()]+)/)
	1838	{
	1839	my $TaxID=$1;
	1840	# Comment out Haim 24/2/13 beacuse after we change the name it is no longer match the line one...
	1841	#if ((length ($Tax/ID)>50) and ($TaxID=~/\\|/))
	1842	#{
	1843	# my @TaxID=split(/\s/,$TaxID);
	1844	# $TaxID=$TaxID[0];
	1845	#}
	1846	#else
	1847	#{
	1848	# $TaxID=~s/[_\[\]\)\(\s\?\:\-\/\=]+/_/g;
	1849	#}
	1850	#$TaxID=~s/[_\[\]\)\(\s\?\:\-\/\=]+/_/g;
	1851	# print "$elem1\t$TaxID\n"; # QA
	1852
	1853	if (exists $SeqNamesToCode_HashRef->{$TaxID})
	1854	{
	1855	$line=~s/$TaxID/$SeqNamesToCode_HashRef->{$TaxID}/;
	1856	}
	1857	}
	1858	}
	1859	}
	1860	print TREE "$line";
	1861	}
	1862	print TREE "\n";
	1863	close (TREE);
	1864	}
	1865
	1866
	1867	sub TreeCodesToNames
	1868	{
	1869	my $Tree=shift;
	1870	my $CodeToSeqNames_HashRef=shift;
	1871
	1872	copy($Tree,"$Tree".".CODES");
	1873	open (TREE,$Tree) \|\| exit_on_error("sys_error","TreeCodesToNames:Can't open Tree: '$Tree' $!");
	1874	my @Tree=<TREE>;
	1875	close (TREE);
	1876	open (TREE,">$Tree") \|\| exit_on_error("sys_error","TreeCodesToNames:Can't open Tree for writing: '$Tree' $!");
	1877	foreach my $line (@Tree)
	1878	{
	1879	chomp $line;
	1880	my @TreeSplit=split(/,/,$line);
	1881	foreach my $elem (@TreeSplit)
	1882	{
	1883	my @elem=split(/:/,$elem);
	1884	foreach my $elem1 (@elem)
	1885	{
	1886	if ($elem1 =~/([^()]+)/)
	1887	{
	1888	my $TaxIDCode=$1;
	1889	# print "$elem1\t$TaxIDCode\n"; # QA
	1890
	1891	if (exists $CodeToSeqNames_HashRef->{$TaxIDCode}{'full'})
	1892	{
	1893	$line=~s/$TaxIDCode/$CodeToSeqNames_HashRef->{$TaxIDCode}{'full'}/;
	1894	}
	1895	}
	1896	}
	1897	}
	1898	print TREE "$line";
	1899	}
	1900	print TREE "\n";
	1901	close (TREE);
	1902	}
	1903	sub TreeCodesToNamesShort
	1904	{
	1905	my $Tree=shift;
	1906	my $CodeToSeqNames_HashRef=shift;
	1907
	1908	copy($Tree,"$Tree".".CODES");
	1909	open (TREE,$Tree) \|\| exit_on_error("sys_error","TreeCodesToNames:Can't open Tree: '$Tree' $!");
	1910	my @Tree=<TREE>;
	1911	close (TREE);
	1912	open (TREE,">$Tree") \|\| exit_on_error("sys_error","TreeCodesToNames:Can't open Tree for writing: '$Tree' $!");
	1913	foreach my $line (@Tree)
	1914	{
	1915	chomp $line;
	1916	my @TreeSplit=split(/,/,$line);
	1917	foreach my $elem (@TreeSplit)
	1918	{
	1919	my @elem=split(/:/,$elem);
	1920	foreach my $elem1 (@elem)
	1921	{
	1922	if ($elem1 =~/([^()]+)/)
	1923	{
	1924	my $TaxIDCode=$1;
	1925	# print "$elem1\t$TaxIDCode\n"; # QA
	1926
	1927	if (exists $CodeToSeqNames_HashRef->{$TaxIDCode}{'short'})
	1928	{
	1929	$line=~s/$TaxIDCode/$CodeToSeqNames_HashRef->{$TaxIDCode}{'short'}/;
	1930	}
	1931	}
	1932	}
	1933	}
	1934	print TREE "$line";
	1935	}
	1936	print TREE "\n";
	1937	close (TREE);
	1938	}
	1939	sub MSACodesToNames
	1940	{
	1941	my $MSA=shift;
	1942	my $CodeToSeqNames_HashRef=shift;
	1943	copy($MSA,"$MSA".".CODES");
	1944	open (MSA,$MSA) \|\| exit_on_error("sys_error","MSACodesToNames:Can't open MSA: '$MSA' $!");
	1945	my @MSA=<MSA>;
	1946	close (MSA);
	1947	open (MSA,">$MSA") \|\| exit_on_error("sys_error","MSACodesToNames:Can't open MSA for writing: '$MSA' $!");
	1948	foreach my $line (@MSA)
	1949	{
	1950	chomp ($line);
	1951	if ($line=~/^>(.*)/)
	1952	{
	1953	if (exists $CodeToSeqNames_HashRef->{$1}{'full'})
	1954	{
	1955	print MSA ">$CodeToSeqNames_HashRef->{$1}{'full'}\n";
	1956	}
	1957	else
	1958	{
	1959	print MSA "$line\n";
	1960	}
	1961	}
	1962	else
	1963	{
	1964	print MSA "$line\n";
	1965	}
	1966	}
	1967	close (MSA);
	1968	}
	1969	sub MSACodesToNamesShort
	1970	{
	1971	my $MSA=shift;
	1972	my $CodeToSeqNames_HashRef=shift;
	1973	copy($MSA,"$MSA".".CODES");
	1974	open (MSA,$MSA) \|\| exit_on_error("sys_error","MSACodesToNames:Can't open MSA: '$MSA' $!");
	1975	my @MSA=<MSA>;
	1976	close (MSA);
	1977	open (MSA,">$MSA") \|\| exit_on_error("sys_error","MSACodesToNames:Can't open MSA for writing: '$MSA' $!");
	1978	foreach my $line (@MSA)
	1979	{
	1980	chomp ($line);
	1981	if ($line=~/^>(.*)/)
	1982	{
	1983	if (exists $CodeToSeqNames_HashRef->{$1}{'short'})
	1984	{
	1985	print MSA ">$CodeToSeqNames_HashRef->{$1}{'short'}\n";
	1986	}
	1987	else
	1988	{
	1989	print MSA "$line\n";
	1990	}
	1991	}
	1992	else
	1993	{
	1994	print MSA "$line\n";
	1995	}
	1996	}
	1997	close (MSA);
	1998	}
	1999	sub TabDelFileCodesToNames
	2000	{
	2001	my $File=shift;
	2002	my $Col=shift;
	2003	my $CodeToSeqNames_HashRef=shift;
	2004
	2005	copy($File,"$File".".CODES");
	2006	open (FILE,$File) \|\| exit_on_error("sys_error","TabDelFileCodesToNames:Can't open File: '$File' $!");
	2007	my @File=<FILE>;
	2008	close (FILE);
	2009	open (FILE,">$File") \|\| exit_on_error ("sys_error", "TabDelFileCodesToNames: Can't open File for writing: '$File' $!");
	2010	foreach my $line (@File)
	2011	{
	2012	chomp ($line);
	2013	my @line=split(/\t/,$line);
	2014	if (exists $CodeToSeqNames_HashRef->{$line[$Col]}{'full'})
	2015	{
	2016	$line[$Col]=$CodeToSeqNames_HashRef->{$line[$Col]}{'full'};
	2017	}
	2018	my $line=join("\t",@line);
	2019	print FILE "$line\n";
	2020	}
	2021	close FILE;
	2022	}
	2023
	2024	sub CommaDelFileCodesToNames
	2025	{
	2026	my $File=shift;
	2027	my $Col=shift;
	2028	my $CodeToSeqNames_HashRef=shift;
	2029
	2030	copy($File,"$File".".CODES");
	2031	open (FILE,$File) \|\| exit_on_error("sys_error","CommaDelFileCodesToNames:Can't open File: '$File' $!");
	2032	my @File=<FILE>;
	2033	close (FILE);
	2034	open (FILE,">$File") \|\| exit_on_error ("sys_error", "CommaDelFileCodesToNames: Can't open File for writing: '$File' $!");
	2035	foreach my $line ($File)
	2036	{
	2037	chomp ($line);
	2038	my @line=split(/,/,$line);
	2039	if (exists $CodeToSeqNames_HashRef->{$line[$Col]}{'full'})
	2040	{
	2041	$line[$Col]=$CodeToSeqNames_HashRef->{$line[$Col]}{'full'};
	2042	}
	2043	print FILE join (",",@line),"\n";
	2044	}
	2045	close FILE;
	2046	}
	2047
	2048	sub AncestorFileCodesToNames
	2049	{
	2050	my $File=shift;
	2051	my $CodeToSeqNames_HashRef=shift;
	2052	copy($File,"$File".".CODES");
	2053	open (FILE,$File) \|\| exit_on_error("sys_error","AncestorFileCodesToNames:Can't open File: '$File' $!");
	2054	my @File=<FILE>;
	2055	close (FILE);
	2056	open (FILE,">$File") \|\| exit_on_error ("sys_error", "AncestorFileCodesToNames: Can't open File for writing: '$File' $!");
	2057	foreach my $line (@File)
	2058	{
	2059	chomp ($line);
	2060	my @line=split(/\s+/,$line);
	2061	foreach my $elem (@line)
	2062	{
	2063	if (exists $CodeToSeqNames_HashRef->{$elem}{'full'})
	2064	{
	2065	$line=~s/\b$elem\b/$CodeToSeqNames_HashRef->{$elem}{'full'}/;
	2066	}
	2067	}
	2068	print FILE "$line\n";
	2069	}
	2070	close (FILE);
	2071	}
	2072	sub PrepareJalViewJNLP # Prepare JalView JNLP for Desktop app (not server)
	2073	{
	2074	my $tree=shift;
	2075	my $MSA=shift;
	2076	# my $http=shift;
	2077	my $JalViewJNLP=shift;
	2078	my $Jalview_AnnotFile=shift; # Optional, otherwise NA
	2079	my $JalviewFeaturesFile=shift; # Optional Otherwise NA
	2080	#print "$Jalview_AnnotFile\n$JalviewFeaturesFile\n";
	2081	open (JALVIEW,">$JalViewJNLP") \|\| return ("PrepareJalViewJNLP: Can't open $JalViewJNLP for writing $!");
	2082	print JALVIEW <<JALVIEWDESKTOP;
	2083	<!--
	2084
	2085	If you have downloaded this file after pressing "Launch Full Application" from Jalview on a web page and you don't know what to do with this file, you must install Java from http://www.java.sun.com then try opening this file again.
	2086
	2087	JNLP generated by /jalviewServlet/launchApp
	2088	JNLP generated from http://www.jalview.org/webstart/jalview.jnlp
	2089	Available servlet parameters (please URLEncode):
	2090	open=<alignment file URL>
	2091	jvm-max-heap=heap size in M or G
	2092	features maps to '-features'
	2093	treeFile maps to '-tree'
	2094	tree maps to '-tree'
	2095	annotations maps to '-annotations'
	2096	colour maps to '-colour'
	2097
	2098
	2099	-->
	2100	<?xml version="1.0" encoding="UTF-8"?>
	2101	<jnlp spec="1.0+" codebase="http://www.jalview.org/webstart">
	2102	<information>
	2103	<title>Jalview</title>
	2104	<vendor>The Barton Group</vendor>
	2105	<homepage href="http://www.jalview.org"/>
	2106	<description>Jalview Multiple Alignment Editor</description>
	2107	<description kind="short">Jalview</description>
	2108	<icon href="JalviewLogo_big.png"/>
	2109	<offline-allowed/>
	2110	<association mime-type="application-x/ext-file" extensions="fa"/>
	2111	<association mime-type="application-x/ext-file" extensions="fasta"/>
	2112	<association mime-type="application-x/ext-file" extensions="fastq"/>
	2113	<association mime-type="application-x/ext-file" extensions="blc"/>
	2114	<association mime-type="application-x/ext-file" extensions="msf"/>
	2115	<association mime-type="application-x/ext-file" extensions="pfam"/>
	2116	<association mime-type="application-x/ext-file" extensions="aln"/>
	2117	<association mime-type="application-x/ext-file" extensions="pir"/>
	2118	<association mime-type="application-x/ext-file" extensions="amsa"/>
	2119	<association mime-type="application-x/ext-file" extensions="stk"/>
	2120	<association mime-type="application-x/ext-file" extensions="jar"/>
	2121	</information>
	2122	<security>
	2123	<all-permissions/>
	2124	</security>
	2125	<resources>
	2126	<j2se version="1.6+" initial-heap-size="10M"/>
	2127	<jar href="jalview.jar"/>
	2128	<jar href="JGoogleAnalytics_0.3.jar"/>
	2129	<jar href="Jmol-12.2.4.jar"/>
	2130	<jar href="VARNAv3-9-dev.jar"/>
	2131	<jar href="activation.jar"/>
	2132	<jar href="apache-mime4j-0.6.jar"/>
	2133	<jar href="axis.jar"/>
	2134	<jar href="castor-1.1-cycle-xml.jar"/>
	2135	<jar href="commons-codec-1.3.jar"/>
	2136	<jar href="commons-discovery.jar"/>
	2137	<jar href="commons-logging-1.1.1.jar"/>
	2138	<jar href="groovy-all-1.8.2.jar"/>
	2139	<jar href="httpclient-4.0.3.jar"/>
	2140	<jar href="httpcore-4.0.1.jar"/>
	2141	<jar href="httpmime-4.0.3.jar"/>
	2142	<jar href="jaxrpc.jar"/>
	2143	<jar href="jdas-1.0.4.jar"/>
	2144	<jar href="jhall.jar"/>
	2145	<jar href="jswingreader-0.3.jar"/>
	2146	<jar href="log4j-1.2.8.jar"/>
	2147	<jar href="mail.jar"/>
	2148	<jar href="miglayout-4.0-swing.jar"/>
	2149	<jar href="min-jaba-client-2.0.jar"/>
	2150	<jar href="regex.jar"/>
	2151	<jar href="saaj.jar"/>
	2152	<jar href="spring-core-3.0.5.RELEASE.jar"/>
	2153	<jar href="spring-web-3.0.5.RELEASE.jar"/>
	2154	<jar href="vamsas-client.jar"/>
	2155	<jar href="wsdl4j.jar"/>
	2156	<jar href="xercesImpl.jar"/>
	2157	<jar href="xml-apis.jar"/>
	2158	<property name="jalview.version" value="2.8"/>
	2159	</resources>
	2160	<application-desc main-class="jalview.bin.Jalview">
	2161
	2162	JALVIEWDESKTOP
	2163
	2164
	2165
	2166	print JALVIEW "<argument>-open</argument>\n";
	2167	print JALVIEW "<argument>$MSA</argument>\n";
	2168	if ($JalviewFeaturesFile ne "NA")
	2169	{
	2170
	2171	print JALVIEW "<argument>-features</argument>\n";
	2172	print JALVIEW "<argument>$JalviewFeaturesFile</argument>\n";
	2173	}
	2174	if ($Jalview_AnnotFile ne "NA")
	2175	{
	2176	print JALVIEW "<argument>-annotations</argument>\n";
	2177	print JALVIEW "<argument>$Jalview_AnnotFile</argument>\n"
	2178	}
	2179	print JALVIEW "<argument>-tree</argument>\n";
	2180	print JALVIEW "<argument>$tree</argument>\n";
	2181	close (JALVIEW);
	2182	return ("OK");
	2183
	2184	}

+752

-0

www/fastml/IndelReconstruction_Wrapper.pl less more

	0	use strict;
	1	use Getopt::Long;
	2	use FindBin qw($Bin); # www/FastML_2012/
	3	use File::Copy;
	4	die "USAGE: --MSA_File <MSA_File> --Tree_File <Tree_File> --outDir <outDir> --seqType <aa\|nuc\|codon>
	5	Optional parameters:
	6	--indelCutOff <Cutoff for indel vs Char> deafult =0.5
	7
	8	--CharsMarginalProb <ProbFile> (deafult=prob.marginal.txt) - prob of ancestral sequences - FASTML output
	9
	10	--ML_GapOut <Indels_ML_Prob> # (deafult: IndelsMarginalProb.txt - IndelsMarginalProb (IndelReconstructOutput)
	11	--ML_Ancestral_MSA <Ancestal_ML_MSA> # (deafult: seq.marginal_IndelAndChars.txt) - output for Chars and Gap Ancestral Reconstruction - MSA;
	12	--ML_Chars_ML_Gap <AncestralProb> # (deafult: AncestralMaxMarginalProb_Char_Indel.txt) - File with the max prob of each position on each node
	13
	14	--MP_GapOut <Indels_MP_State> # (deafult: Indels.parsimony) - Indel Satate for each MSA pos by parsimony
	15	--ML_Char_MP_Gap <ML_Char_MP_indels> # (deafult: AncestralMaxProbMarginal_Char_Parsimony_Indel.txt) - File with the max prob char of each position on each node and indel parsimony
	16	--Ancestral_MSA_MP_GAP <MSA_MP_Gap> # (deafult: seq.marginal_Chars_ParsimonyIndels.txt) - MSA Output for Chars and Parsimonuis Gap Ancestral Reconstruction;
	17
	18	--Debug # (deafult: off) printouts debug info
	19	" unless (@ARGV >= 1);
	20
	21	# Assign default
	22	my ($MSA_File,$OutDir,$Tree_File,$IndelsCutoff,$SeqType,$MarginalProb_of_Chars,$GapProb_OutFile,$Ancestral_MSA,$Ancestral_Prob,$GapParsimony_OutFile,$Ancestral_Prob_ParsimonyIndel,$Ancestral_MSA_Parsimony,$DEBUG_F,);
	23	$MSA_File="";
	24	$OutDir="";
	25	$Tree_File="";
	26
	27	$IndelsCutoff=0.5;
	28
	29	my $getoptResult = GetOptions ("MSA_File=s"=>\$MSA_File, # = means that this parameter is required, s means string
	30	"outDir=s"=>\$OutDir,
	31	"Tree_File=s"=>\$Tree_File,
	32	"seqType=s"=>\$SeqType, # aa\|nuc\|codon
	33	"indelCutOff:f"=>\$IndelsCutoff,
	34
	35	"CharsMarginalProb:s"=>\$MarginalProb_of_Chars, # (prob.marginal.txt) - prob of ancestral sequences - FASTML output
	36
	37	"ML_GapOut:s"=>\$GapProb_OutFile, # (IndelsMarginalProb.txt) - IndelsMarginalProb (IndelReconstructOutput)
	38	"ML_Ancestral_MSA:s"=>\$Ancestral_MSA, # (seq.marginal_IndelAndChars.txt) - output for Chars and Gap Ancestral Reconstruction - MSA;
	39	"ML_Chars_ML_Gap:s"=>\$Ancestral_Prob, # (Ancestral_MaxMarginalProb_Char_Indel.txt) - File with the max prob of each position on each node
	40
	41	"MP_GapOut:s"=>\$GapParsimony_OutFile, # (Indels.parsimony.txt) - Indel Satate for each MSA pos by parsimony
	42	"ML_Char_MP_Gap:s"=>\$Ancestral_Prob_ParsimonyIndel, # (Ancestral_MaxProb_Marginal_Char_Parsimony_Indel.txt) - File with the max prob char of each position on each node and indel parsimony
	43	"Ancestral_MSA_MP_GAP:s"=>\$Ancestral_MSA_Parsimony, # (seq.marginal_Chars_ParsimonyIndels.txt) - MSA Output for Chars and Parsimonuis Gap Ancestral Reconstruction;
	44
	45	"Debug" =>\$DEBUG_F,
	46	);
	47
	48	# default file names
	49	if ($OutDir!~/\/$/) {$OutDir.="/";}
	50	$GapProb_OutFile=$OutDir."IndelsMarginalProb.txt" if ((!defined $GapProb_OutFile) or ($GapProb_OutFile eq ""));
	51	$MarginalProb_of_Chars=$OutDir."prob.marginal.txt" if ((!defined $MarginalProb_of_Chars) or ($MarginalProb_of_Chars eq ""));
	52	$Ancestral_MSA=$OutDir."seq.marginal_IndelAndChars.txt" if ((!defined $Ancestral_MSA) or ($Ancestral_MSA eq ""));
	53	$Ancestral_Prob=$OutDir."Ancestral_MaxMarginalProb_Char_Indel.txt" if ((!defined $Ancestral_Prob) or ($Ancestral_Prob eq ""));
	54
	55	# default file names for PARSIMONY BASED OUTPUT
	56	$GapParsimony_OutFile=$OutDir."Indels.parsimony.txt" if ((!defined $GapParsimony_OutFile) or ($GapParsimony_OutFile eq "")); # Indel Satate for each MSA pos by parsimony
	57	$Ancestral_Prob_ParsimonyIndel=$OutDir."Ancestral_MaxProb_Marginal_Char_Parsimony_Indel.txt" if ((!defined $Ancestral_Prob_ParsimonyIndel) or ($Ancestral_Prob_ParsimonyIndel eq "")); # File with the max prob char of each position on each node and indel parsimony
	58	$Ancestral_MSA_Parsimony=$OutDir."seq.marginal_Chars_ParsimonyIndels.txt" if ((!defined $Ancestral_MSA_Parsimony) or ($Ancestral_MSA_Parsimony eq "")); # Output for parsimony Chars and Gap Ancestral Reconstruction;
	59
	60	my $DEBUG="NO";
	61	$DEBUG="YES" if ($DEBUG_F);
	62
	63	print "
	64	--MSA_File=$MSA_File
	65	--outDir=$OutDir
	66	--Tree_File=$Tree_File
	67	--seqType=$SeqType
	68	--indelCutOff=$IndelsCutoff
	69
	70	--CharsMarginalProb=$MarginalProb_of_Chars
	71
	72	--ML_GapOut=$GapProb_OutFile
	73	--ML_Ancestral_MSA=$Ancestral_MSA
	74	--ML_Chars_ML_Gap=$Ancestral_Prob
	75
	76	--MP_GapOut=$GapParsimony_OutFile
	77	--ML_Char_MP_Gap=$Ancestral_Prob_ParsimonyIndel
	78	--Ancestral_MSA_MP_GAP=$Ancestral_MSA_Parsimony
	79
	80	--Debug=$DEBUG\n";
	81
	82	#print "WAIT...\n";<STDIN>;
	83
	84
	85	# Constants
	86	my $ParsimonyCostMatrix=2;
	87	my $MSA_Prefix_Name="";
	88	if ($MSA_File=~/([^\/]+?)(.aln\|.faa\|.mfa\|.txt)?$/)
	89	{
	90	$MSA_Prefix_Name=$1;
	91	}
	92	else
	93	{
	94	$MSA_Prefix_Name=$MSA_File;
	95	}
	96	$DEBUG=uc($DEBUG);
	97	if (!defined $DEBUG)
	98	{
	99	$DEBUG="NO";
	100	}
	101
	102	# Programs Path
	103	#my $IndelCoder="/bioseq/FastML/IndelReconstruction/indelCoder";
	104	#my $IndelCoder="/bioseq/FastML/IndelReconstruction/indelCoder.V1.6";
	105	#my $IndelCoder="/bioseq/FastML/IndelReconstruction/indelCoder.V1.71";
	106	my $IndelCoder="$Bin/../../programs/indelCoder/indelCoder";
	107	#my $IndelReconstruction="/bioseq/FastML/IndelReconstruction/gainLoss.V9.9822"; # by gainLoss
	108	#my $IndelReconstruction="/bioseq/FastML/IndelReconstruction/gainLoss.V9.9863"; # by gainLoss
	109	my $IndelReconstruction="$Bin/../../programs/gainLoss/gainLoss"; # by gainLoss
	110
	111	# Globals File Names
	112	$OutDir=$OutDir."/" if ($OutDir!~/\/$/);
	113	my $Indels_Reconstruction_results_Dir=$OutDir."IndelsReconstruction/";
	114	# IndelCoder
	115	my $IndelCoderParamFile="IndelCoderParamFile";
	116	my $indelOutputFastaFile="$Indels_Reconstruction_results_Dir/$MSA_Prefix_Name".".indelOutputFastaFile";
	117	my $indelOutputInfoFile="$Indels_Reconstruction_results_Dir/$MSA_Prefix_Name".".indelOutputInfoFile";
	118	my $nexusFileName="$Indels_Reconstruction_results_Dir/$MSA_Prefix_Name".".indel_nexusFile";
	119	my $indelLogFile="$Indels_Reconstruction_results_Dir/$MSA_Prefix_Name"."IndelCoder.log";
	120
	121	# Indel Reconstruction
	122	my $IndelReconstructionParamFile="IndelReconstructionParamFile";
	123	#my $indelOutputFasta_NO_MISSING_DATA_File="$Indels_Reconstruction_results_Dir/$MSA_Prefix_Name"."_MISING_DATA_TO0.indelOutputFastaFile"; # For now gainLoss don't handle missing data so we replace '?' with 0
	124	my $AncestralReconstructIndelPosterior="$Indels_Reconstruction_results_Dir/RESULTS/AncestralReconstructPosterior.txt"; # The file with ancestral prob of indel
	125	my $AncestralReconstructParsimony="$Indels_Reconstruction_results_Dir/RESULTS/gainLossMP.".$ParsimonyCostMatrix.".AncestralReconstructSankoff.txt";
	126	# Joint character based Ancestral MSA with Indel Reconstruction
	127
	128	mkdir ($Indels_Reconstruction_results_Dir);
	129
	# Collect the name of every sequence (FASTA header) present in the input MSA.
	# The hash is used further down to tell original sequences apart from ancestral nodes.
	my %Species_On_MSA=(); # All species in the MSA - MAYBE TO REMOVE
	# Fail loudly if the MSA cannot be read instead of silently ending up with an empty hash.
	open (MSA,"<",$MSA_File) or die "IndelReconstruction_Wrapper: Can't open the MSA file '$MSA_File' $!";
	while (my $line=<MSA>)
	{
	    chomp ($line);
	    # Everything after '>' on a header line is taken as the sequence name
	    $Species_On_MSA{$1}=1 if ($line=~/^>(.*)/);
	}
	close (MSA);
	140	# Read MSA to Hash
	141	my $MSA_Hash_ref=readMSA($MSA_File);
	142	my %MSA_Hash=%{$MSA_Hash_ref};
	143
	144	# Prepare indel Coder ParamFile
	145	open (INDEL_CODER_PARAMS,">$Indels_Reconstruction_results_Dir$IndelCoderParamFile") \|\| die "IndelReconstruction_Wrapper: Can't open IndelCoderParamFile '$Indels_Reconstruction_results_Dir$IndelCoderParamFile' $!";
	146	print INDEL_CODER_PARAMS "_seqFile $MSA_File\n";
	147	print INDEL_CODER_PARAMS "_indelOutputInfoFile $indelOutputInfoFile\n";
	148	print INDEL_CODER_PARAMS "_indelOutputFastaFile $indelOutputFastaFile\n";
	149	print INDEL_CODER_PARAMS "_nexusFileName $nexusFileName\n";
	150	print INDEL_CODER_PARAMS "_logFile $indelLogFile\n";
	151	print INDEL_CODER_PARAMS "_logValue 9\n";
	152	print INDEL_CODER_PARAMS "_codingType SIC\n";
	153	print INDEL_CODER_PARAMS "_isOmitLeadingAndEndingGaps 0\n";
	154
	155	close (INDEL_CODER_PARAMS);
	156
	157	system ("cd $Indels_Reconstruction_results_Dir; $IndelCoder $IndelCoderParamFile");
	158
	159	if (!-e $indelOutputFastaFile)
	160	{
	161	die "IndelReconstruction_Wrapper: $indelOutputFastaFile was not created or empty, please have a look on the indel coder log file at: $indelLogFile";
	162	}
	163
	164	# Run indelReconstruction by gainLoss
	165	my $removed_BP_InternalNodeName=remove_InternalNodeName_or_BPvalues($Tree_File,$Tree_File.".Orig");
	166	copy ($Tree_File,"$Tree_File.ForIndelReconstruction");
	167	move ("$Tree_File.Orig",$Tree_File) if (-e "$Tree_File.Orig");
	168	open (INDEL_RECONSTRUCTION_PARAMS,">$Indels_Reconstruction_results_Dir$IndelReconstructionParamFile") \|\| die "Can't open IndelReconstructionParamFile '$Indels_Reconstruction_results_Dir$IndelReconstructionParamFile' $!";
	169	print INDEL_RECONSTRUCTION_PARAMS "_seqFile $indelOutputFastaFile\n";
	170	print INDEL_RECONSTRUCTION_PARAMS "_treeFile $Tree_File.ForIndelReconstruction\n";
	171	print INDEL_RECONSTRUCTION_PARAMS "_isRootFreqEQstationary 1\n";
	172	print INDEL_RECONSTRUCTION_PARAMS "_calculateAncestralReconstruct 1\n";
	173	print INDEL_RECONSTRUCTION_PARAMS "_costMatrixGainLossRatio 2\n";
	174	print INDEL_RECONSTRUCTION_PARAMS "_minNumOfOnes 1\n";
	175	close (INDEL_RECONSTRUCTION_PARAMS);
	176	system ("cd $Indels_Reconstruction_results_Dir; $IndelReconstruction $IndelReconstructionParamFile");
	177	my %MSA_Pos_Species_to_Indel=();
	178	my %MSAtoIndel=();
	179	my ($MSA_Pos_Species_to_Indel,$MSAtoIndel)=Read_MSA_to_Indels_Info($indelOutputInfoFile,\%MSA_Pos_Species_to_Indel,\%MSAtoIndel); # hash1 - key1:MSA_Pos,key2:species; value:IndelMSAPos; hash2 - key: MSA_Pos;value: IndelsMSA_Pos (array)
	180	my %AncestralReconstructIndelPosterior_Hash=();
	181	my $AncestralReconstructIndelPosterior_Reff=Read_Ancestral_Prob_For_Indel($AncestralReconstructIndelPosterior,\%AncestralReconstructIndelPosterior_Hash); # hash = key1:IndelMSA_Pos,key2:species; value Prob for indel
	182
	183	####### HADLE WITH PROB RECONSTRUCTION
	184	%AncestralReconstructIndelPosterior_Hash=%$AncestralReconstructIndelPosterior_Reff;
	185	my %MSA_Pos_Species_AncestorIndelProb=(); # Will hold for each MSA_Pos and Species the vector of IndelPos_ProbOfIndel
	186	print "HADLE WITH PROB RECONSTRUCTION LOOP\n====================================================\n" if ($DEBUG eq "YES");
	187	## MAKE UNIQ
	188	print "+++++++++++++++++DEBUG - PRINT INDEL POS TO INDEL NOT UNIQ ++++++++++++++++++++++\n" if ($DEBUG eq "YES");
	189	foreach my $MSA_Pos (sort {$a<=>$b} keys %$MSAtoIndel)
	190	{
	191	print "MSA:$MSA_Pos\t",join(",",@{$MSAtoIndel->{$MSA_Pos}}),"\n" if ($DEBUG eq "YES");
	192	my $tmp_array=uniq_array($MSAtoIndel->{$MSA_Pos});
	193	$MSAtoIndel->{$MSA_Pos}=[@{$tmp_array}];
	194	}
	195	print "+++++++++++++++++DEBUG - PRINT INDEL POS TO INDEL UNIQ +++++++++++++++++++++++++\n" if ($DEBUG eq "YES");
	196	foreach my $MSA_Pos (sort {$a<=>$b} keys %$MSAtoIndel)
	197	{
	198	print "MSA:$MSA_Pos\t",join(",",@{$MSAtoIndel->{$MSA_Pos}}),"\n" if ($DEBUG eq "YES");
	199	}
	200	print "+++++++++++++++++ END DEBUG ++++++++++++++++++++++++\n" if ($DEBUG eq "YES");
	201
	202	foreach my $MSA_Pos (sort {$a<=>$b} keys %$MSAtoIndel)
	203	{
	204	print "MSA:$MSA_Pos," if ($DEBUG eq "YES"); # DEBUG
	205	foreach my $IndelPos (@{$MSAtoIndel->{$MSA_Pos}})
	206	{
	207	print "Indel:$IndelPos - $AncestralReconstructIndelPosterior_Hash{$IndelPos}" if ($DEBUG eq "YES"); # empty
	208	foreach my $species (keys %{$AncestralReconstructIndelPosterior_Hash{$IndelPos}})
	209	{
	210	if (!exists $Species_On_MSA{$species}) # Ancestral Node # CONSIDER REMOVE
	211	{
	212	my $IndelPos_ProbOfIndel=$IndelPos."_".$AncestralReconstructIndelPosterior_Hash{$IndelPos}{$species};
	213	if (!exists $MSA_Pos_Species_AncestorIndelProb{$MSA_Pos}{$species}){$MSA_Pos_Species_AncestorIndelProb{$MSA_Pos}{$species}=[$IndelPos_ProbOfIndel];}
	214	else {push @{$MSA_Pos_Species_AncestorIndelProb{$MSA_Pos}{$species}},$IndelPos_ProbOfIndel;}
	215	print "$MSA_Pos\t$IndelPos\t$species\t$AncestralReconstructIndelPosterior_Hash{$IndelPos}{$species}\n" if ($DEBUG eq "YES"); # DEBUG
	216	}
	217	}
	218	}
	219	}
	220	open (GAP_PROB,">$GapProb_OutFile") \|\| die "Can't open '$GapProb_OutFile' $!";
	221	print GAP_PROB "Pos\tNode\tProb_Of_Indel\n";
	222	my %MSA_Pos_Node_MaxProbOf_Gap=();
	223	foreach my $MSA_Pos (sort {$a<=>$b} keys %MSA_Pos_Species_AncestorIndelProb)
	224	{
	225	foreach my $species (sort keys %{$MSA_Pos_Species_AncestorIndelProb{$MSA_Pos}})
	226	{
	227	if (!exists $Species_On_MSA{$species}) # Ancestral Node # CONSIDER REMOVE
	228	{
	229	print "$MSA_Pos\t$species" if ($DEBUG eq "YES");
	230	print GAP_PROB "$MSA_Pos\t$species";
	231	my $Uniq_Indels_Reff=uniq_array($MSA_Pos_Species_AncestorIndelProb{$MSA_Pos}{$species});
	232	my @Uniq_Indels=@$Uniq_Indels_Reff;
	233	my $NumOfIndelCoverMSA_Pos=@Uniq_Indels;
	234	my @ProbsOfIndel;
	235	for (my $i=0;$i<$NumOfIndelCoverMSA_Pos;$i++)
	236	{
	237	my $Indel_IndelProb=$Uniq_Indels[$i];
	238	my ($Indel_Pos,$IndelProb)=split("_",$Indel_IndelProb);
	239	print "\t$Indel_Pos:$IndelProb" if ($DEBUG eq "YES");
	240	push (@ProbsOfIndel,$IndelProb);
	241	}
	242	my $maxProbOfIndel = (sort { $b <=> $a } @ProbsOfIndel)[0];
	243	print "\tMAX:$maxProbOfIndel\n" if ($DEBUG eq "YES");
	244	print GAP_PROB "\t$maxProbOfIndel\n";
	245	$MSA_Pos_Node_MaxProbOf_Gap{$MSA_Pos}{$species}=$maxProbOfIndel;
	246	}
	247	}
	248	}
	249	close (GAP_PROB);
	250	my %MSA_Pos_Node_Char_or_Gap=();
	251	# Read the Chars Marginal Prob
	252	my ($MSA_Pos_Node_Char_Marginal_Prob_Reff,$Nodes_Name_Reff,$MSA_Length)=Read_Char_Marginal_Prob($MarginalProb_of_Chars);
	253	print "MSA_Length:$MSA_Length\n" if ($DEBUG eq "YES");
	254	my @Nodes=@$Nodes_Name_Reff;
	255	open (ANCESTRAL_PROB,">$Ancestral_Prob")\|\| die "Can't open Ancestral Prob File: '$Ancestral_Prob' $!\n";
	256	print ANCESTRAL_PROB "Pos_on_MSA\tNode\tChar\tCharProb\n";
	257	foreach my $MSA_Pos (sort {$a<=>$b} keys %{$MSA_Pos_Node_Char_Marginal_Prob_Reff})
	258	{
	259	print "MSA:$MSA_Pos\n" if ($DEBUG eq "YES");
	260	foreach my $Node (sort keys %{$MSA_Pos_Node_Char_Marginal_Prob_Reff->{$MSA_Pos}})
	261	{
	262	my $maxProbChar="NA";
	263	my $maxProb=0;
	264	my $Num_Of_1=0;
	265	foreach my $Char (sort keys %{$MSA_Pos_Node_Char_Marginal_Prob_Reff->{$MSA_Pos}->{$Node}})
	266	{
	267
	268	if (($MSA_Pos_Node_Char_Marginal_Prob_Reff->{$MSA_Pos}->{$Node}->{$Char}>$maxProb)&&(defined $MSA_Pos_Node_Char_Marginal_Prob_Reff->{$MSA_Pos}->{$Node}->{$Char}))
	269	{
	270	$maxProbChar=$Char;
	271	$maxProb=$MSA_Pos_Node_Char_Marginal_Prob_Reff->{$MSA_Pos}->{$Node}->{$Char};
	272	}
	273	$Num_Of_1++ if ($MSA_Pos_Node_Char_Marginal_Prob_Reff->{$MSA_Pos}->{$Node}->{$Char}==1);
	274	}
	275
	276	# Decide what is the most probable char on pos
	277	if ($Num_Of_1>1) # GAP
	278	{
	279	if ($SeqType eq "codon")
	280	{
	281	$MSA_Pos_Node_Char_or_Gap{$MSA_Pos}{$Node}="---".":1";
	282	}
	283	else
	284	{
	285	$MSA_Pos_Node_Char_or_Gap{$MSA_Pos}{$Node}="-".":1";
	286	}
	287
	288	$maxProbChar="NA";
	289	$maxProb=0;
	290	}
	291	else
	292	{
	293	if (!exists $MSA_Pos_Node_MaxProbOf_Gap{$MSA_Pos}{$Node}){$MSA_Pos_Node_MaxProbOf_Gap{$MSA_Pos}{$Node}="NA";}
	294	print "NODE:$Node - $maxProbChar:$maxProb ? -:$MSA_Pos_Node_MaxProbOf_Gap{$MSA_Pos}{$Node}\n" if ($DEBUG eq "YES");#<STDIN>; # DEBUG
	295	if (($SeqType eq "aa") or ($SeqType eq "nuc"))
	296	{
	297	if ($MSA_Pos_Node_MaxProbOf_Gap{$MSA_Pos}{$Node} eq "NA")
	298	{
	299	$MSA_Pos_Node_Char_or_Gap{$MSA_Pos}{$Node}=$maxProbChar.":".$maxProb;
	300	}
	301	# elsif ($maxProb>=$MSA_Pos_Node_MaxProbOf_Gap{$MSA_Pos}{$Node}) # MOST PROBALBE IS THE CHAR
	302	#elsif ($MSA_Pos_Node_MaxProbOf_Gap{$MSA_Pos}{$Node}<(1-$MSA_Pos_Node_MaxProbOf_Gap{$MSA_Pos}{$Node})) # MOST PROBALBE IS THE CHAR
	303	elsif ($MSA_Pos_Node_MaxProbOf_Gap{$MSA_Pos}{$Node}<$IndelsCutoff) # MOST PROBALBE IS THE CHAR
	304	{
	305	$MSA_Pos_Node_Char_or_Gap{$MSA_Pos}{$Node}=$maxProbChar.":".$maxProb;
	306	}
	307	else
	308	{
	309	$MSA_Pos_Node_Char_or_Gap{$MSA_Pos}{$Node}="-".":".$MSA_Pos_Node_MaxProbOf_Gap{$MSA_Pos}{$Node};
	310	#$MSA_Pos_Node_Char_or_Gap{$MSA_Pos}{$Node}="---".":".$MSA_Pos_Node_MaxProbOf_Gap{$MSA_Pos}{$Node} if ($SeqType eq "codon");
	311	}
	312	}
	313	elsif ($SeqType eq "codon")
	314	{
	315	# MSA Pos is according to the codon number (i.e ((MSA_Pos-1)/3)+1)
	316	my $MSA_Pos_GAP=(($MSA_Pos-1)*3)+1; # The real char on the MSA
	317	if (!exists $MSA_Pos_Node_MaxProbOf_Gap{$MSA_Pos_GAP}{$Node}){$MSA_Pos_Node_MaxProbOf_Gap{$MSA_Pos_GAP}{$Node}="NA";}
	318	if ($MSA_Pos_Node_MaxProbOf_Gap{$MSA_Pos_GAP}{$Node} eq "NA")
	319	{
	320	$MSA_Pos_Node_Char_or_Gap{$MSA_Pos}{$Node}=$maxProbChar.":".$maxProb;
	321	}
	322	# elsif ($maxProb>=$MSA_Pos_Node_MaxProbOf_Gap{$MSA_Pos}{$Node}) # MOST PROBALBE IS THE CHAR
	323	#elsif ($MSA_Pos_Node_MaxProbOf_Gap{$MSA_Pos_GAP}{$Node}<(1-$MSA_Pos_Node_MaxProbOf_Gap{$MSA_Pos_GAP}{$Node})) # MOST PROBALBE IS THE CHAR
	324	elsif ($MSA_Pos_Node_MaxProbOf_Gap{$MSA_Pos_GAP}{$Node}<$IndelsCutoff) # MOST PROBALBE IS THE CHAR
	325	{
	326	$MSA_Pos_Node_Char_or_Gap{$MSA_Pos}{$Node}=$maxProbChar.":".$maxProb;
	327	}
	328	else
	329	{
	330	$MSA_Pos_Node_Char_or_Gap{$MSA_Pos}{$Node}="---".":".$MSA_Pos_Node_MaxProbOf_Gap{$MSA_Pos_GAP}{$Node};
	331	}
	332	}
	333	}
	334	my ($CharForPrint,$ProbForPrint)=split(/:/,$MSA_Pos_Node_Char_or_Gap{$MSA_Pos}{$Node});
	335	if ($SeqType eq "codon")
	336	{
	337	my $MSA_Pos_GAP=(($MSA_Pos-1)*3)+1; # The real char
	338	print ANCESTRAL_PROB "$MSA_Pos_GAP\t$Node\t$CharForPrint\t$ProbForPrint\n";#$MSA_Pos_Node_Char_or_Gap{$MSA_Pos}{$Node}\n";
	339	}
	340	else
	341	{
	342	print ANCESTRAL_PROB "$MSA_Pos\t$Node\t$CharForPrint\t$ProbForPrint\n";#$MSA_Pos_Node_Char_or_Gap{$MSA_Pos}{$Node}\n";
	343	}
	344	}
	345	}
	346
	347
	348	### PRINT THE GAP and CHAR Ancestral MSA
	# Write the ancestral MSA that combines the most probable characters with the
	# probability-based gap decisions stored in %MSA_Pos_Node_Char_or_Gap.
	open (MSA_OUT,">$Ancestral_MSA") || die "Can't open Output MSA: '$Ancestral_MSA' $!\n";
	foreach my $Node (@Nodes)
	{
	    print MSA_OUT ">$Node\n";
	    if (exists $MSA_Hash{$Node}) # Original sequence - copied as read from the input MSA
	    {
	        print MSA_OUT "$MSA_Hash{$Node}\n";
	    }
	    else # Ancestral seq
	    {
	        for (my $i=1;$i<=$MSA_Length;$i++)
	        {
	            # Entries are stored as "<char>:<prob>"; only the char goes to the MSA
	            my ($Char)=split(":",$MSA_Pos_Node_Char_or_Gap{$i}{$Node});
	            print MSA_OUT $Char;
	        }
	        print MSA_OUT "\n";
	    }
	}
	# Close explicitly (as the parsimony MSA writer does) so the file is flushed to disk here
	close (MSA_OUT);
	368	### TO HERE
	369
	370	# For Parsimony (COPY OF THE CODE ABOVE...) TO DO: CHANGE IT SOME DAY...
	371	my %AncestralReconstructIndelParsimony_Hash=();
	372	my $AncestralReconstructIndelParsimony_Reff=Read_Ancestral_Parsimony_State($AncestralReconstructParsimony,\%AncestralReconstructIndelParsimony_Hash); # hash = key1:IndelMSA_Pos,key2:species; value 1 for indel 0 for char
	373	%AncestralReconstructIndelParsimony_Hash=%$AncestralReconstructIndelParsimony_Reff;
	374
	375	my %MSA_Pos_Species_AncestorIndelParsimony=(); # Will hold for each MSA_Pos and Species the vector of IndelPos_ProbOfIndel
	376	foreach my $MSA_Pos (sort {$a<=>$b} keys %$MSAtoIndel)
	377	{
	378	# print "MSA:$MSA_Pos,";
	379	foreach my $IndelPos (@{$MSAtoIndel->{$MSA_Pos}})
	380	{
	381	# print "Indel:$IndelPos - $AncestralReconstructIndelPosterior_Hash{$IndelPos}"; # empty
	382	foreach my $species (keys %{$AncestralReconstructIndelParsimony_Hash{$IndelPos}})
	383	{
	384	my $IndelPos_ProbOfIndel=$IndelPos."_".$AncestralReconstructIndelParsimony_Hash{$IndelPos}{$species};
	385	if (!exists $MSA_Pos_Species_AncestorIndelParsimony{$MSA_Pos}{$species}){$MSA_Pos_Species_AncestorIndelParsimony{$MSA_Pos}{$species}=[$IndelPos_ProbOfIndel];}
	386	else {push @{$MSA_Pos_Species_AncestorIndelParsimony{$MSA_Pos}{$species}},$IndelPos_ProbOfIndel;}
	387	# print "$MSA_Pos\t$IndelPos\t$species\t$AncestralReconstructIndelPosterior_Hash{$IndelPos}{$species}\n";
	388	}
	389	}
	390	}
	# Output table of parsimony gap states; the error message names the file that is actually opened
	open (GAP_PARSIMONY,">$GapParsimony_OutFile") || die "Can't open '$GapParsimony_OutFile' $!";
	392	print GAP_PARSIMONY "Pos\tNode\tGap\n";
	393	my %MSA_Pos_Node_ParsimonyOf_Gap=();
	394	foreach my $MSA_Pos (sort {$a<=>$b} keys %MSA_Pos_Species_AncestorIndelParsimony)
	395	{
	396	foreach my $species (sort keys %{$MSA_Pos_Species_AncestorIndelParsimony{$MSA_Pos}})
	397	{
	398	print "$MSA_Pos\t$species" if ($DEBUG eq "YES");
	399	print GAP_PARSIMONY "$MSA_Pos\t$species" if ($species=~/^N\d+$/); # print only ancestral nodes
	400	my $Uniq_Indels_Reff=uniq_array($MSA_Pos_Species_AncestorIndelParsimony{$MSA_Pos}{$species});
	401	my @Uniq_Indels=@$Uniq_Indels_Reff;
	402	my $NumOfIndelCoverMSA_Pos=@Uniq_Indels;
	403	my @ParsimonyOfIndel;
	404	for (my $i=0;$i<$NumOfIndelCoverMSA_Pos;$i++)
	405	{
	406	my $Indel_IndelParsimony=$Uniq_Indels[$i];
	407	my ($Indel_Pos,$IndelParsimony)=split("_",$Indel_IndelParsimony);
	408	print "\t$Indel_Pos:$IndelParsimony" if ($DEBUG eq "YES");
	409	push (@ParsimonyOfIndel,$IndelParsimony);
	410	}
	411	# my $minProbOfIndel = (sort { $a <=> $b } @ParsimonyOfIndel)[0]; # WE GAVE PRIORITY TO CHAR (used when we had old (<=1.71) indelCoder)
	412	# print "\tMAX:$minProbOfIndel\n" if ($DEBUG eq "YES");
	413	# print GAP_PARSIMONY "\t$minProbOfIndel\n";
	414	# $MSA_Pos_Node_ParsimonyOf_Gap{$MSA_Pos}{$species}=$minProbOfIndel;
	415	my $maxProbOfIndel = (sort { $b <=> $a } @ParsimonyOfIndel)[0];
	416	print "\tMAX:$maxProbOfIndel\n" if ($DEBUG eq "YES");
	417	print GAP_PARSIMONY "\t$maxProbOfIndel\n" if ($species=~/^N\d+$/); # print only ancestral nodes;
	418	$MSA_Pos_Node_ParsimonyOf_Gap{$MSA_Pos}{$species}=$maxProbOfIndel;
	419	}
	420	}
	421	close (GAP_PARSIMONY);
	422	my %MSA_Pos_Node_Char_or_Gap_Parsimony=();
	423	open (ANCESTRAL_PROB_PARSIMONY_INDEL,">$Ancestral_Prob_ParsimonyIndel")\|\| die "IndelReconstruction_Wrapper::Can't open Ancestral Prob Parsimony Indel File: '$Ancestral_Prob_ParsimonyIndel' $!\n";
	424	print ANCESTRAL_PROB_PARSIMONY_INDEL "Pos_on_MSA\tNode\tChar\tCharProb\n";
	425	foreach my $MSA_Pos (sort {$a<=>$b} keys %{$MSA_Pos_Node_Char_Marginal_Prob_Reff})
	426	{
	427	print "MSA:$MSA_Pos\n" if ($DEBUG eq "YES");
	428	foreach my $Node (sort keys %{$MSA_Pos_Node_Char_Marginal_Prob_Reff->{$MSA_Pos}})
	429	{
	430	my $maxProbChar="NA";
	431	my $maxProb=0;
	432	my $Num_Of_1=0;
	433	foreach my $Char (sort keys %{$MSA_Pos_Node_Char_Marginal_Prob_Reff->{$MSA_Pos}->{$Node}})
	434	{
	435
	436	if (($MSA_Pos_Node_Char_Marginal_Prob_Reff->{$MSA_Pos}->{$Node}->{$Char}>$maxProb)&&(defined $MSA_Pos_Node_Char_Marginal_Prob_Reff->{$MSA_Pos}->{$Node}->{$Char}))
	437	{
	438	$maxProbChar=$Char;
	439	$maxProb=$MSA_Pos_Node_Char_Marginal_Prob_Reff->{$MSA_Pos}->{$Node}->{$Char};
	440	}
	441	$Num_Of_1++ if ($MSA_Pos_Node_Char_Marginal_Prob_Reff->{$MSA_Pos}->{$Node}->{$Char}==1);
	442	}
	443	# Decide what is the most probable char on pos
	444	if ($Num_Of_1>1) # GAP ON ORIGINAL SEQ (NOT ANCESTRAL)
	445	{
	446	if ($SeqType eq "codon")
	447	{
	448	$MSA_Pos_Node_Char_or_Gap_Parsimony{$MSA_Pos}{$Node}="---".":1";
	449	}
	450	else
	451	{
	452	$MSA_Pos_Node_Char_or_Gap_Parsimony{$MSA_Pos}{$Node}="-".":1";
	453	}
	454	$maxProbChar="NA";
	455	$maxProb=0;
	456	}
	457	else
	458	{
	459
	460	if (($SeqType eq "aa") or ($SeqType eq "nuc"))
	461	{
	462	# print "NODE:$Node - $maxProbChar:$maxProb ? -:$MSA_Pos_Node_MaxProbOf_Gap{$MSA_Pos}{$Node}\n";#<STDIN>;
	463	if (!exists $MSA_Pos_Node_ParsimonyOf_Gap{$MSA_Pos}{$Node})
	464	{
	465	$MSA_Pos_Node_Char_or_Gap_Parsimony{$MSA_Pos}{$Node}=$maxProbChar.":".$maxProb;
	466	}
	467	# elsif ($maxProb>=$MSA_Pos_Node_MaxProbOf_Gap{$MSA_Pos}{$Node}) # MOST PROBALBE IS THE CHAR
	468	elsif ($MSA_Pos_Node_ParsimonyOf_Gap{$MSA_Pos}{$Node}==0) # NO GAP BY PARSIMONY - MOST PROBALBE IS THE CHAR
	469	{
	470	$MSA_Pos_Node_Char_or_Gap_Parsimony{$MSA_Pos}{$Node}=$maxProbChar.":".$maxProb;
	471	}
	472	elsif ($MSA_Pos_Node_ParsimonyOf_Gap{$MSA_Pos}{$Node}==1)
	473	{
	474	$MSA_Pos_Node_Char_or_Gap_Parsimony{$MSA_Pos}{$Node}="-".":"."1";
	475	$MSA_Pos_Node_Char_or_Gap_Parsimony{$MSA_Pos}{$Node}="---".":"."1" if ($SeqType eq "codon");
	476	}
	477	}
	478	elsif ($SeqType eq "codon")
	479	{
	480	# MSA Pos is according to the codon number (i.e ((MSA_Pos-1)/3)+1)
	481	my $MSA_Pos_GAP=(($MSA_Pos-1)*3)+1; # The real char on the MSA
	482	if (!exists $MSA_Pos_Node_ParsimonyOf_Gap{$MSA_Pos_GAP}{$Node}){$MSA_Pos_Node_ParsimonyOf_Gap{$MSA_Pos_GAP}{$Node}="NA";}
	483	if ($MSA_Pos_Node_ParsimonyOf_Gap{$MSA_Pos_GAP}{$Node} eq "NA")
	484	{
	485	$MSA_Pos_Node_Char_or_Gap_Parsimony{$MSA_Pos}{$Node}=$maxProbChar.":".$maxProb;
	486	}
	487	# elsif ($maxProb>=$MSA_Pos_Node_MaxProbOf_Gap{$MSA_Pos}{$Node}) # MOST PROBALBE IS THE CHAR
	488	#elsif ($MSA_Pos_Node_ParsimonyOf_Gap{$MSA_Pos_GAP}{$Node}<(1-$MSA_Pos_Node_ParsimonyOf_Gap{$MSA_Pos_GAP}{$Node})) # MOST PROBALBE IS THE CHAR
	489	elsif ($MSA_Pos_Node_ParsimonyOf_Gap{$MSA_Pos_GAP}{$Node}<$IndelsCutoff) # MOST PROBALBE IS THE CHAR
	490	{
	491	$MSA_Pos_Node_Char_or_Gap_Parsimony{$MSA_Pos}{$Node}=$maxProbChar.":".$maxProb;
	492	}
	493	else
	494	{
	495	$MSA_Pos_Node_Char_or_Gap_Parsimony{$MSA_Pos}{$Node}="---".":".$MSA_Pos_Node_ParsimonyOf_Gap{$MSA_Pos_GAP}{$Node};
	496	}
	497	}
	498	}
	499	my ($CharForPrint,$ProbForPrint)=split(/:/,$MSA_Pos_Node_Char_or_Gap_Parsimony{$MSA_Pos}{$Node});
	500	if ($SeqType eq "codon")
	501	{
	502	my $MSA_Pos_GAP=(($MSA_Pos-1)*3)+1; # The real char on the MSA
	503	print ANCESTRAL_PROB_PARSIMONY_INDEL "$MSA_Pos_GAP\t$Node\t$CharForPrint\t$ProbForPrint\n";#$MSA_Pos_Node_Char_or_Gap{$MSA_Pos}{$Node}\n";
	504	}
	505	else
	506	{
	507	print ANCESTRAL_PROB_PARSIMONY_INDEL "$MSA_Pos\t$Node\t$CharForPrint\t$ProbForPrint\n";#$MSA_Pos_Node_Char_or_Gap{$MSA_Pos}{$Node}\n";
	508	}
	509	}
	510	}
	511
	512
	513	### PRINT THE GAP and CHAR Ancestral MSA
	514	open (MSA_OUT_PARSIMONY,">$Ancestral_MSA_Parsimony") \|\| die "Can't open Output MSA PARSIMONY : '$Ancestral_MSA_Parsimony' $!\n";
	515	foreach my $Node (@Nodes)
	516	{
	517	if (exists $MSA_Hash{$Node}) # Original sequence
	518	{
	519	print MSA_OUT_PARSIMONY ">$Node\n";
	520	print MSA_OUT_PARSIMONY "$MSA_Hash{$Node}\n";
	521	}
	522	else
	523	{
	524	print MSA_OUT_PARSIMONY ">$Node\n";
	525	for (my $i=1;$i<=$MSA_Length;$i++)
	526	{
	527	my ($Char,$Prob)=split(":",$MSA_Pos_Node_Char_or_Gap_Parsimony{$i}{$Node});
	528	print MSA_OUT_PARSIMONY $Char;
	529	}
	530	print MSA_OUT_PARSIMONY "\n";
	531	}
	532	}
	533	close (MSA_OUT_PARSIMONY);
	534
	535
	sub Read_MSA_to_Indels_Info
	# Parse the indelCoder info file and map every MSA position to the indel character(s) covering it.
	# Arguments: info file path, ref to hash {MSA_Pos}{species}=>[indel ids], ref to hash {MSA_Pos}=>[indel ids].
	# Returns references to updated (shallow) copies of the two hashes, in the same order.
	{
	    # A record in the info file looks like:
	    #character number: 0
	    #Start position relative to MSA: 0
	    #End position relative to MSA: 1
	    #Length: 1
	    #Found in species: DQ373066.PTT Start position relative to genome: 0 Length: 1
	    #ENDCHARACTER

	    print "MAPPING MSA POS TO INDEL\n==============================================================\n" if ($DEBUG eq "YES");
	    my ($IndelInfo,$by_pos_species_ref,$by_pos_ref)=@_;
	    my %by_pos_species=%$by_pos_species_ref;
	    my %by_pos=%$by_pos_ref;
	    open (INDELS,$IndelInfo) || die "Can't open IndelInfo File: '$IndelInfo' $!";
	    my ($indel_id,$msa_start)=("","");
	    while (my $line=<INDELS>)
	    {
	        chomp ($line);
	        if ($line=~/character number: ([0-9]+)/)
	        {
	            $indel_id=$1+1; # indel characters are 0-based in the file
	        }
	        elsif ($line =~/Start position relative to MSA: ([0-9]+)/)
	        {
	            $msa_start=$1+1; # MSA positions are 0-based in the file
	        }
	        elsif ($line=~/Found in species: (.*?) Start position relative to genome: ([0-9]+) Length: ([0-9]+)/)
	        {
	            my ($species,$run_length)=($1,$3);
	            # Register the current indel on every MSA column it spans
	            foreach my $offset (0..$run_length-1)
	            {
	                my $column=$msa_start+$offset;
	                push (@{$by_pos_species{$column}{$species}},$indel_id);
	                push (@{$by_pos{$column}},$indel_id);
	                print "$column\t",$species,"\t",join(",",@{$by_pos{$column}}),"\n" if ($DEBUG eq "YES"); # QA
	            }
	        }
	        print "===========================\n" if ($DEBUG eq "YES");
	    }
	    close (INDELS);
	    return (\%by_pos_species,\%by_pos);
	}
	sub Read_Ancestral_Parsimony_State
	# Read the parsimony reconstruction table (tab separated: POS, Node, State) that follows
	# six header lines. Arguments: file path, ref to a hash used as the starting content.
	# Returns a ref to a hash {POS}{Node} => 0 (char) or 1 (indel).
	{
	    my ($parsimony_file,$initial_ref)=@_;
	    my %indel_state=%$initial_ref;

	    open (ANCESTRAL_INDEL_STATE,$parsimony_file) || die "Can't open AncestralReconstructParsimony: '$parsimony_file' $!";
	    # Skip the six header lines, which look like:
	    # print with MP based on the cost matrix:
	    # 0->0 =0
	    # 0->1 =2
	    # 1->0 =1
	    # 1->1 =0
	    #POS Node State
	    my $line;
	    $line=<ANCESTRAL_INDEL_STATE> for (1..6);

	    while ($line=<ANCESTRAL_INDEL_STATE>)
	    {
	        chomp ($line);
	        my ($POS,$Node,$State)=split(/\t/,$line);
	        # Any non-zero state is recorded as an indel
	        $indel_state{$POS}{$Node}=($State==0) ? 0 : 1;
	    }
	    close (ANCESTRAL_INDEL_STATE);
	    return \%indel_state;
	}
	623
	sub Read_Ancestral_Prob_For_Indel
	# Read the posterior indel table (tab separated: POS, Node, State, Prob) that follows one header line.
	# Arguments: file path, ref to a hash used as the starting content.
	# Returns a ref to a hash {POS}{Node} => Prob.
	{
	    my ($AncestralReconstructPosterior,$AncestralReconstructIndelPosterior_Reff)=@_;
	    my %posterior=%$AncestralReconstructIndelPosterior_Reff;
	    print "Read_Ancestral_Prob_For_Indel: $AncestralReconstructPosterior $AncestralReconstructIndelPosterior_Reff\n=========================================================================================\n" if ($DEBUG eq "YES"); # DEBUG
	    open (ANCESTRAL_INDEL_PROB,$AncestralReconstructPosterior) || die "IndelReconstruction_Wrapper.pl:Can't open AncestralReconstructPosterior: '$AncestralReconstructPosterior' $!";
	    my $line=<ANCESTRAL_INDEL_PROB>; # header line is discarded
	    while ($line=<ANCESTRAL_INDEL_PROB>)
	    {
	        chomp ($line);
	        # The third column (State) is not used
	        my ($POS,$Node,undef,$Prob)=split(/\t/,$line);
	        $posterior{$POS}{$Node}=$Prob;
	        print "AncestralReconstructIndelPosterior{$POS}{$Node}=$Prob\n" if ($DEBUG eq "YES"); # DEBUG
	    }
	    close (ANCESTRAL_INDEL_PROB);
	    return \%posterior;
	}
	642
	# Strip internal node names (N<digits>) and bootstrap values from a tree file.
	# Arguments: path of the tree file to clean, path under which the untouched file is kept.
	# When anything was stripped, the original file is renamed to the second path and the
	# cleaned tree is written (as a single line) to the first path.
	# Returns "yes" if the tree was changed, "no" otherwise. Dies on I/O failure.
	sub remove_InternalNodeName_or_BPvalues {
	    my ($IN_treeFile,$OLD_treeFile)=@_;
	    my $treeFileOneLine="";
	    open(TREEFILE,"<",$IN_treeFile) || die "IndelReconstruction_Wrapper.pl:remove_InternalNodeName_or_BPvalues: Can't open TREEFILE for reading '$IN_treeFile' $!";
	    # Join the whole file into one line so the patterns below can span line breaks
	    while (my $line=<TREEFILE>) {
	        chomp($line);
	        $treeFileOneLine .= $line;
	    }
	    close TREEFILE;

	    # s///g returns the number of replacements, so a true value means the tree was modified
	    my $changed = "no";
	    # internal node names in the bootstrap place, e.g. ")N12:" -> "):"
	    $changed = "yes" if ($treeFileOneLine =~ s/\)N[0-9]+:/\):/g);
	    # name of the last (root) internal node, e.g. ")N10;" -> ");" (any number of digits)
	    $changed = "yes" if ($treeFileOneLine =~ s/\)N[0-9]+;/\);/g);
	    # bootstrap values which look like this: ((A:0.02,B:0.03)40:0.3);
	    $changed = "yes" if ($treeFileOneLine =~ s/\)\d*\.?\d+\:/\)\:/g);
	    # bootstrap values which look like this: (A:0.4,(B:0.1,C:0.1):0.3[40]);
	    $changed = "yes" if ($treeFileOneLine =~ s/(\d\.?\d+)\[\d\.?\d+\]/$1/g);

	    if ($changed eq "yes") {
	        # Keep the original before overwriting; stop if the backup could not be made
	        rename ($IN_treeFile,$OLD_treeFile) || die "IndelReconstruction_Wrapper.pl:remove_InternalNodeName_or_BPvalues: Can't rename '$IN_treeFile' to '$OLD_treeFile' $!";
	        open (TREE_REMOVED,">",$IN_treeFile) || die "IndelReconstruction_Wrapper.pl:remove_InternalNodeName_or_BPvalues: Can't open '$IN_treeFile' for writing $!";
	        print TREE_REMOVED $treeFileOneLine."\n";
	        close TREE_REMOVED;
	    }
	    return $changed;
	}
	sub uniq_array
	# Takes a reference to an array and returns a reference to a new array holding
	# each distinct item once, in string-sorted order.
	{
	    my ($ReffToArray)=@_;
	    my %seen=();
	    # Hash keys are unique, so duplicates collapse here
	    $seen{$_}=1 for (@$ReffToArray);
	    return [sort keys(%seen)];
	}
	sub Read_Char_Marginal_Prob
	# Parse the marginal probabilities file of the characters reconstruction.
	# Argument: path of the marginal probabilities file.
	# Returns a list of three values:
	#   ref to hash {MSA_Pos}{Node}{Char} => marginal probability,
	#   ref to array of node names (collected from the lines of position 1, in file order),
	#   number of "marginal probabilities at position" lines seen (used as the MSA length).
	# Dies if the file cannot be opened (callers unpack three values, so an error
	# string returned in their place would only fail later and less clearly).
	{
	    my $Chars_MarginalProb_File=shift;
	    my %Chars_MarginalProb=(); #Key1: MSA_Pos, Key2:Species, Key3:Char, Value:MarginalProb
	    my @Nodes_Name=();
	    my $MSA_Length=0;
	    open (MARGINAL_PROB,$Chars_MarginalProb_File) || die "Could Not Open the MarginalProb_File: '$Chars_MarginalProb_File' $!";
	    my $MSA_Pos="";
	    while (my $line=<MARGINAL_PROB>)
	    {
	        if ($line=~/marginal probabilities at position: ([0-9]+)/)
	        {
	            $MSA_Pos=$1;
	            $MSA_Length++;
	        }
	        elsif ($line=~/of node: (.*?): /)
	        {
	            my $node=$1;
	            push (@Nodes_Name,$node) if ($MSA_Pos==1);
	            # Each entry on the line has the form p(<CHAR>)=<prob>; the parentheses are literal
	            while ($line=~/p\(([A-Z]+)\)=([0-9\.\-]+)/g)
	            {
	                $Chars_MarginalProb{$MSA_Pos}{$node}{$1}=$2;
	            }
	        }
	    }
	    close (MARGINAL_PROB);
	    return (\%Chars_MarginalProb,\@Nodes_Name,$MSA_Length);
	}
	sub readMSA
	# Read an MSA in FASTA format.
	# Argument: path of the FASTA file.
	# Returns a ref to a hash where the key is the header line without its first
	# character and the value is the sequence with its lines joined together.
	{
	    my ($MSA)=@_;
	    my %MSA_Hash=();
	    open (my $in, "<",$MSA) || die "IndelReconstruction_Wrapper:readMSA: Can't read the MSA '$MSA' $!";
	    # $pending always holds the next unprocessed line (a header at the top of the outer loop)
	    my $pending = <$in>;
	    while (defined $pending) {
	        chomp $pending;
	        my $header = substr($pending,1);
	        # Gather sequence lines until the next header or the end of the file
	        my @chunks = ();
	        while (defined ($pending = <$in>)) {
	            last if (substr($pending,0,1) eq ">");
	            chomp $pending;
	            push (@chunks,$pending);
	        }
	        $MSA_Hash{$header}=join("",@chunks);
	    }
	    close ($in);
	    return \%MSA_Hash;
	}

+146

-0

www/fastml/SampleSeqFromProb.pl less more

	use strict;

	# Sample sequences for one ancestral node from its per-site probability vectors.
	# Positional command-line arguments:
	#   1. probabilities file (CSV): a header line, then rows of NODE,SITE,PROBS BY AB
	#   2. name of the node to sample for
	#   3. number of sequences to sample
	#   4. sequence type: aa | nuc | codon
	#   5. output file (FASTA, sequences are named 1..N)
	#   6. "YES" when run by the server; the output page next to the output file is then updated
	my $FullProbFile=shift;
	my $Node=shift;
	my $NumOfSeqToSample=shift;
	my $SeqType=shift; # aa | nuc | codon
	my $OutFile=shift;
	my $isServer=shift;

	# Alphabet of each supported sequence type, in the order used to index the probability vectors
	my %AlphabetOf=(
	    nuc   => [qw(A C G T)],
	    aa    => [qw(A C D E F G H I K L M N P Q R S T V W Y)],
	    codon => [qw(AAA AAC AAG AAT ACA ACC ACG ACT AGA AGC AGG AGT ATA ATC ATG ATT CAA CAC CAG CAT CCA CCC CCG CCT CGA CGC CGG CGT CTA CTC CTG CTT GAA GAC GAG GAT GCA GCC GCG GCT GGA GGC GGG GGT GTA GTC GTG GTT TAC TAT TCA TCC TCG TCT TGC TGG TGT TTA TTC TTG TTT)],
	);
	# An unknown type used to leave the alphabet empty and produce empty sequences; stop instead
	if ((!defined $SeqType) or (!exists $AlphabetOf{$SeqType}))
	{
	    die "SampleSeqFromProb: unsupported sequence type '".(defined $SeqType ? $SeqType : "")."' (expected aa, nuc or codon)\n";
	}
	my @AB=@{$AlphabetOf{$SeqType}};
	my $AB_SIZE=@AB; # 4, 20 or 61

	# Load the probability vector of every site of the requested node
	my %ProbPerSite=(); # hash of array with prob for each pos
	open (PROB_FILE,$FullProbFile) || die "Can't open The Full Prob File '$FullProbFile' $!";
	my $SeqLength=0; # highest site number seen for the node
	my $line=<PROB_FILE>; # header
	while ($line=<PROB_FILE>)
	{
	    chomp ($line);
	    my @line=split(",",$line); # NODE,SITE,PROBS BY AB
	    my $CurrNode=shift(@line);
	    my $CurrPos=shift(@line);
	    if ($CurrNode eq $Node)
	    {
	        $ProbPerSite{$CurrPos}=[@line];
	        $SeqLength=$CurrPos if ($CurrPos>$SeqLength);
	    }
	}
	close (PROB_FILE);

	# Sample the sequences; the same loop is used for every sequence type
	open (OUT,">$OutFile") || die "Can't open Out: '$OutFile' $!";
	for (my $SeqNum=0;$SeqNum<$NumOfSeqToSample;$SeqNum++)
	{
	    my $RandomSeq="";
	    for (my $pos=1;$pos<=$SeqLength;$pos++)
	    {
	        my $Rand=rand();
	        my $i=0;
	        my $Size=@{$ProbPerSite{$pos}};
	        print "SIZE OF PROB VECTOR at POS $pos:$Size\n" if ($Size<$AB_SIZE);
	        # Walk the vector, subtracting each entry from the random draw, until the
	        # remainder (plus a 0.0001 tolerance) drops below the current entry or the
	        # last letter of the alphabet is reached
	        while(($Rand+0.0001 >= $ProbPerSite{$pos}[$i]) and ($i<$AB_SIZE-1))
	        {
	            $Rand=$Rand-$ProbPerSite{$pos}[$i];
	            $i++;
	        }
	        print "UNDIFINED:$i for RAND $Rand and vector ",join (",",@{$ProbPerSite{$pos}}) if (!defined $AB[$i]);
	        $RandomSeq=$RandomSeq.$AB[$i];
	    }
	    print OUT ">",$SeqNum+1,"\n$RandomSeq\n";
	}
	# Flush the sampled sequences to disk before the output page starts linking to them
	close (OUT);

	if ((defined $isServer) and ($isServer eq "YES"))
	{
	    # Update the output page
	    #######################################

	    my $OutDir=getDir($OutFile);
	    my $OutPage=$OutDir."output.html";
	    if (-e $OutDir."output.php")
	    {
	        $OutPage=$OutDir."output.php";
	    }

	    open (OUTPUT,"$OutPage") || die "Can't open '$OutPage' $!";
	    my @out=<OUTPUT>;
	    close (OUTPUT);
	    # The page is truncated and rewritten from @out, so a failure here must not go unnoticed
	    open (OUTPUT,">$OutPage") || die "Can't open '$OutPage' for writing $!";
	    my $SampledSeq_Section=0; # 1 while inside the sampled-sequences section of the page
	    foreach my $line (@out)
	    {
	        print OUTPUT $line; # every original line is copied as is
	        if ($line=~/sequences from the posterior distribution for ancestral node/)
	        {
	            $SampledSeq_Section=1;
	        }
	        elsif (($line=~/form/) and ($SampledSeq_Section==1))
	        {
	            # Add the link to the results right after the first 'form' line of the section
	            my $FileNoPath=getFilename($OutFile);
	            print_message_to_output("<A HREF='$FileNoPath' TARGET=_blank>$NumOfSeqToSample sequences sampled from the posterior distribution for ancestral node $Node</A></p>");
	            $SampledSeq_Section=0;
	        }
	    }
	    close (OUTPUT);
	}
	122
	123	#---------------------------------------------
	sub print_message_to_output{
	    #---------------------------------------------
	    # Write the given message as a one-item HTML list to the OUTPUT filehandle,
	    # which the caller must have opened for writing.
	    my ($msg) = @_;
	    print OUTPUT "\n<ul><li>$msg</li></ul>\n";
	}
	129
	130	# Returns the filename without directory
	131	sub getFilename{
	132	my $fullFile = pop @_;
	133	if ($fullFile =~ m/.[\\\/](.)$/) {
	134	return $1;
	135	} else {return $fullFile}
	136
	137	}
	138
	139	sub getDir{
	140	my $fullFile = pop @_;
	141	if ($fullFile =~ m/(.[\\\/]).$/) {
	142	return $1;
	143	} else {return ''}
	144
	145	}

+79

-0

www/fastml/kMostProbSeq.pl less more

	0	use strict;
	1
	2	my $FullLogLikeFile=shift;
	3	my $Node=shift;
	4	my $k=shift;
	5	my $OutDir=shift;
	6	my $seqType=shift;
	7	my $isServer=shift;
	8
	9	$OutDir=$OutDir."/" if ($OutDir!~/\/$/);
	10	my $K_MOST_PROB_SEQ="python /bioseq/pupkoSVN/trunk/www/FastML/kMostProbSeq.py";
	11	my $ProbMatrix=$OutDir."$Node.LogLikelihoodMarginalProb.csv";
	12
	13	my %Profile=(); # Lines=AlephBet size; Col:#Pos
	14	open (FULL_PROB,$FullLogLikeFile) \|\| die "Can't open The Full Log Like File '$FullLogLikeFile' $!";
	15	open (OUT,">$ProbMatrix") \|\| die "Can't open Prob matrix file: '$ProbMatrix' $!";
	16	print OUT "site,A,C,D,E,F,G,H,I,K,L,M,N,P,Q,R,S,T,V,W,Y\n" if ($seqType eq "aa");
	17	print OUT "site,A,C,G,T\n" if ($seqType eq "nuc");
	18	print OUT "site,AAA,AAC,AAG,AAT,ACA,ACC,ACG,ACT,AGA,AGC,AGG,AGT,ATA,ATC,ATG,ATT,CAA,CAC,CAG,CAT,CCA,CCC,CCG,CCT,CGA,CGC,CGG,CGT,CTA,CTC,CTG,CTT,GAA,GAC,GAG,GAT,GCA,GCC,GCG,GCT,GGA,GGC,GGG,GGT,GTA,GTC,GTG,GTT,TAC,TAT,TCA,TCC,TCG,TCT,TGC,TGG,TGT,TTA,TTC,TTG,TTT\n" if ($seqType eq "codon");
	19	while (my $line=<FULL_PROB>)
	20	{
	21	my @line=split(",",$line); # NODE,SITE,PROBS BY AB
	22	my $CurrNode=shift(@line);
	23	if ($CurrNode eq "$Node")
	24	{
	25	print OUT join(",",@line);
	26	}
	27	}
	28	close (FULL_PROB);
	29	close (OUT);
	30	my $OutSeq=$OutDir.$Node.".".$k."MostProbSeq.fasta";
	31	my $cmd="$K_MOST_PROB_SEQ -i $ProbMatrix -o $OutSeq -k $k";
	32	system ($cmd);
	33
	34	if ($isServer eq "YES")
	35	{
	36	# Update the output page
	37	#######################################
	38
	39	my $OutPage=$OutDir."output.html";
	40	if (-e $OutDir."output.php")
	41	{
	42	$OutPage=$OutDir."output.php";
	43	}
	44
	45	open (OUTPUT,"$OutPage") \|\| die "Can't open '$OutPage' $!";
	46	my @out=<OUTPUT>;
	47	close (OUTPUT);
	48	open (OUTPUT,">$OutPage");
	49	my $kMostProb_Section=0;
	50	foreach my $line (@out)
	51	{
	52	if ($line=~/most likely ancestral sequences for ancestral node/)
	53	{
	54	$kMostProb_Section=1;
	55	print OUTPUT $line;
	56	}
	57	elsif (($line=~/form/) and ($kMostProb_Section==1))
	58	{
	59	print OUTPUT $line;
	60	my $FileNoPath=$Node.".".$k."MostProbSeq.fasta";
	61	print_message_to_output("<A HREF='$FileNoPath' TARGET=_blank>$k-most likely ancestral sequences for ancestral node $Node ('marginal' reconstruction)</A></p>");
	62	$kMostProb_Section=0;
	63	}
	64	else
	65	{
	66	print OUTPUT $line;
	67	}
	68	}
	69	close (OUTPUT);
	70	}
	71
	72	#---------------------------------------------
	73	sub print_message_to_output{
	74	#---------------------------------------------
	75	my $msg = shift;
	76	print OUTPUT "\n<ul><li>$msg</li></ul>\n";
	77	}
	78

+116

-0

www/fastml/kMostProbSeq.py less more

	0	#!/bin/python
	1	import csv
	2	import heapq
	3	import sys
	4	import operator
	5
	6	IGNORED_COLUMNS = 1
	7	DEFAULT_REQUIRED_SEQUENCES = 100
	8
	9	import sys
	10	if sys.version_info[0] > 2:
	11	# Python3 ?
	12	xrange = range
	13	new_open = open
	14	def old_open(filename, mode):
	15	if 'b' in mode:
	16	return new_open(filename, mode.replace('b', ''), newline = '')
	17	else:
	18	return new_open(filename, mode)
	19	open = old_open
	20
	21
	22	class PrefixCell(object):
	23	def __init__(self, previous_cell = None, letter = '', likelihood = float('-inf')):
	24	self.previous_cell = previous_cell
	25	self.letter = letter
	26	self.likelihood = likelihood
	27
	28	def iter_cells(self):
	29	cell = self
	30	while cell is not None:
	31	yield cell
	32	cell = cell.previous_cell
	33
	34	def full_prefix(self):
	35	# skips the last cell (which has previous_cell == None)
	36	return [cell.letter for cell in self.iter_cells()][-2::-1]
	37
	38	def __lt__(self, other):
	39	"""
	40	Python3.* uses __lt__
	41	"""
	42	if isinstance(other, PrefixCell):
	43	return self.likelihood < other.likelihood
	44	return super(PrefixCell, self) < other
	45
	46	def __cmp__(self, other):
	47	"""
	48	Python2.* uses __cmp__
	49	"""
	50	return (other < self) - (self < other)
	51
	52	def find_most_likely_sequences(letters, rows, required_sequences):
	53	"""
	54	This is the main calculation.
	55	"""
	56	prefix_cells = ([PrefixCell()] * (len(letters) - 1)) + [PrefixCell(likelihood = 0)]
	57	for row in rows:
	58	new_prefixes = [[PrefixCell(previous_cell = previous_cell,
	59	letter = letter,
	60	likelihood = previous_cell.likelihood + letter_likelihood)
	61	for previous_cell in prefix_cells]
	62	for letter, letter_likelihood in zip(letters, list(row)[IGNORED_COLUMNS:])]
	63	prefix_cells = list(heapq.merge(*new_prefixes))[-required_sequences:]
	64	return [(prefix_cell.full_prefix(), prefix_cell.likelihood)
	65	for prefix_cell in prefix_cells][::-1] # reverse order - show most likely first.
	66
	67
	68	def main(file_obj, required_sequences, output_filename, output_format):
	69	reader = csv.reader(file_obj)
	70	letters = next(reader)[IGNORED_COLUMNS:]
	71	# assert all(len(letter) == 1 for letter in letters), "Invalid letter was found in first row."
	72
	73	sequences_likelihoods = find_most_likely_sequences(letters,
	74	[map(float, row) for row in reader],
	75	required_sequences)
	76	out = sys.stdout
	77	if output_format == 'csv':
	78	if output_filename is not None:
	79	out = open(output_filename, 'wb')
	80	writer = csv.writer(out)
	81	for sequence, likelihood in sequences_likelihoods:
	82	writer.writerow([str(likelihood)] + list(sequence))
	83	elif output_format == 'txt':
	84	if output_filename is not None:
	85	out = open(output_filename, 'wb')
	86	for index, (sequence, likelihood) in enumerate(sequences_likelihoods):
	87	out.write(">%d_%f\n" % (index + 1, likelihood))
	88	out.write(''.join(sequence) + '\n')
	89	if out is not sys.stdout:
	90	out.close()
	91
	92
	93	if __name__ == '__main__':
	94	import optparse
	95	parser = optparse.OptionParser(description = "Finds the most likely sequences")
	96	parser.add_option("-i", "--file", dest = "input_filename", help = "input CSV file (default stdin)", metavar="FILE")
	97	parser.add_option("-o", "--output", dest = "output_filename", help = "output filename (default stdout)", metavar = "FILE")
	98	parser.add_option("-k", "--required", dest = "required_sequences", type="int", help = "required sequences (K) (default: %d)" % (DEFAULT_REQUIRED_SEQUENCES,), default = DEFAULT_REQUIRED_SEQUENCES)
	99	parser.add_option("-f", "--format", dest = "output_format", help = "output format (default: txt)", type = 'choice', choices = ("txt", "csv"), default = "txt")
	100
	101	(options, args) = parser.parse_args()
	102	if len(args) != 0:
	103	parser.error("Unexpected args")
	104
	105	if options.input_filename is None:
	106	import warnings
	107	warnings.warn("Missing input filename - using stdin")
	108	input_file_obj = sys.stdin
	109	else:
	110	input_file_obj = open(options.input_filename,'rb')
	111	main(input_file_obj, options.required_sequences, options.output_filename, options.output_format)
	112
	113
	114
	115