/* This is an example HY-PHY Batch File.



   It reads in a '#' nucleotide dataset data/molclock.seq and performs

   a series of molecular clock tests on the data using the F81 model.

   

   Output is printed out as a Newick Style tree with branch lengths

   representing the number of expected substitutions per branch (which

   is the default setting for nucleotide models w/o rate variation).

   Also, the likelihood ratio statistic is evaluated and the P-values

   for the tests are reported.

   

   

   Sergei L. Kosakovsky Pond and Spencer V. Muse 

   December 1999. 

*/



/* 1. Read in the data and store the result in a DataSet variable.

	  The path is written relative to the HyPhy working directory

	  (NOTE(review): confirm how relative paths are resolved in your setup). */



DataSet 		nucleotideSequences = ReadDataFile ("data/molclock.seq");

   

/* 2. Filter the data, specifying that all of the data is to be used

	  and that it is to be treated as nucleotides. The second argument (1)

	  is presumably the unit size, i.e. one character per site - see the

	  CreateFilter documentation. */

	  

DataSetFilter	filteredData = CreateFilter (nucleotideSequences,1);



/* 3. Collect observed nucleotide frequencies from the filtered data. observedFreqs will

	  store the vector of frequencies. The three trailing 1s are taken to be the

	  unit size, atom size and position-specific flag described in the

	  HarvestFrequencies documentation (TODO confirm against the HyPhy docs). */



HarvestFrequencies (observedFreqs, filteredData, 1, 1, 1);



/* 4. Define the F81 substitution rate matrix. Every off-diagonal entry is the

	  single rate parameter mu, which is the parameter the molecular clock is

	  imposed on later in this file. The '*' diagonal placeholders are left for

	  HyPhy to fill in (per the HyPhy documentation, so that each row sums to

	  zero - TODO confirm). */



F81RateMatrix = 

		{{*,mu,mu,mu}

		 {mu,*,mu,mu}

		 {mu,mu,*,mu}

		 {mu,mu,mu,*}};

		 

/*5.  Define the F81 model by combining the substitution matrix with the vector of observed 

	  (equilibrium) frequencies. A single model is sufficient here, because only one 

	  data filter and one frequency vector are used in this file. */



Model 	F81 = (F81RateMatrix, observedFreqs);



/*6.  Now we can define the tree for the taxa in the data just read. Notice that 

	  two of the internal nodes are named (Internal1 and Internal2), so that the

	  subtrees rooted at them can be referred to later as theTree.Internal1 and

	  theTree.Internal2. */

	  

Tree  theTree = (TAAJ153,(HVRNASS,(RICRSS3,((ZMSUCS1,(OSRSS1A,(TASUCSYN1,HVSSYNMR)Internal1)),(MZESUS1,ORRSS2)Internal2))));

	  			

/*7.  Since all the likelihood function ingredients (data, tree, equilibrium frequencies)

	  have been defined we are ready to construct the likelihood function. */

	  

LikelihoodFunction  theLnLik = (filteredData, theTree);



/*8.  Maximize the likelihood function, storing parameter values in the matrix paramValues. 

	  We also store the resulting ln-lik and the number of model parameters:

	  row 1 of paramValues holds the maximized ln-likelihood in column 0 and the

	  parameter count in column 1. Both are kept in separate variables because

	  paramValues is overwritten by every later call to Optimize, and the clock

	  tests below compare against this unconstrained fit. */



Optimize (paramValues, theLnLik);

unconstrainedLnLik = paramValues[1][0];

paramCount = paramValues[1][1];



/*9.  Print the tree with optimal branch lengths to the console. Passing the

	  likelihood function itself to fprintf is what produces that output. */



fprintf  (stdout, "\n 0).UNCONSTRAINED MODEL:", theLnLik);



/*10. Now we impose the molecular clock constraint on the entire tree, 

	  enforcing the clock on parameter mu.*/

	  

MolecularClock (theTree, mu);



/*11. We maximize the likelihood function under the molecular clock constraints

	  and report the results. The likelihood ratio statistic is twice the drop in

	  ln-likelihood relative to the unconstrained fit; its degrees of freedom are

	  the number of parameters removed by the constraints. The P-value is the

	  upper tail of the chi-squared distribution (CChi2 is used as its CDF). */



Optimize (paramValues, theLnLik);



/* The multiplication is written explicitly: "2 (x)" relies on implied

   multiplication and is easily misread as a function call. */

lnlikDelta = 2*(unconstrainedLnLik-paramValues[1][0]);

pValue = 1-CChi2 (lnlikDelta, paramCount - paramValues[1][1]);



fprintf (stdout, "\n\n1). Global Molecular Clock; the P-value is:", pValue, "\n", theLnLik);

/*12. We can now try to impose molecular clock only on a subtree of the original tree.

	  First we do that for the subtree starting at the node Internal1. The

	  constraints from the global clock test are cleared first, so that only the

	  subtree clock is in force when the likelihood function is re-optimized. */

	  

ClearConstraints (theTree);

MolecularClock (theTree.Internal1, mu);

Optimize (paramValues, theLnLik);



/* Likelihood ratio test against the unconstrained fit. The multiplication is

   written explicitly: "2 (x)" relies on implied multiplication and is easily

   misread as a function call. */

lnlikDelta = 2*(unconstrainedLnLik-paramValues[1][0]);

pValue = 1-CChi2 (lnlikDelta, paramCount - paramValues[1][1]);



fprintf (stdout, "\n\n2). Molecular Clock starting at Internal1; the P-value is:", pValue, "\n", theLnLik);



/*13. Secondly, we apply the clock for the subtree starting at the node Internal2, 

	  in addition to the clock already imposed on the subtree at Internal1.

	  No ClearConstraints call is made here, so the Internal1 constraints stay

	  in force and both subtree clocks are tested jointly. */

	  

MolecularClock (theTree.Internal2, mu);

Optimize (paramValues, theLnLik);

/* Likelihood ratio test against the unconstrained fit. The multiplication is

   written explicitly: "2 (x)" relies on implied multiplication and is easily

   misread as a function call. */

lnlikDelta = 2*(unconstrainedLnLik-paramValues[1][0]);

pValue = 1-CChi2 (lnlikDelta, paramCount - paramValues[1][1]);



fprintf (stdout, "\n\n3). Molecular Clock starting at Internal1 and Internal2; the P-value is:", pValue, "\n", theLnLik);



