import java.io.File;
import java.io.FileWriter;
import java.io.PrintWriter;
-import java.util.ArrayList;
-import java.util.Vector;
import org.forester.io.parsers.phyloxml.PhyloXmlParser;
import org.forester.phylogeny.Phylogeny;
-import org.forester.phylogeny.PhylogenyMethods;
-import org.forester.phylogeny.PhylogenyNode;
import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
import org.forester.phylogeny.factories.PhylogenyFactory;
-import org.forester.phylogeny.iterators.PreorderTreeIterator;
import org.forester.sdi.RIO;
import org.forester.util.ForesterUtil;
public class rio {
- final static private String PRG_NAME = "RIO";
- final static private String PRG_VERSION = "2.03 ALPHA";
- final static private String PRG_DATE = "2010.01.15";
- final static private String E_MAIL = "czmasek@burnham.org";
- final static private String WWW = "www.phylosoft.org/forester/";
- final static private boolean TIME = true;
- final static private boolean VERBOSE = true;
- // For method getDistances -- calculation of distances.
- final static private boolean MINIMIZE_COST = false;
- // For method getDistances -- calculation of distances.
- final static private boolean MINIMIZE_DUPS = true;
- // For method getDistances -- calculation of distances.
- final static private boolean MINIMIZE_HEIGHT = true;
- final static private int WARN_NO_ORTHOS_DEFAULT = 2;
- final static private int
- // How many sd away from mean to root.
- WARN_MORE_THAN_ONE_ORTHO_DEFAULT = 2;
- // How many sd away from mean to LCA of orthos.
- final static private double THRESHOLD_ULTRA_PARALOGS_DEFAULT = 50;
- // How many sd away from mean to LCA of orthos.
- final static private double WARN_ONE_ORTHO_DEFAULT = 2;
-
- // Factor between the two distances to their LCA
- // (larger/smaller).
- // Factor between the two distances to their LCA
- // (larger/smaller).
- /**
- * Computes the mean and the sample standard deviation (n - 1 in the
- * denominator) of the confidence (bootstrap) values of Phylogeny t that
- * are greater than zero. Returns null if fewer than two such values are
- * found (e.g. t has no bootstrap values, or just one).
- * <p>
- * Nodes are visited in preorder. A node is left out when its parent is the
- * root, both children of the root carry a confidence value greater than
- * zero, and the node is the second child of the root (presumably so that
- * the value on the branch spanning the root is not counted twice -- TODO
- * confirm). Every other node contributes its value if it is positive.
- * <p>
- * NOTE(review): the accumulator "sum" is a long, so the fractional part is
- * discarded at every addition (both for the values and for the squared
- * deviations), and the stored values are read back with intValue(). The
- * returned numbers are therefore approximate -- confirm this is intended.
- *
- * @param t
- * reference to a tree with bootstrap values
- * @return Array of doubles, [0] is the mean, [1] the standard deviation;
- * null if fewer than two positive values were found
- */
- private static double[] calculateMeanBoostrapValue( final Phylogeny t ) {
- double b = 0;
- int n = 0;
- long sum = 0;
- double x = 0.0, mean = 0.0;
- final double[] da = new double[ 2 ];
- final Vector<Double> bv = new Vector<Double>();
- PhylogenyNode node = null;
- PreorderTreeIterator i = null;
- i = new PreorderTreeIterator( t );
- // First pass: add up the positive values and remember each counted one.
- while ( i.hasNext() ) {
- node = i.next();
- if ( !( ( node.getParent() != null ) && node.getParent().isRoot()
- && ( PhylogenyMethods.getConfidenceValue( node.getParent().getChildNode1() ) > 0 )
- && ( PhylogenyMethods.getConfidenceValue( node.getParent().getChildNode2() ) > 0 ) && ( node
- .getParent().getChildNode2() == node ) ) ) {
- b = PhylogenyMethods.getConfidenceValue( node );
- if ( b > 0 ) {
- sum += b;
- bv.addElement( new Double( b ) );
- n++;
- }
- }
- // i.next();
- }
- if ( n < 2 ) {
- return null;
- }
- mean = ( double ) sum / n;
- // Second pass: sum of squared deviations from the mean ("sum" is reused).
- sum = 0;
- for( int j = 0; j < n; ++j ) {
- b = ( bv.elementAt( j ) ).intValue();
- x = b - mean;
- sum += ( x * x );
- }
- da[ 0 ] = mean;
- da[ 1 ] = java.lang.Math.sqrt( sum / ( n - 1.0 ) );
- return da;
- }
+ final static private String PRG_NAME = "RIO";
+ final static private String PRG_VERSION = "2.03 ALPHA";
+ final static private String PRG_DATE = "2010.01.15";
+ final static private String E_MAIL = "czmasek@burnham.org";
+ final static private String WWW = "www.phylosoft.org/forester/";
+ final static private boolean TIME = true;
+ final static private boolean VERBOSE = true;
private final static void errorInCommandLine() {
System.out.println( "\nrio: Error in command line.\n" );
String seq_name = "";
String arg = "";
boolean output_ultraparalogs = false;
- ArrayList<String> orthologs_al_for_dc = null;
double t_orthologs = 0.0;
double t_orthologs_dc = 0.0;
double threshold_ultra_paralogs = 0.0;
- double[] bs_mean_sd = null;
int sort = 13;
Phylogeny species_tree = null;
RIO rio_instance = null;
private final static boolean ROOT_BY_MINIMIZING_MAPPING_COST = false;
private final static boolean ROOT_BY_MINIMIZING_SUM_OF_DUPS = true;
private final static boolean ROOT_BY_MINIMIZING_TREE_HEIGHT = true;
- private final static boolean TIME = false;
private HashMap<String, HashMap<String, Integer>> _o_hash_maps;
private HashMap<String, HashMap<String, Integer>> _so_hash_maps;
private HashMap<String, HashMap<String, Integer>> _up_hash_maps;
private List<String> _seq_names;
private int _samples;
private int _ext_nodes_;
- private long _time;
/**
* Default constructor.
return m;
}
-
/**
 * Returns the number of samples currently stored in _samples, i.e. the
 * value last passed to setNumberOfSamples (which never stores less than 1).
 *
 * @return the stored number of samples
 */
public final int getNumberOfSamples() {
return _samples;
}
}
/**
- * Returns the time (in ms) that the most recent run of "inferOrthologs"
- * took. The time is only measured when the final variable TIME is set to
- * true.
- *
- * @return time (in ms) needed to run method "inferOrthologs"
- */
- public long getTime() {
- return _time;
- }
-
- /**
* Infers the orthologs (as well the "super orthologs", the "subtree
* neighbors", and the "ultra paralogs") for each external node of the gene
* Trees in multiple tree File gene_trees_file (=output of PHYLIP NEIGHBOR,
public void inferOrthologs( final File gene_trees_file, final Phylogeny species_tree, final String query )
throws IOException, SDIException {
int bs = 0;
- if ( RIO.TIME ) {
- _time = System.currentTimeMillis();
- }
// Read in first tree to get its sequence names
// and strip species_tree.
final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
nhx.setIgnoreQuotes( true );
nhx.setTaxonomyExtraction( PhylogenyMethods.TAXONOMY_EXTRACTION.YES );
}
- final Phylogeny gene_tree = factory.create( gene_trees_file, p )[ 0 ];
- System.out.println( "species " + species_tree.toString() );
+ final Phylogeny[] gene_trees = factory.create( gene_trees_file, p );
// Removes from species_tree all species not found in gene_tree.
- PhylogenyMethods.taxonomyBasedDeletionOfExternalNodes( gene_tree, species_tree );
- PhylogenyMethods.taxonomyBasedDeletionOfExternalNodes( species_tree, gene_tree );
- _seq_names = getAllExternalSequenceNames( gene_tree );
+ PhylogenyMethods.taxonomyBasedDeletionOfExternalNodes( gene_trees[ 0 ], species_tree );
+ PhylogenyMethods.taxonomyBasedDeletionOfExternalNodes( species_tree, gene_trees[ 0 ] );
+ _seq_names = getAllExternalSequenceNames( gene_trees[ 0 ] );
if ( ( _seq_names == null ) || ( _seq_names.size() < 1 ) ) {
throw new IOException( "could not get sequence names" );
}
_so_hash_maps.put( query, new HashMap<String, Integer>( _seq_names.size() ) );
_up_hash_maps.put( query, new HashMap<String, Integer>( _seq_names.size() ) );
// Go through all gene trees in the file.
- final Phylogeny[] gene_trees = factory.create( gene_trees_file, p );
final Phylogeny[] assigned_trees = new Phylogeny[ gene_trees.length ];
+ System.out.println( "gene trees" + gene_trees.length );
int c = 0;
for( final Phylogeny gt : gene_trees ) {
bs++;
// Removes from gene_tree all species not found in species_tree.
PhylogenyMethods.taxonomyBasedDeletionOfExternalNodes( species_tree, gt );
assigned_trees[ c++ ] = inferOrthologsHelper( gt, species_tree, query );
- // System.out.println( bs );
}
final IntMatrix m = calculateOrthologTable( assigned_trees );
System.out.println( m.toString() );
- setNumberOfSamples( bs );
- if ( RIO.TIME ) {
- _time = ( System.currentTimeMillis() - _time );
- }
+ setNumberOfSamples( gene_trees.length );
}
public List<PhylogenyNode> getNodesViaSequenceName( final Phylogeny phy, final String seq_name ) {
* <li>0 : Ortholog
* <li>1 : Ortholog, Super ortholog
* <li>2 : Super ortholog, Ortholog
- * <li>3 : Ortholog, Distance
- * <li>4 : Distance, Ortholog
- * <li>5 : Ortholog, Super ortholog, Distance
- * <li>6 : Ortholog, Distance, Super ortholog
- * <li>7 : Super ortholog, Ortholog, Distance
- * <li>8 : Super ortholog, Distance, Ortholog
- * <li>9 : Distance, Ortholog, Super ortholog
- * <li>10 : Distance, Super ortholog, Ortholog
- * <li>11 : Ortholog, Subtree neighbor, Distance
- * <li>12 : Ortholog, Subtree neighbor, Super ortholog, Distance (default)
- * <li>13 : Ortholog, Super ortholog, Subtree neighbor, Distance
- * <li>14 : Subtree neighbor, Ortholog, Super ortholog, Distance
- * <li>15 : Subtree neighbor, Distance, Ortholog, Super ortholog
- * <li>16 : Ortholog, Distance, Subtree neighbor, Super ortholog
- * <li>17 : Ortholog, Subtree neighbor, Distance, Super ortholog
* </ul>
* <p>
* Returns "-" if no putative orthologs have been found (given
_seq_names = null;
_samples = 1;
_ext_nodes_ = 0;
- _time = 0;
}
-
/**
 * Stores the number of samples in _samples. A value below 1 is replaced by
 * 1, so the stored count is always at least 1.
 *
 * @param i
 *            the number of samples
 */
private void setNumberOfSamples( int i ) {
if ( i < 1 ) {
i = 1;
}
+ System.out.println( "samples: " + i ); // NOTE(review): prints the (clamped) value to stdout on every call; looks like debug output -- confirm it should stay
_samples = i;
}
_p = new int[ 2 ];
_p[ 0 ] = _p[ 1 ] = +1;
}
- } // Tuplet
+ } // ResultLine
}