From 4e8cb17afec5430c00ff6a026d1528c188e91cda Mon Sep 17 00:00:00 2001 From: "cmzmasek@gmail.com" Date: Mon, 26 Nov 2012 23:19:29 +0000 Subject: [PATCH] "rio" work --- .../java/src/org/forester/application/rio.java | 91 ++------------------ .../src/org/forester/io/parsers/nhx/NHXParser.java | 6 +- .../org/forester/phylogeny/PhylogenyMethods.java | 8 -- forester/java/src/org/forester/sdi/RIO.java | 53 ++---------- 4 files changed, 17 insertions(+), 141 deletions(-) diff --git a/forester/java/src/org/forester/application/rio.java b/forester/java/src/org/forester/application/rio.java index 3815c6d..b9a1573 100644 --- a/forester/java/src/org/forester/application/rio.java +++ b/forester/java/src/org/forester/application/rio.java @@ -30,98 +30,23 @@ package org.forester.application; import java.io.File; import java.io.FileWriter; import java.io.PrintWriter; -import java.util.ArrayList; -import java.util.Vector; import org.forester.io.parsers.phyloxml.PhyloXmlParser; import org.forester.phylogeny.Phylogeny; -import org.forester.phylogeny.PhylogenyMethods; -import org.forester.phylogeny.PhylogenyNode; import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory; import org.forester.phylogeny.factories.PhylogenyFactory; -import org.forester.phylogeny.iterators.PreorderTreeIterator; import org.forester.sdi.RIO; import org.forester.util.ForesterUtil; public class rio { - final static private String PRG_NAME = "RIO"; - final static private String PRG_VERSION = "2.03 ALPHA"; - final static private String PRG_DATE = "2010.01.15"; - final static private String E_MAIL = "czmasek@burnham.org"; - final static private String WWW = "www.phylosoft.org/forester/"; - final static private boolean TIME = true; - final static private boolean VERBOSE = true; - // For method getDistances -- calculation of distances. - final static private boolean MINIMIZE_COST = false; - // For method getDistances -- calculation of distances. - final static private boolean MINIMIZE_DUPS = true; - // For method getDistances -- calculation of distances. - final static private boolean MINIMIZE_HEIGHT = true; - final static private int WARN_NO_ORTHOS_DEFAULT = 2; - final static private int - // How many sd away from mean to root. - WARN_MORE_THAN_ONE_ORTHO_DEFAULT = 2; - // How many sd away from mean to LCA of orthos. - final static private double THRESHOLD_ULTRA_PARALOGS_DEFAULT = 50; - // How many sd away from mean to LCA of orthos. - final static private double WARN_ONE_ORTHO_DEFAULT = 2; - - // Factor between the two distances to their LCA - // (larger/smaller). - // Factor between the two distances to their LCA - // (larger/smaller). - /** - * Calculates the mean and standard deviation of all nodes of Phylogeny t - * which have a bootstrap values zero or more. Returns null in case of - * failure (e.g t has no bootstrap values, or just one). - *

- * - * @param t - * reference to a tree with bootstrap values - * @return Array of doubles, [0] is the mean, [1] the standard deviation - */ - private static double[] calculateMeanBoostrapValue( final Phylogeny t ) { - double b = 0; - int n = 0; - long sum = 0; - double x = 0.0, mean = 0.0; - final double[] da = new double[ 2 ]; - final Vector bv = new Vector(); - PhylogenyNode node = null; - PreorderTreeIterator i = null; - i = new PreorderTreeIterator( t ); - // Calculates the mean. - while ( i.hasNext() ) { - node = i.next(); - if ( !( ( node.getParent() != null ) && node.getParent().isRoot() - && ( PhylogenyMethods.getConfidenceValue( node.getParent().getChildNode1() ) > 0 ) - && ( PhylogenyMethods.getConfidenceValue( node.getParent().getChildNode2() ) > 0 ) && ( node - .getParent().getChildNode2() == node ) ) ) { - b = PhylogenyMethods.getConfidenceValue( node ); - if ( b > 0 ) { - sum += b; - bv.addElement( new Double( b ) ); - n++; - } - } - // i.next(); - } - if ( n < 2 ) { - return null; - } - mean = ( double ) sum / n; - // Calculates the standard deviation. - sum = 0; - for( int j = 0; j < n; ++j ) { - b = ( bv.elementAt( j ) ).intValue(); - x = b - mean; - sum += ( x * x ); - } - da[ 0 ] = mean; - da[ 1 ] = java.lang.Math.sqrt( sum / ( n - 1.0 ) ); - return da; - } + final static private String PRG_NAME = "RIO"; + final static private String PRG_VERSION = "2.03 ALPHA"; + final static private String PRG_DATE = "2010.01.15"; + final static private String E_MAIL = "czmasek@burnham.org"; + final static private String WWW = "www.phylosoft.org/forester/"; + final static private boolean TIME = true; + final static private boolean VERBOSE = true; private final static void errorInCommandLine() { System.out.println( "\nrio: Error in command line.\n" ); @@ -137,11 +62,9 @@ public class rio { String seq_name = ""; String arg = ""; boolean output_ultraparalogs = false; - ArrayList orthologs_al_for_dc = null; double t_orthologs = 0.0; double t_orthologs_dc = 0.0; double threshold_ultra_paralogs = 0.0; - double[] bs_mean_sd = null; int sort = 13; Phylogeny species_tree = null; RIO rio_instance = null; diff --git a/forester/java/src/org/forester/io/parsers/nhx/NHXParser.java b/forester/java/src/org/forester/io/parsers/nhx/NHXParser.java index d078f83..dc34b0f 100644 --- a/forester/java/src/org/forester/io/parsers/nhx/NHXParser.java +++ b/forester/java/src/org/forester/io/parsers/nhx/NHXParser.java @@ -272,6 +272,8 @@ public final class NHXParser implements PhylogenyParser { setPhylogenies( new ArrayList() ); setCladeLevel( 0 ); newCurrentAnotation(); + setCurrentPhylogeny( null ); + setCurrentNode( null ); int i = 0; while ( true ) { char c = '\b'; @@ -401,10 +403,6 @@ public final class NHXParser implements PhylogenyParser { return getPhylogeniesAsArray(); } // parse() - public Phylogeny parseNext() throws IOException, NHXFormatException { - return null; - } - /** * Called if a closing paren is encountered. * diff --git a/forester/java/src/org/forester/phylogeny/PhylogenyMethods.java b/forester/java/src/org/forester/phylogeny/PhylogenyMethods.java index 3c6b3cd..c881906 100644 --- a/forester/java/src/org/forester/phylogeny/PhylogenyMethods.java +++ b/forester/java/src/org/forester/phylogeny/PhylogenyMethods.java @@ -1655,10 +1655,6 @@ public class PhylogenyMethods { ref_ext_taxo.add( n.getNodeData().getTaxonomy().getTaxonomyCode() ); } } - System.out.println( " ref_ext_tax:" ); - for( final String string : ref_ext_taxo ) { - System.out.println( string ); - } final ArrayList nodes_to_delete = new ArrayList(); for( final PhylogenyNodeIterator it = to_be_stripped.iteratorExternalForward(); it.hasNext(); ) { final PhylogenyNode n = it.next(); @@ -1670,10 +1666,6 @@ public class PhylogenyMethods { nodes_to_delete.add( n ); } } - System.out.println( " to delete:" ); - for( final PhylogenyNode string : nodes_to_delete ) { - System.out.println( string.getNodeData().getTaxonomy().getTaxonomyCode() ); - } for( final PhylogenyNode phylogenyNode : nodes_to_delete ) { to_be_stripped.deleteSubtree( phylogenyNode, true ); } diff --git a/forester/java/src/org/forester/sdi/RIO.java b/forester/java/src/org/forester/sdi/RIO.java index 78e927a..62b2c21 100644 --- a/forester/java/src/org/forester/sdi/RIO.java +++ b/forester/java/src/org/forester/sdi/RIO.java @@ -56,14 +56,12 @@ public final class RIO { private final static boolean ROOT_BY_MINIMIZING_MAPPING_COST = false; private final static boolean ROOT_BY_MINIMIZING_SUM_OF_DUPS = true; private final static boolean ROOT_BY_MINIMIZING_TREE_HEIGHT = true; - private final static boolean TIME = false; private HashMap> _o_hash_maps; private HashMap> _so_hash_maps; private HashMap> _up_hash_maps; private List _seq_names; private int _samples; private int _ext_nodes_; - private long _time; /** * Default constructor. @@ -116,7 +114,6 @@ public final class RIO { return m; } - public final int getNumberOfSamples() { return _samples; } @@ -200,16 +197,6 @@ public final class RIO { } /** - * Returns the time (in ms) needed to run "inferOrthologs". Final variable - * TIME needs to be set to true. - * - * @return time (in ms) needed to run method "inferOrthologs" - */ - public long getTime() { - return _time; - } - - /** * Infers the orthologs (as well the "super orthologs", the "subtree * neighbors", and the "ultra paralogs") for each external node of the gene * Trees in multiple tree File gene_trees_file (=output of PHYLIP NEIGHBOR, @@ -237,9 +224,6 @@ public final class RIO { public void inferOrthologs( final File gene_trees_file, final Phylogeny species_tree, final String query ) throws IOException, SDIException { int bs = 0; - if ( RIO.TIME ) { - _time = System.currentTimeMillis(); - } // Read in first tree to get its sequence names // and strip species_tree. final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); @@ -250,12 +234,11 @@ public final class RIO { nhx.setIgnoreQuotes( true ); nhx.setTaxonomyExtraction( PhylogenyMethods.TAXONOMY_EXTRACTION.YES ); } - final Phylogeny gene_tree = factory.create( gene_trees_file, p )[ 0 ]; - System.out.println( "species " + species_tree.toString() ); + final Phylogeny[] gene_trees = factory.create( gene_trees_file, p ); // Removes from species_tree all species not found in gene_tree. - PhylogenyMethods.taxonomyBasedDeletionOfExternalNodes( gene_tree, species_tree ); - PhylogenyMethods.taxonomyBasedDeletionOfExternalNodes( species_tree, gene_tree ); - _seq_names = getAllExternalSequenceNames( gene_tree ); + PhylogenyMethods.taxonomyBasedDeletionOfExternalNodes( gene_trees[ 0 ], species_tree ); + PhylogenyMethods.taxonomyBasedDeletionOfExternalNodes( species_tree, gene_trees[ 0 ] ); + _seq_names = getAllExternalSequenceNames( gene_trees[ 0 ] ); if ( ( _seq_names == null ) || ( _seq_names.size() < 1 ) ) { throw new IOException( "could not get sequence names" ); } @@ -266,22 +249,18 @@ public final class RIO { _so_hash_maps.put( query, new HashMap( _seq_names.size() ) ); _up_hash_maps.put( query, new HashMap( _seq_names.size() ) ); // Go through all gene trees in the file. - final Phylogeny[] gene_trees = factory.create( gene_trees_file, p ); final Phylogeny[] assigned_trees = new Phylogeny[ gene_trees.length ]; + System.out.println( "gene trees" + gene_trees.length ); int c = 0; for( final Phylogeny gt : gene_trees ) { bs++; // Removes from gene_tree all species not found in species_tree. PhylogenyMethods.taxonomyBasedDeletionOfExternalNodes( species_tree, gt ); assigned_trees[ c++ ] = inferOrthologsHelper( gt, species_tree, query ); - // System.out.println( bs ); } final IntMatrix m = calculateOrthologTable( assigned_trees ); System.out.println( m.toString() ); - setNumberOfSamples( bs ); - if ( RIO.TIME ) { - _time = ( System.currentTimeMillis() - _time ); - } + setNumberOfSamples( gene_trees.length ); } public List getNodesViaSequenceName( final Phylogeny phy, final String seq_name ) { @@ -394,21 +373,6 @@ public final class RIO { *

  • 0 : Ortholog *
  • 1 : Ortholog, Super ortholog *
  • 2 : Super ortholog, Ortholog - *
  • 3 : Ortholog, Distance - *
  • 4 : Distance, Ortholog - *
  • 5 : Ortholog, Super ortholog, Distance - *
  • 6 : Ortholog, Distance, Super ortholog - *
  • 7 : Super ortholog, Ortholog, Distance - *
  • 8 : Super ortholog, Distance, Ortholog - *
  • 9 : Distance, Ortholog, Super ortholog - *
  • 10 : Distance, Super ortholog, Ortholog - *
  • 11 : Ortholog, Subtree neighbor, Distance - *
  • 12 : Ortholog, Subtree neighbor, Super ortholog, Distance (default) - *
  • 13 : Ortholog, Super ortholog, Subtree neighbor, Distance - *
  • 14 : Subtree neighbor, Ortholog, Super ortholog, Distance - *
  • 15 : Subtree neighbor, Distance, Ortholog, Super ortholog - *
  • 16 : Ortholog, Distance, Subtree neighbor, Super ortholog - *
  • 17 : Ortholog, Subtree neighbor, Distance, Super ortholog * *

    * Returns "-" if no putative orthologs have been found (given @@ -590,14 +554,13 @@ public final class RIO { _seq_names = null; _samples = 1; _ext_nodes_ = 0; - _time = 0; } - private void setNumberOfSamples( int i ) { if ( i < 1 ) { i = 1; } + System.out.println( "samples: " + i ); _samples = i; } @@ -823,5 +786,5 @@ public final class RIO { _p = new int[ 2 ]; _p[ 0 ] = _p[ 1 ] = +1; } - } // Tuplet + } // ResultLine } -- 1.7.10.2