From bb367671ad9b009584da301ad4591c2f1e8f0901 Mon Sep 17 00:00:00 2001 From: "cmzmasek@gmail.com" Date: Thu, 13 Dec 2012 02:27:31 +0000 Subject: [PATCH] "rio" work --- .../java/src/org/forester/application/rio.java | 11 +- .../src/org/forester/datastructures/IntMatrix.java | 18 +- .../org/forester/phylogeny/PhylogenyMethods.java | 1538 +++++++++----------- forester/java/src/org/forester/rio/RIO.java | 162 ++- forester/java/src/org/forester/rio/TestRIO.java | 119 ++ forester/java/src/org/forester/sdi/GSDIR.java | 15 +- forester/java/src/org/forester/sdi/TestGSDI.java | 34 +- forester/java/src/org/forester/test/Test.java | 54 +- .../java/src/org/forester/util/ForesterUtil.java | 195 +-- 9 files changed, 1073 insertions(+), 1073 deletions(-) create mode 100644 forester/java/src/org/forester/rio/TestRIO.java diff --git a/forester/java/src/org/forester/application/rio.java b/forester/java/src/org/forester/application/rio.java index 5bf670b..2715461 100644 --- a/forester/java/src/org/forester/application/rio.java +++ b/forester/java/src/org/forester/application/rio.java @@ -96,6 +96,9 @@ public class rio { final File logfile; if ( cla.getNumberOfNames() > 3 ) { logfile = cla.getFile( 3 ); + if ( logfile.exists() ) { + ForesterUtil.fatalError( PRG_NAME, "\"" + logfile + "\" already exists" ); + } } else { logfile = null; @@ -146,9 +149,9 @@ public class rio { algorithm = ALGORITHM.GSDIR; } try { - final RIO rio = new RIO( gene_trees_file, species_tree, algorithm, logfile != null ); + final RIO rio = new RIO( gene_trees_file, species_tree, algorithm, logfile != null, true ); if ( algorithm == ALGORITHM.GSDIR ) { - System.out.println( "Taxonomy linking based on : " + rio.getGSDIRtaxCompBase() ); + ForesterUtil.programMessage( PRG_NAME, "taxonomy linking based on: " + rio.getGSDIRtaxCompBase() ); } tableOutput( othology_outtable, rio ); if ( ( algorithm == ALGORITHM.GSDIR ) && ( logfile != null ) ) { @@ -223,13 +226,13 @@ public class rio { for( int y = 0; y < m.size(); ++y ) { w.print( "\t" ); if ( x == y ) { - if ( m.get( x, y ) != rio.getNumberOfSamples() ) { + if ( m.get( x, y ) != rio.getAnalyzedGeneTrees().length ) { ForesterUtil.unexpectedFatalError( PRG_NAME, "diagonal value is off" ); } w.print( "-" ); } else { - w.print( df.format( ( ( double ) m.get( x, y ) ) / rio.getNumberOfSamples() ) ); + w.print( df.format( ( ( double ) m.get( x, y ) ) / rio.getAnalyzedGeneTrees().length ) ); } } w.println(); diff --git a/forester/java/src/org/forester/datastructures/IntMatrix.java b/forester/java/src/org/forester/datastructures/IntMatrix.java index b8b915d..5fd646f 100644 --- a/forester/java/src/org/forester/datastructures/IntMatrix.java +++ b/forester/java/src/org/forester/datastructures/IntMatrix.java @@ -28,16 +28,26 @@ public final class IntMatrix { return _data[ x ][ y ]; } - final public void set( final int x, final int y, final int value ) { - _data[ x ][ y ] = value; + final public String getLabel( final int x ) { + return _labels[ x ]; + } + + final public String getRowAsString( final int x, final char separator ) { + final StringBuilder sb = new StringBuilder(); + sb.append( getLabel( x ) ); + for( int y = 0; y < size(); ++y ) { + sb.append( separator ); + sb.append( get( x, y ) ); + } + return sb.toString(); } final public void inreaseByOne( final int x, final int y ) { _data[ x ][ y ]++; } - final public String getLabel( final int x ) { - return _labels[ x ]; + final public void set( final int x, final int y, final int value ) { + _data[ x ][ y ] = value; } final public void setLabel( final int x, final String label ) { diff --git a/forester/java/src/org/forester/phylogeny/PhylogenyMethods.java b/forester/java/src/org/forester/phylogeny/PhylogenyMethods.java index 75250eb..09db386 100644 --- a/forester/java/src/org/forester/phylogeny/PhylogenyMethods.java +++ b/forester/java/src/org/forester/phylogeny/PhylogenyMethods.java @@ -115,10 +115,6 @@ public class PhylogenyMethods { return farthest_d; } - final public static Event getEventAtLCA( final PhylogenyNode n1, final PhylogenyNode n2 ) { - return calculateLCA( n1, n2 ).getNodeData().getEvent(); - } - @Override public Object clone() throws CloneNotSupportedException { throw new CloneNotSupportedException(); @@ -132,22 +128,43 @@ public class PhylogenyMethods { return _farthest_2; } - final public static void deleteNonOrthologousExternalNodes( final Phylogeny phy, final PhylogenyNode n ) { - if ( n.isInternal() ) { - throw new IllegalArgumentException( "node is not external" ); - } - final ArrayList to_delete = new ArrayList(); - for( final PhylogenyNodeIterator it = phy.iteratorExternalForward(); it.hasNext(); ) { - final PhylogenyNode i = it.next(); - if ( !PhylogenyMethods.getEventAtLCA( n, i ).isSpeciation() ) { - to_delete.add( i ); + public static DescriptiveStatistics calculatBranchLengthStatistics( final Phylogeny phy ) { + final DescriptiveStatistics stats = new BasicDescriptiveStatistics(); + for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) { + final PhylogenyNode n = iter.next(); + if ( !n.isRoot() && ( n.getDistanceToParent() >= 0.0 ) ) { + stats.addValue( n.getDistanceToParent() ); } } - for( final PhylogenyNode d : to_delete ) { - phy.deleteSubtree( d, true ); + return stats; + } + + public static List calculatConfidenceStatistics( final Phylogeny phy ) { + final List stats = new ArrayList(); + for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) { + final PhylogenyNode n = iter.next(); + if ( !n.isExternal() && !n.isRoot() ) { + if ( n.getBranchData().isHasConfidences() ) { + for( int i = 0; i < n.getBranchData().getConfidences().size(); ++i ) { + final Confidence c = n.getBranchData().getConfidences().get( i ); + if ( ( i > ( stats.size() - 1 ) ) || ( stats.get( i ) == null ) ) { + stats.add( i, new BasicDescriptiveStatistics() ); + } + if ( !ForesterUtil.isEmpty( c.getType() ) ) { + if ( !ForesterUtil.isEmpty( stats.get( i ).getDescription() ) ) { + if ( !stats.get( i ).getDescription().equalsIgnoreCase( c.getType() ) ) { + throw new IllegalArgumentException( "support values in node [" + n.toString() + + "] appear inconsistently ordered" ); + } + } + stats.get( i ).setDescription( c.getType() ); + } + stats.get( i ).addValue( ( ( c != null ) && ( c.getValue() >= 0 ) ) ? c.getValue() : 0 ); + } + } + } } - phy.clearHashIdToNodeMap(); - phy.externalNodesHaveChanged(); + return stats; } /** @@ -195,18 +212,6 @@ public class PhylogenyMethods { throw new IllegalArgumentException( "illegal attempt to calculate LCA of two nodes which do not share a common root" ); } - public static final void preOrderReId( final Phylogeny phy ) { - if ( phy.isEmpty() ) { - return; - } - phy.setIdToNodeMap( null ); - int i = PhylogenyNode.getNodeCount(); - for( final PhylogenyNodeIterator it = phy.iteratorPreorder(); it.hasNext(); ) { - it.next().setId( i++ ); - } - PhylogenyNode.setNodeCount( i ); - } - /** * Returns the LCA of PhylogenyNodes node1 and node2. * Precondition: ids are in pre-order (or level-order). @@ -234,31 +239,84 @@ public class PhylogenyMethods { return node1; } - /** - * Returns all orthologs of the external PhylogenyNode n of this Phylogeny. - * Orthologs are returned as List of node references. - *

- * PRECONDITION: This tree must be binary and rooted, and speciation - - * duplication need to be assigned for each of its internal Nodes. - *

- * Returns null if this Phylogeny is empty or if n is internal. - * @param n - * external PhylogenyNode whose orthologs are to be returned - * @return Vector of references to all orthologous Nodes of PhylogenyNode n - * of this Phylogeny, null if this Phylogeny is empty or if n is - * internal - */ - public final static List getOrthologousNodes( final Phylogeny phy, final PhylogenyNode node ) { - final List nodes = new ArrayList(); - PhylogenyMethods.preOrderReId( phy ); - final PhylogenyNodeIterator it = phy.iteratorExternalForward(); - while ( it.hasNext() ) { - final PhylogenyNode temp_node = it.next(); - if ( ( temp_node != node ) && !calculateLCAonTreeWithIdsInPreOrder( node, temp_node ).isDuplication() ) { - nodes.add( temp_node ); + public static short calculateMaxBranchesToLeaf( final PhylogenyNode node ) { + if ( node.isExternal() ) { + return 0; + } + short max = 0; + for( PhylogenyNode d : node.getAllExternalDescendants() ) { + short steps = 0; + while ( d != node ) { + if ( d.isCollapse() ) { + steps = 0; + } + else { + steps++; + } + d = d.getParent(); + } + if ( max < steps ) { + max = steps; } } - return nodes; + return max; + } + + public static int calculateMaxDepth( final Phylogeny phy ) { + int max = 0; + for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) { + final PhylogenyNode node = iter.next(); + final int steps = node.calculateDepth(); + if ( steps > max ) { + max = steps; + } + } + return max; + } + + public static double calculateMaxDistanceToRoot( final Phylogeny phy ) { + double max = 0.0; + for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) { + final PhylogenyNode node = iter.next(); + final double d = node.calculateDistanceToRoot(); + if ( d > max ) { + max = d; + } + } + return max; + } + + public static int calculateNumberOfExternalNodesWithoutTaxonomy( final PhylogenyNode node ) { + final List descs = node.getAllExternalDescendants(); + int x = 0; + for( final PhylogenyNode n : descs ) { + if ( !n.getNodeData().isHasTaxonomy() || n.getNodeData().getTaxonomy().isEmpty() ) { + x++; + } + } + return x; + } + + public static DescriptiveStatistics calculatNumberOfDescendantsPerNodeStatistics( final Phylogeny phy ) { + final DescriptiveStatistics stats = new BasicDescriptiveStatistics(); + for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) { + final PhylogenyNode n = iter.next(); + if ( !n.isExternal() ) { + stats.addValue( n.getNumberOfDescendants() ); + } + } + return stats; + } + + public static int countNumberOfPolytomies( final Phylogeny phy ) { + int count = 0; + for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) { + final PhylogenyNode n = iter.next(); + if ( !n.isExternal() && ( n.getNumberOfDescendants() > 2 ) ) { + count++; + } + } + return count; } public static final HashMap createNameToExtNodeMap( final Phylogeny phy ) { @@ -270,619 +328,94 @@ public class PhylogenyMethods { return nodes; } - public final static Phylogeny[] readPhylogenies( final PhylogenyParser parser, final File file ) throws IOException { - final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); - final Phylogeny[] trees = factory.create( file, parser ); - if ( ( trees == null ) || ( trees.length == 0 ) ) { - throw new PhylogenyParserException( "Unable to parse phylogeny from file: " + file ); + public static void deleteExternalNodesNegativeSelection( final Set to_delete, final Phylogeny phy ) { + phy.clearHashIdToNodeMap(); + for( final Integer id : to_delete ) { + phy.deleteSubtree( phy.getNode( id ), true ); } - return trees; + phy.clearHashIdToNodeMap(); + phy.externalNodesHaveChanged(); } - public final static Phylogeny[] readPhylogenies( final PhylogenyParser parser, final List files ) - throws IOException { - final List tree_list = new ArrayList(); - for( final File file : files ) { - final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); - final Phylogeny[] trees = factory.create( file, parser ); - if ( ( trees == null ) || ( trees.length == 0 ) ) { - throw new PhylogenyParserException( "Unable to parse phylogeny from file: " + file ); + public static void deleteExternalNodesNegativeSelection( final String[] node_names_to_delete, final Phylogeny p ) + throws IllegalArgumentException { + for( final String element : node_names_to_delete ) { + if ( ForesterUtil.isEmpty( element ) ) { + continue; + } + List nodes = null; + nodes = p.getNodes( element ); + final Iterator it = nodes.iterator(); + while ( it.hasNext() ) { + final PhylogenyNode n = it.next(); + if ( !n.isExternal() ) { + throw new IllegalArgumentException( "attempt to delete non-external node \"" + element + "\"" ); + } + p.deleteSubtree( n, true ); } - tree_list.addAll( Arrays.asList( trees ) ); } - return tree_list.toArray( new Phylogeny[ tree_list.size() ] ); + p.clearHashIdToNodeMap(); + p.externalNodesHaveChanged(); } - final static public void transferInternalNodeNamesToConfidence( final Phylogeny phy ) { - final PhylogenyNodeIterator it = phy.iteratorPostorder(); - while ( it.hasNext() ) { + public static void deleteExternalNodesPositiveSelection( final Set species_to_keep, final Phylogeny phy ) { + // final Set to_delete = new HashSet(); + for( final PhylogenyNodeIterator it = phy.iteratorExternalForward(); it.hasNext(); ) { final PhylogenyNode n = it.next(); - if ( !n.isExternal() && !n.getBranchData().isHasConfidences() ) { - if ( !ForesterUtil.isEmpty( n.getName() ) ) { - double d = -1.0; - try { - d = Double.parseDouble( n.getName() ); - } - catch ( final Exception e ) { - d = -1.0; - } - if ( d >= 0.0 ) { - n.getBranchData().addConfidence( new Confidence( d, "" ) ); - n.setName( "" ); - } + if ( n.getNodeData().isHasTaxonomy() ) { + if ( !species_to_keep.contains( n.getNodeData().getTaxonomy() ) ) { + //to_delete.add( n.getNodeId() ); + phy.deleteSubtree( n, true ); } } + else { + throw new IllegalArgumentException( "node " + n.getId() + " has no taxonomic data" ); + } } + phy.clearHashIdToNodeMap(); + phy.externalNodesHaveChanged(); } - final static public void transferInternalNamesToBootstrapSupport( final Phylogeny phy ) { - final PhylogenyNodeIterator it = phy.iteratorPostorder(); + public static List deleteExternalNodesPositiveSelection( final String[] node_names_to_keep, + final Phylogeny p ) { + final PhylogenyNodeIterator it = p.iteratorExternalForward(); + final String[] to_delete = new String[ p.getNumberOfExternalNodes() ]; + int i = 0; + Arrays.sort( node_names_to_keep ); while ( it.hasNext() ) { - final PhylogenyNode n = it.next(); - if ( !n.isExternal() && !ForesterUtil.isEmpty( n.getName() ) ) { - double value = -1; - try { - value = Double.parseDouble( n.getName() ); - } - catch ( final NumberFormatException e ) { - throw new IllegalArgumentException( "failed to parse number from [" + n.getName() + "]: " - + e.getLocalizedMessage() ); - } - if ( value >= 0.0 ) { - n.getBranchData().addConfidence( new Confidence( value, "bootstrap" ) ); - n.setName( "" ); - } + final String curent_name = it.next().getName(); + if ( Arrays.binarySearch( node_names_to_keep, curent_name ) < 0 ) { + to_delete[ i++ ] = curent_name; + } + } + PhylogenyMethods.deleteExternalNodesNegativeSelection( to_delete, p ); + final List deleted = new ArrayList(); + for( final String n : to_delete ) { + if ( !ForesterUtil.isEmpty( n ) ) { + deleted.add( n ); } } + return deleted; } - final static public void sortNodeDescendents( final PhylogenyNode node, final DESCENDANT_SORT_PRIORITY pri ) { - class PhylogenyNodeSortTaxonomyPriority implements Comparator { - - @Override - public int compare( final PhylogenyNode n1, final PhylogenyNode n2 ) { - if ( n1.getNodeData().isHasTaxonomy() && n2.getNodeData().isHasTaxonomy() ) { - if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getScientificName() ) ) - && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getScientificName() ) ) ) { - return n1.getNodeData().getTaxonomy().getScientificName().toLowerCase() - .compareTo( n2.getNodeData().getTaxonomy().getScientificName().toLowerCase() ); - } - if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getTaxonomyCode() ) ) - && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getTaxonomyCode() ) ) ) { - return n1.getNodeData().getTaxonomy().getTaxonomyCode() - .compareTo( n2.getNodeData().getTaxonomy().getTaxonomyCode() ); - } - if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getCommonName() ) ) - && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getCommonName() ) ) ) { - return n1.getNodeData().getTaxonomy().getCommonName().toLowerCase() - .compareTo( n2.getNodeData().getTaxonomy().getCommonName().toLowerCase() ); - } - } - if ( n1.getNodeData().isHasSequence() && n2.getNodeData().isHasSequence() ) { - if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getName() ) ) - && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getName() ) ) ) { - return n1.getNodeData().getSequence().getName().toLowerCase() - .compareTo( n2.getNodeData().getSequence().getName().toLowerCase() ); - } - if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getSymbol() ) ) - && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getSymbol() ) ) ) { - return n1.getNodeData().getSequence().getSymbol() - .compareTo( n2.getNodeData().getSequence().getSymbol() ); - } - if ( ( n1.getNodeData().getSequence().getAccession() != null ) - && ( n2.getNodeData().getSequence().getAccession() != null ) - && !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getAccession().getValue() ) - && !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getAccession().getValue() ) ) { - return n1.getNodeData().getSequence().getAccession().getValue() - .compareTo( n2.getNodeData().getSequence().getAccession().getValue() ); - } - } - if ( ( !ForesterUtil.isEmpty( n1.getName() ) ) && ( !ForesterUtil.isEmpty( n2.getName() ) ) ) { - return n1.getName().toLowerCase().compareTo( n2.getName().toLowerCase() ); - } - return 0; - } - } - class PhylogenyNodeSortSequencePriority implements Comparator { - - @Override - public int compare( final PhylogenyNode n1, final PhylogenyNode n2 ) { - if ( n1.getNodeData().isHasSequence() && n2.getNodeData().isHasSequence() ) { - if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getName() ) ) - && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getName() ) ) ) { - return n1.getNodeData().getSequence().getName().toLowerCase() - .compareTo( n2.getNodeData().getSequence().getName().toLowerCase() ); - } - if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getSymbol() ) ) - && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getSymbol() ) ) ) { - return n1.getNodeData().getSequence().getSymbol() - .compareTo( n2.getNodeData().getSequence().getSymbol() ); - } - if ( ( n1.getNodeData().getSequence().getAccession() != null ) - && ( n2.getNodeData().getSequence().getAccession() != null ) - && !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getAccession().getValue() ) - && !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getAccession().getValue() ) ) { - return n1.getNodeData().getSequence().getAccession().getValue() - .compareTo( n2.getNodeData().getSequence().getAccession().getValue() ); - } - } - if ( n1.getNodeData().isHasTaxonomy() && n2.getNodeData().isHasTaxonomy() ) { - if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getScientificName() ) ) - && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getScientificName() ) ) ) { - return n1.getNodeData().getTaxonomy().getScientificName().toLowerCase() - .compareTo( n2.getNodeData().getTaxonomy().getScientificName().toLowerCase() ); - } - if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getTaxonomyCode() ) ) - && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getTaxonomyCode() ) ) ) { - return n1.getNodeData().getTaxonomy().getTaxonomyCode() - .compareTo( n2.getNodeData().getTaxonomy().getTaxonomyCode() ); - } - if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getCommonName() ) ) - && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getCommonName() ) ) ) { - return n1.getNodeData().getTaxonomy().getCommonName().toLowerCase() - .compareTo( n2.getNodeData().getTaxonomy().getCommonName().toLowerCase() ); - } - } - if ( ( !ForesterUtil.isEmpty( n1.getName() ) ) && ( !ForesterUtil.isEmpty( n2.getName() ) ) ) { - return n1.getName().toLowerCase().compareTo( n2.getName().toLowerCase() ); - } - return 0; - } - } - class PhylogenyNodeSortNodeNamePriority implements Comparator { - - @Override - public int compare( final PhylogenyNode n1, final PhylogenyNode n2 ) { - if ( ( !ForesterUtil.isEmpty( n1.getName() ) ) && ( !ForesterUtil.isEmpty( n2.getName() ) ) ) { - return n1.getName().toLowerCase().compareTo( n2.getName().toLowerCase() ); - } - if ( n1.getNodeData().isHasTaxonomy() && n2.getNodeData().isHasTaxonomy() ) { - if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getScientificName() ) ) - && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getScientificName() ) ) ) { - return n1.getNodeData().getTaxonomy().getScientificName().toLowerCase() - .compareTo( n2.getNodeData().getTaxonomy().getScientificName().toLowerCase() ); - } - if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getTaxonomyCode() ) ) - && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getTaxonomyCode() ) ) ) { - return n1.getNodeData().getTaxonomy().getTaxonomyCode() - .compareTo( n2.getNodeData().getTaxonomy().getTaxonomyCode() ); - } - if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getCommonName() ) ) - && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getCommonName() ) ) ) { - return n1.getNodeData().getTaxonomy().getCommonName().toLowerCase() - .compareTo( n2.getNodeData().getTaxonomy().getCommonName().toLowerCase() ); - } - } - if ( n1.getNodeData().isHasSequence() && n2.getNodeData().isHasSequence() ) { - if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getName() ) ) - && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getName() ) ) ) { - return n1.getNodeData().getSequence().getName().toLowerCase() - .compareTo( n2.getNodeData().getSequence().getName().toLowerCase() ); - } - if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getSymbol() ) ) - && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getSymbol() ) ) ) { - return n1.getNodeData().getSequence().getSymbol() - .compareTo( n2.getNodeData().getSequence().getSymbol() ); - } - if ( ( n1.getNodeData().getSequence().getAccession() != null ) - && ( n2.getNodeData().getSequence().getAccession() != null ) - && !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getAccession().getValue() ) - && !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getAccession().getValue() ) ) { - return n1.getNodeData().getSequence().getAccession().getValue() - .compareTo( n2.getNodeData().getSequence().getAccession().getValue() ); - } - } - return 0; - } - } - Comparator c; - switch ( pri ) { - case SEQUENCE: - c = new PhylogenyNodeSortSequencePriority(); - break; - case NODE_NAME: - c = new PhylogenyNodeSortNodeNamePriority(); - break; - default: - c = new PhylogenyNodeSortTaxonomyPriority(); - } - final List descs = node.getDescendants(); - Collections.sort( descs, c ); - int i = 0; - for( final PhylogenyNode desc : descs ) { - node.setChildNode( i++, desc ); - } - } - - final static public void transferNodeNameToField( final Phylogeny phy, - final PhylogenyMethods.PhylogenyNodeField field, - final boolean external_only ) throws PhyloXmlDataFormatException { - final PhylogenyNodeIterator it = phy.iteratorPostorder(); - while ( it.hasNext() ) { - final PhylogenyNode n = it.next(); - if ( external_only && n.isInternal() ) { - continue; - } - final String name = n.getName().trim(); - if ( !ForesterUtil.isEmpty( name ) ) { - switch ( field ) { - case TAXONOMY_CODE: - n.setName( "" ); - setTaxonomyCode( n, name ); - break; - case TAXONOMY_SCIENTIFIC_NAME: - n.setName( "" ); - if ( !n.getNodeData().isHasTaxonomy() ) { - n.getNodeData().setTaxonomy( new Taxonomy() ); - } - n.getNodeData().getTaxonomy().setScientificName( name ); - break; - case TAXONOMY_COMMON_NAME: - n.setName( "" ); - if ( !n.getNodeData().isHasTaxonomy() ) { - n.getNodeData().setTaxonomy( new Taxonomy() ); - } - n.getNodeData().getTaxonomy().setCommonName( name ); - break; - case SEQUENCE_SYMBOL: - n.setName( "" ); - if ( !n.getNodeData().isHasSequence() ) { - n.getNodeData().setSequence( new Sequence() ); - } - n.getNodeData().getSequence().setSymbol( name ); - break; - case SEQUENCE_NAME: - n.setName( "" ); - if ( !n.getNodeData().isHasSequence() ) { - n.getNodeData().setSequence( new Sequence() ); - } - n.getNodeData().getSequence().setName( name ); - break; - case TAXONOMY_ID_UNIPROT_1: { - if ( !n.getNodeData().isHasTaxonomy() ) { - n.getNodeData().setTaxonomy( new Taxonomy() ); - } - String id = name; - final int i = name.indexOf( '_' ); - if ( i > 0 ) { - id = name.substring( 0, i ); - } - else { - n.setName( "" ); - } - n.getNodeData().getTaxonomy() - .setIdentifier( new Identifier( id, PhyloXmlUtil.UNIPROT_TAX_PROVIDER ) ); - break; - } - case TAXONOMY_ID_UNIPROT_2: { - if ( !n.getNodeData().isHasTaxonomy() ) { - n.getNodeData().setTaxonomy( new Taxonomy() ); - } - String id = name; - final int i = name.indexOf( '_' ); - if ( i > 0 ) { - id = name.substring( i + 1, name.length() ); - } - else { - n.setName( "" ); - } - n.getNodeData().getTaxonomy() - .setIdentifier( new Identifier( id, PhyloXmlUtil.UNIPROT_TAX_PROVIDER ) ); - break; - } - case TAXONOMY_ID: { - if ( !n.getNodeData().isHasTaxonomy() ) { - n.getNodeData().setTaxonomy( new Taxonomy() ); - } - n.getNodeData().getTaxonomy().setIdentifier( new Identifier( name ) ); - break; - } - } - } - } - } - - static double addPhylogenyDistances( final double a, final double b ) { - if ( ( a >= 0.0 ) && ( b >= 0.0 ) ) { - return a + b; - } - else if ( a >= 0.0 ) { - return a; - } - else if ( b >= 0.0 ) { - return b; - } - return PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT; - } - - public final static boolean isAllDecendentsAreDuplications( final PhylogenyNode n ) { - if ( n.isExternal() ) { - return true; - } - else { - if ( n.isDuplication() ) { - for( final PhylogenyNode desc : n.getDescendants() ) { - if ( !isAllDecendentsAreDuplications( desc ) ) { - return false; - } - } - return true; - } - else { - return false; - } - } - } - - public static short calculateMaxBranchesToLeaf( final PhylogenyNode node ) { - if ( node.isExternal() ) { - return 0; - } - short max = 0; - for( PhylogenyNode d : node.getAllExternalDescendants() ) { - short steps = 0; - while ( d != node ) { - if ( d.isCollapse() ) { - steps = 0; - } - else { - steps++; - } - d = d.getParent(); - } - if ( max < steps ) { - max = steps; - } - } - return max; - } - - public static int calculateMaxDepth( final Phylogeny phy ) { - int max = 0; - for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) { - final PhylogenyNode node = iter.next(); - final int steps = node.calculateDepth(); - if ( steps > max ) { - max = steps; - } - } - return max; - } - - public static double calculateMaxDistanceToRoot( final Phylogeny phy ) { - double max = 0.0; - for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) { - final PhylogenyNode node = iter.next(); - final double d = node.calculateDistanceToRoot(); - if ( d > max ) { - max = d; - } - } - return max; - } - - public static int countNumberOfPolytomies( final Phylogeny phy ) { - int count = 0; - for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) { - final PhylogenyNode n = iter.next(); - if ( !n.isExternal() && ( n.getNumberOfDescendants() > 2 ) ) { - count++; - } - } - return count; - } - - public static DescriptiveStatistics calculatNumberOfDescendantsPerNodeStatistics( final Phylogeny phy ) { - final DescriptiveStatistics stats = new BasicDescriptiveStatistics(); - for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) { - final PhylogenyNode n = iter.next(); - if ( !n.isExternal() ) { - stats.addValue( n.getNumberOfDescendants() ); - } - } - return stats; - } - - public static DescriptiveStatistics calculatBranchLengthStatistics( final Phylogeny phy ) { - final DescriptiveStatistics stats = new BasicDescriptiveStatistics(); - for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) { - final PhylogenyNode n = iter.next(); - if ( !n.isRoot() && ( n.getDistanceToParent() >= 0.0 ) ) { - stats.addValue( n.getDistanceToParent() ); - } - } - return stats; - } - - public static List calculatConfidenceStatistics( final Phylogeny phy ) { - final List stats = new ArrayList(); - for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) { - final PhylogenyNode n = iter.next(); - if ( !n.isExternal() && !n.isRoot() ) { - if ( n.getBranchData().isHasConfidences() ) { - for( int i = 0; i < n.getBranchData().getConfidences().size(); ++i ) { - final Confidence c = n.getBranchData().getConfidences().get( i ); - if ( ( i > ( stats.size() - 1 ) ) || ( stats.get( i ) == null ) ) { - stats.add( i, new BasicDescriptiveStatistics() ); - } - if ( !ForesterUtil.isEmpty( c.getType() ) ) { - if ( !ForesterUtil.isEmpty( stats.get( i ).getDescription() ) ) { - if ( !stats.get( i ).getDescription().equalsIgnoreCase( c.getType() ) ) { - throw new IllegalArgumentException( "support values in node [" + n.toString() - + "] appear inconsistently ordered" ); - } - } - stats.get( i ).setDescription( c.getType() ); - } - stats.get( i ).addValue( ( ( c != null ) && ( c.getValue() >= 0 ) ) ? c.getValue() : 0 ); - } - } - } - } - return stats; - } - - /** - * Returns the set of distinct taxonomies of - * all external nodes of node. - * If at least one the external nodes has no taxonomy, - * null is returned. - * - */ - public static Set obtainDistinctTaxonomies( final PhylogenyNode node ) { - final List descs = node.getAllExternalDescendants(); - final Set tax_set = new HashSet(); - for( final PhylogenyNode n : descs ) { - if ( !n.getNodeData().isHasTaxonomy() || n.getNodeData().getTaxonomy().isEmpty() ) { - return null; - } - tax_set.add( n.getNodeData().getTaxonomy() ); - } - return tax_set; - } - - /** - * Returns a map of distinct taxonomies of - * all external nodes of node. - * If at least one of the external nodes has no taxonomy, - * null is returned. - * - */ - public static SortedMap obtainDistinctTaxonomyCounts( final PhylogenyNode node ) { - final List descs = node.getAllExternalDescendants(); - final SortedMap tax_map = new TreeMap(); - for( final PhylogenyNode n : descs ) { - if ( !n.getNodeData().isHasTaxonomy() || n.getNodeData().getTaxonomy().isEmpty() ) { - return null; - } - final Taxonomy t = n.getNodeData().getTaxonomy(); - if ( tax_map.containsKey( t ) ) { - tax_map.put( t, tax_map.get( t ) + 1 ); - } - else { - tax_map.put( t, 1 ); - } - } - return tax_map; - } - - public static int calculateNumberOfExternalNodesWithoutTaxonomy( final PhylogenyNode node ) { - final List descs = node.getAllExternalDescendants(); - int x = 0; - for( final PhylogenyNode n : descs ) { - if ( !n.getNodeData().isHasTaxonomy() || n.getNodeData().getTaxonomy().isEmpty() ) { - x++; - } - } - return x; - } - - /** - * Deep copies the phylogeny originating from this node. - */ - static PhylogenyNode copySubTree( final PhylogenyNode source ) { - if ( source == null ) { - return null; - } - else { - final PhylogenyNode newnode = source.copyNodeData(); - if ( !source.isExternal() ) { - for( int i = 0; i < source.getNumberOfDescendants(); ++i ) { - newnode.setChildNode( i, PhylogenyMethods.copySubTree( source.getChildNode( i ) ) ); - } - } - return newnode; - } - } - - /** - * Shallow copies the phylogeny originating from this node. - */ - static PhylogenyNode copySubTreeShallow( final PhylogenyNode source ) { - if ( source == null ) { - return null; - } - else { - final PhylogenyNode newnode = source.copyNodeDataShallow(); - if ( !source.isExternal() ) { - for( int i = 0; i < source.getNumberOfDescendants(); ++i ) { - newnode.setChildNode( i, PhylogenyMethods.copySubTreeShallow( source.getChildNode( i ) ) ); - } - } - return newnode; - } - } - - public static void deleteExternalNodesNegativeSelection( final Set to_delete, final Phylogeny phy ) { - phy.clearHashIdToNodeMap(); - for( final Integer id : to_delete ) { - phy.deleteSubtree( phy.getNode( id ), true ); - } - phy.clearHashIdToNodeMap(); - phy.externalNodesHaveChanged(); - } - - public static void deleteExternalNodesNegativeSelection( final String[] node_names_to_delete, final Phylogeny p ) - throws IllegalArgumentException { - for( final String element : node_names_to_delete ) { - if ( ForesterUtil.isEmpty( element ) ) { - continue; - } - List nodes = null; - nodes = p.getNodes( element ); - final Iterator it = nodes.iterator(); - while ( it.hasNext() ) { - final PhylogenyNode n = it.next(); - if ( !n.isExternal() ) { - throw new IllegalArgumentException( "attempt to delete non-external node \"" + element + "\"" ); - } - p.deleteSubtree( n, true ); - } + final public static void deleteNonOrthologousExternalNodes( final Phylogeny phy, final PhylogenyNode n ) { + if ( n.isInternal() ) { + throw new IllegalArgumentException( "node is not external" ); } - p.clearHashIdToNodeMap(); - p.externalNodesHaveChanged(); - } - - public static void deleteExternalNodesPositiveSelection( final Set species_to_keep, final Phylogeny phy ) { - // final Set to_delete = new HashSet(); + final ArrayList to_delete = new ArrayList(); for( final PhylogenyNodeIterator it = phy.iteratorExternalForward(); it.hasNext(); ) { - final PhylogenyNode n = it.next(); - if ( n.getNodeData().isHasTaxonomy() ) { - if ( !species_to_keep.contains( n.getNodeData().getTaxonomy() ) ) { - //to_delete.add( n.getNodeId() ); - phy.deleteSubtree( n, true ); - } - } - else { - throw new IllegalArgumentException( "node " + n.getId() + " has no taxonomic data" ); + final PhylogenyNode i = it.next(); + if ( !PhylogenyMethods.getEventAtLCA( n, i ).isSpeciation() ) { + to_delete.add( i ); } } + for( final PhylogenyNode d : to_delete ) { + phy.deleteSubtree( d, true ); + } phy.clearHashIdToNodeMap(); phy.externalNodesHaveChanged(); } - public static List deleteExternalNodesPositiveSelection( final String[] node_names_to_keep, - final Phylogeny p ) { - final PhylogenyNodeIterator it = p.iteratorExternalForward(); - final String[] to_delete = new String[ p.getNumberOfExternalNodes() ]; - int i = 0; - Arrays.sort( node_names_to_keep ); - while ( it.hasNext() ) { - final String curent_name = it.next().getName(); - if ( Arrays.binarySearch( node_names_to_keep, curent_name ) < 0 ) { - to_delete[ i++ ] = curent_name; - } - } - PhylogenyMethods.deleteExternalNodesNegativeSelection( to_delete, p ); - final List deleted = new ArrayList(); - for( final String n : to_delete ) { - if ( !ForesterUtil.isEmpty( n ) ) { - deleted.add( n ); - } - } - return deleted; - } - public static List getAllDescendants( final PhylogenyNode node ) { final List descs = new ArrayList(); final Set encountered = new HashSet(); @@ -952,24 +485,8 @@ public class PhylogenyMethods { return values; } - /** - * Calculates the distance between PhylogenyNodes n1 and n2. - * PRECONDITION: n1 is a descendant of n2. - * - * @param n1 - * a descendant of n2 - * @param n2 - * @return distance between n1 and n2 - */ - private static double getDistance( PhylogenyNode n1, final PhylogenyNode n2 ) { - double d = 0.0; - while ( n1 != n2 ) { - if ( n1.getDistanceToParent() > 0.0 ) { - d += n1.getDistanceToParent(); - } - n1 = n1.getParent(); - } - return d; + final public static Event getEventAtLCA( final PhylogenyNode n1, final PhylogenyNode n2 ) { + return calculateLCA( n1, n2 ).getNodeData().getEvent(); } /** @@ -1064,68 +581,6 @@ public class PhylogenyMethods { } /** - * Returns all Nodes which are connected to external PhylogenyNode n of this - * Phylogeny by a path containing only speciation events. We call these - * "super orthologs". Nodes are returned as Vector of references to Nodes. - *

- * PRECONDITION: This tree must be binary and rooted, and speciation - - * duplication need to be assigned for each of its internal Nodes. - *

- * Returns null if this Phylogeny is empty or if n is internal. - * @param n - * external PhylogenyNode whose strictly speciation related Nodes - * are to be returned - * @return References to all strictly speciation related Nodes of - * PhylogenyNode n of this Phylogeny, null if this Phylogeny is - * empty or if n is internal - */ - public static List getSuperOrthologousNodes( final PhylogenyNode n ) { - // FIXME - PhylogenyNode node = n; - PhylogenyNode deepest = null; - final List v = new ArrayList(); - if ( !node.isExternal() ) { - return null; - } - while ( !node.isRoot() && !node.getParent().isDuplication() ) { - node = node.getParent(); - } - deepest = node; - deepest.setIndicatorsToZero(); - do { - if ( !node.isExternal() ) { - if ( node.getIndicator() == 0 ) { - node.setIndicator( ( byte ) 1 ); - if ( !node.isDuplication() ) { - node = node.getChildNode1(); - } - } - if ( node.getIndicator() == 1 ) { - node.setIndicator( ( byte ) 2 ); - if ( !node.isDuplication() ) { - node = node.getChildNode2(); - } - } - if ( ( node != deepest ) && ( node.getIndicator() == 2 ) ) { - node = node.getParent(); - } - } - else { - if ( node != n ) { - v.add( node ); - } - if ( node != deepest ) { - node = node.getParent(); - } - else { - node.setIndicator( ( byte ) 2 ); - } - } - } while ( ( node != deepest ) || ( deepest.getIndicator() != 2 ) ); - return v; - } - - /** * Convenience method for display purposes. * Not intended for algorithms. */ @@ -1136,37 +591,23 @@ public class PhylogenyMethods { return node.getNodeData().getTaxonomy().getIdentifier().getValue(); } - /** - * Returns all Nodes which are connected to external PhylogenyNode n of this - * Phylogeny by a path containing, and leading to, only duplication events. - * We call these "ultra paralogs". Nodes are returned as Vector of - * references to Nodes. - *

- * PRECONDITION: This tree must be binary and rooted, and speciation - - * duplication need to be assigned for each of its internal Nodes. - *

- * Returns null if this Phylogeny is empty or if n is internal. - *

- * (Last modified: 10/06/01) - * - * @param n - * external PhylogenyNode whose ultra paralogs are to be returned - * @return Vector of references to all ultra paralogs of PhylogenyNode n of - * this Phylogeny, null if this Phylogeny is empty or if n is - * internal - */ - public static List getUltraParalogousNodes( final PhylogenyNode n ) { - // FIXME test me - PhylogenyNode node = n; - if ( !node.isExternal() ) { - throw new IllegalArgumentException( "attempt to get ultra-paralogous nodes of internal node" ); + public final static boolean isAllDecendentsAreDuplications( final PhylogenyNode n ) { + if ( n.isExternal() ) { + return true; } - while ( !node.isRoot() && node.getParent().isDuplication() && isAllDecendentsAreDuplications( node.getParent() ) ) { - node = node.getParent(); + else { + if ( n.isDuplication() ) { + for( final PhylogenyNode desc : n.getDescendants() ) { + if ( !isAllDecendentsAreDuplications( desc ) ) { + return false; + } + } + return true; + } + else { + return false; + } } - final List nodes = node.getAllExternalDescendants(); - nodes.remove( n ); - return nodes; } public static boolean isHasExternalDescendant( final PhylogenyNode node ) { @@ -1198,27 +639,6 @@ public class PhylogenyMethods { } } - private static boolean match( final String s, - final String query, - final boolean case_sensitive, - final boolean partial ) { - if ( ForesterUtil.isEmpty( s ) || ForesterUtil.isEmpty( query ) ) { - return false; - } - String my_s = s.trim(); - String my_query = query.trim(); - if ( !case_sensitive ) { - my_s = my_s.toLowerCase(); - my_query = my_query.toLowerCase(); - } - if ( partial ) { - return my_s.indexOf( my_query ) >= 0; - } - else { - return my_s.equals( my_query ); - } - } - public static void midpointRoot( final Phylogeny phylogeny ) { if ( phylogeny.getNumberOfExternalNodes() < 2 ) { return; @@ -1252,36 +672,123 @@ public class PhylogenyMethods { } } - public static void normalizeBootstrapValues( final Phylogeny phylogeny, - final double max_bootstrap_value, - final double max_normalized_value ) { - for( final PhylogenyNodeIterator iter = phylogeny.iteratorPreorder(); iter.hasNext(); ) { - final PhylogenyNode node = iter.next(); - if ( node.isInternal() ) { - final double confidence = getConfidenceValue( node ); - if ( confidence != Confidence.CONFIDENCE_DEFAULT_VALUE ) { - if ( confidence >= max_bootstrap_value ) { - setBootstrapConfidence( node, max_normalized_value ); - } - else { - setBootstrapConfidence( node, ( confidence * max_normalized_value ) / max_bootstrap_value ); + public static void normalizeBootstrapValues( final Phylogeny phylogeny, + final double max_bootstrap_value, + final double max_normalized_value ) { + for( final PhylogenyNodeIterator iter = phylogeny.iteratorPreorder(); iter.hasNext(); ) { + final PhylogenyNode node = iter.next(); + if ( node.isInternal() ) { + final double confidence = getConfidenceValue( node ); + if ( confidence != Confidence.CONFIDENCE_DEFAULT_VALUE ) { + if ( confidence >= max_bootstrap_value ) { + setBootstrapConfidence( node, max_normalized_value ); + } + else { + setBootstrapConfidence( node, ( confidence * max_normalized_value ) / max_bootstrap_value ); + } + } + } + } + } + + public static List obtainAllNodesAsList( final Phylogeny phy ) { + final List nodes = new ArrayList(); + if ( phy.isEmpty() ) { + return nodes; + } + for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) { + nodes.add( iter.next() ); + } + return nodes; + } + + /** + * Returns the set of distinct taxonomies of + * all external nodes of node. + * If at least one the external nodes has no taxonomy, + * null is returned. + * + */ + public static Set obtainDistinctTaxonomies( final PhylogenyNode node ) { + final List descs = node.getAllExternalDescendants(); + final Set tax_set = new HashSet(); + for( final PhylogenyNode n : descs ) { + if ( !n.getNodeData().isHasTaxonomy() || n.getNodeData().getTaxonomy().isEmpty() ) { + return null; + } + tax_set.add( n.getNodeData().getTaxonomy() ); + } + return tax_set; + } + + /** + * Returns a map of distinct taxonomies of + * all external nodes of node. + * If at least one of the external nodes has no taxonomy, + * null is returned. + * + */ + public static SortedMap obtainDistinctTaxonomyCounts( final PhylogenyNode node ) { + final List descs = node.getAllExternalDescendants(); + final SortedMap tax_map = new TreeMap(); + for( final PhylogenyNode n : descs ) { + if ( !n.getNodeData().isHasTaxonomy() || n.getNodeData().getTaxonomy().isEmpty() ) { + return null; + } + final Taxonomy t = n.getNodeData().getTaxonomy(); + if ( tax_map.containsKey( t ) ) { + tax_map.put( t, tax_map.get( t ) + 1 ); + } + else { + tax_map.put( t, 1 ); + } + } + return tax_map; + } + + /** + * Arranges the order of childern for each node of this Phylogeny in such a + * way that either the branch with more children is on top (right) or on + * bottom (left), dependent on the value of boolean order. + * + * @param order + * decides in which direction to order + * @param pri + */ + public static void orderAppearance( final PhylogenyNode n, + final boolean order, + final boolean order_ext_alphabetically, + final DESCENDANT_SORT_PRIORITY pri ) { + if ( n.isExternal() ) { + return; + } + else { + PhylogenyNode temp = null; + if ( ( n.getNumberOfDescendants() == 2 ) + && ( n.getChildNode1().getNumberOfExternalNodes() != n.getChildNode2().getNumberOfExternalNodes() ) + && ( ( n.getChildNode1().getNumberOfExternalNodes() < n.getChildNode2().getNumberOfExternalNodes() ) == order ) ) { + temp = n.getChildNode1(); + n.setChild1( n.getChildNode2() ); + n.setChild2( temp ); + } + else if ( order_ext_alphabetically ) { + boolean all_ext = true; + for( final PhylogenyNode i : n.getDescendants() ) { + if ( !i.isExternal() ) { + all_ext = false; + break; } } + if ( all_ext ) { + PhylogenyMethods.sortNodeDescendents( n, pri ); + } + } + for( int i = 0; i < n.getNumberOfDescendants(); ++i ) { + orderAppearance( n.getChildNode( i ), order, order_ext_alphabetically, pri ); } } } - public static List obtainAllNodesAsList( final Phylogeny phy ) { - final List nodes = new ArrayList(); - if ( phy.isEmpty() ) { - return nodes; - } - for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) { - nodes.add( iter.next() ); - } - return nodes; - } - public static void postorderBranchColorAveragingExternalNodeBased( final Phylogeny p ) { for( final PhylogenyNodeIterator iter = p.iteratorPostorder(); iter.hasNext(); ) { final PhylogenyNode node = iter.next(); @@ -1309,6 +816,41 @@ public class PhylogenyMethods { } } + public static final void preOrderReId( final Phylogeny phy ) { + if ( phy.isEmpty() ) { + return; + } + phy.setIdToNodeMap( null ); + int i = PhylogenyNode.getNodeCount(); + for( final PhylogenyNodeIterator it = phy.iteratorPreorder(); it.hasNext(); ) { + it.next().setId( i++ ); + } + PhylogenyNode.setNodeCount( i ); + } + + public final static Phylogeny[] readPhylogenies( final PhylogenyParser parser, final File file ) throws IOException { + final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); + final Phylogeny[] trees = factory.create( file, parser ); + if ( ( trees == null ) || ( trees.length == 0 ) ) { + throw new PhylogenyParserException( "Unable to parse phylogeny from file: " + file ); + } + return trees; + } + + public final static Phylogeny[] readPhylogenies( final PhylogenyParser parser, final List files ) + throws IOException { + final List tree_list = new ArrayList(); + for( final File file : files ) { + final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); + final Phylogeny[] trees = factory.create( file, parser ); + if ( ( trees == null ) || ( trees.length == 0 ) ) { + throw new PhylogenyParserException( "Unable to parse phylogeny from file: " + file ); + } + tree_list.addAll( Arrays.asList( trees ) ); + } + return tree_list.toArray( new Phylogeny[ tree_list.size() ] ); + } + public static void removeNode( final PhylogenyNode remove_me, final Phylogeny phylogeny ) { if ( remove_me.isRoot() ) { throw new IllegalArgumentException( "ill advised attempt to remove root node" ); @@ -1602,7 +1144,165 @@ public class PhylogenyMethods { if ( !node.getNodeData().isHasTaxonomy() ) { node.getNodeData().setTaxonomy( new Taxonomy() ); } - node.getNodeData().getTaxonomy().setTaxonomyCode( taxonomy_code ); + node.getNodeData().getTaxonomy().setTaxonomyCode( taxonomy_code ); + } + + final static public void sortNodeDescendents( final PhylogenyNode node, final DESCENDANT_SORT_PRIORITY pri ) { + class PhylogenyNodeSortTaxonomyPriority implements Comparator { + + @Override + public int compare( final PhylogenyNode n1, final PhylogenyNode n2 ) { + if ( n1.getNodeData().isHasTaxonomy() && n2.getNodeData().isHasTaxonomy() ) { + if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getScientificName() ) ) + && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getScientificName() ) ) ) { + return n1.getNodeData().getTaxonomy().getScientificName().toLowerCase() + .compareTo( n2.getNodeData().getTaxonomy().getScientificName().toLowerCase() ); + } + if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getTaxonomyCode() ) ) + && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getTaxonomyCode() ) ) ) { + return n1.getNodeData().getTaxonomy().getTaxonomyCode() + .compareTo( n2.getNodeData().getTaxonomy().getTaxonomyCode() ); + } + if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getCommonName() ) ) + && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getCommonName() ) ) ) { + return n1.getNodeData().getTaxonomy().getCommonName().toLowerCase() + .compareTo( n2.getNodeData().getTaxonomy().getCommonName().toLowerCase() ); + } + } + if ( n1.getNodeData().isHasSequence() && n2.getNodeData().isHasSequence() ) { + if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getName() ) ) + && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getName() ) ) ) { + return n1.getNodeData().getSequence().getName().toLowerCase() + .compareTo( n2.getNodeData().getSequence().getName().toLowerCase() ); + } + if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getSymbol() ) ) + && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getSymbol() ) ) ) { + return n1.getNodeData().getSequence().getSymbol() + .compareTo( n2.getNodeData().getSequence().getSymbol() ); + } + if ( ( n1.getNodeData().getSequence().getAccession() != null ) + && ( n2.getNodeData().getSequence().getAccession() != null ) + && !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getAccession().getValue() ) + && !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getAccession().getValue() ) ) { + return n1.getNodeData().getSequence().getAccession().getValue() + .compareTo( n2.getNodeData().getSequence().getAccession().getValue() ); + } + } + if ( ( !ForesterUtil.isEmpty( n1.getName() ) ) && ( !ForesterUtil.isEmpty( n2.getName() ) ) ) { + return n1.getName().toLowerCase().compareTo( n2.getName().toLowerCase() ); + } + return 0; + } + } + class PhylogenyNodeSortSequencePriority implements Comparator { + + @Override + public int compare( final PhylogenyNode n1, final PhylogenyNode n2 ) { + if ( n1.getNodeData().isHasSequence() && n2.getNodeData().isHasSequence() ) { + if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getName() ) ) + && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getName() ) ) ) { + return n1.getNodeData().getSequence().getName().toLowerCase() + .compareTo( n2.getNodeData().getSequence().getName().toLowerCase() ); + } + if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getSymbol() ) ) + && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getSymbol() ) ) ) { + return n1.getNodeData().getSequence().getSymbol() + .compareTo( n2.getNodeData().getSequence().getSymbol() ); + } + if ( ( n1.getNodeData().getSequence().getAccession() != null ) + && ( n2.getNodeData().getSequence().getAccession() != null ) + && !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getAccession().getValue() ) + && !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getAccession().getValue() ) ) { + return n1.getNodeData().getSequence().getAccession().getValue() + .compareTo( n2.getNodeData().getSequence().getAccession().getValue() ); + } + } + if ( n1.getNodeData().isHasTaxonomy() && n2.getNodeData().isHasTaxonomy() ) { + if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getScientificName() ) ) + && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getScientificName() ) ) ) { + return n1.getNodeData().getTaxonomy().getScientificName().toLowerCase() + .compareTo( n2.getNodeData().getTaxonomy().getScientificName().toLowerCase() ); + } + if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getTaxonomyCode() ) ) + && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getTaxonomyCode() ) ) ) { + return n1.getNodeData().getTaxonomy().getTaxonomyCode() + .compareTo( n2.getNodeData().getTaxonomy().getTaxonomyCode() ); + } + if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getCommonName() ) ) + && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getCommonName() ) ) ) { + return n1.getNodeData().getTaxonomy().getCommonName().toLowerCase() + .compareTo( n2.getNodeData().getTaxonomy().getCommonName().toLowerCase() ); + } + } + if ( ( !ForesterUtil.isEmpty( n1.getName() ) ) && ( !ForesterUtil.isEmpty( n2.getName() ) ) ) { + return n1.getName().toLowerCase().compareTo( n2.getName().toLowerCase() ); + } + return 0; + } + } + class PhylogenyNodeSortNodeNamePriority implements Comparator { + + @Override + public int compare( final PhylogenyNode n1, final PhylogenyNode n2 ) { + if ( ( !ForesterUtil.isEmpty( n1.getName() ) ) && ( !ForesterUtil.isEmpty( n2.getName() ) ) ) { + return n1.getName().toLowerCase().compareTo( n2.getName().toLowerCase() ); + } + if ( n1.getNodeData().isHasTaxonomy() && n2.getNodeData().isHasTaxonomy() ) { + if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getScientificName() ) ) + && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getScientificName() ) ) ) { + return n1.getNodeData().getTaxonomy().getScientificName().toLowerCase() + .compareTo( n2.getNodeData().getTaxonomy().getScientificName().toLowerCase() ); + } + if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getTaxonomyCode() ) ) + && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getTaxonomyCode() ) ) ) { + return n1.getNodeData().getTaxonomy().getTaxonomyCode() + .compareTo( n2.getNodeData().getTaxonomy().getTaxonomyCode() ); + } + if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getCommonName() ) ) + && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getCommonName() ) ) ) { + return n1.getNodeData().getTaxonomy().getCommonName().toLowerCase() + .compareTo( n2.getNodeData().getTaxonomy().getCommonName().toLowerCase() ); + } + } + if ( n1.getNodeData().isHasSequence() && n2.getNodeData().isHasSequence() ) { + if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getName() ) ) + && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getName() ) ) ) { + return n1.getNodeData().getSequence().getName().toLowerCase() + .compareTo( n2.getNodeData().getSequence().getName().toLowerCase() ); + } + if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getSymbol() ) ) + && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getSymbol() ) ) ) { + return n1.getNodeData().getSequence().getSymbol() + .compareTo( n2.getNodeData().getSequence().getSymbol() ); + } + if ( ( n1.getNodeData().getSequence().getAccession() != null ) + && ( n2.getNodeData().getSequence().getAccession() != null ) + && !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getAccession().getValue() ) + && !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getAccession().getValue() ) ) { + return n1.getNodeData().getSequence().getAccession().getValue() + .compareTo( n2.getNodeData().getSequence().getAccession().getValue() ); + } + } + return 0; + } + } + Comparator c; + switch ( pri ) { + case SEQUENCE: + c = new PhylogenyNodeSortSequencePriority(); + break; + case NODE_NAME: + c = new PhylogenyNodeSortNodeNamePriority(); + break; + default: + c = new PhylogenyNodeSortTaxonomyPriority(); + } + final List descs = node.getDescendants(); + Collections.sort( descs, c ); + int i = 0; + for( final PhylogenyNode desc : descs ) { + node.setChildNode( i++, desc ); + } } /** @@ -1650,49 +1350,231 @@ public class PhylogenyMethods { return nodes_to_delete; } - /** - * Arranges the order of childern for each node of this Phylogeny in such a - * way that either the branch with more children is on top (right) or on - * bottom (left), dependent on the value of boolean order. - * - * @param order - * decides in which direction to order - * @param pri - */ - public static void orderAppearance( final PhylogenyNode n, - final boolean order, - final boolean order_ext_alphabetically, - final DESCENDANT_SORT_PRIORITY pri ) { - if ( n.isExternal() ) { - return; + final static public void transferInternalNamesToBootstrapSupport( final Phylogeny phy ) { + final PhylogenyNodeIterator it = phy.iteratorPostorder(); + while ( it.hasNext() ) { + final PhylogenyNode n = it.next(); + if ( !n.isExternal() && !ForesterUtil.isEmpty( n.getName() ) ) { + double value = -1; + try { + value = Double.parseDouble( n.getName() ); + } + catch ( final NumberFormatException e ) { + throw new IllegalArgumentException( "failed to parse number from [" + n.getName() + "]: " + + e.getLocalizedMessage() ); + } + if ( value >= 0.0 ) { + n.getBranchData().addConfidence( new Confidence( value, "bootstrap" ) ); + n.setName( "" ); + } + } } - else { - PhylogenyNode temp = null; - if ( ( n.getNumberOfDescendants() == 2 ) - && ( n.getChildNode1().getNumberOfExternalNodes() != n.getChildNode2().getNumberOfExternalNodes() ) - && ( ( n.getChildNode1().getNumberOfExternalNodes() < n.getChildNode2().getNumberOfExternalNodes() ) == order ) ) { - temp = n.getChildNode1(); - n.setChild1( n.getChildNode2() ); - n.setChild2( temp ); + } + + final static public void transferInternalNodeNamesToConfidence( final Phylogeny phy ) { + final PhylogenyNodeIterator it = phy.iteratorPostorder(); + while ( it.hasNext() ) { + final PhylogenyNode n = it.next(); + if ( !n.isExternal() && !n.getBranchData().isHasConfidences() ) { + if ( !ForesterUtil.isEmpty( n.getName() ) ) { + double d = -1.0; + try { + d = Double.parseDouble( n.getName() ); + } + catch ( final Exception e ) { + d = -1.0; + } + if ( d >= 0.0 ) { + n.getBranchData().addConfidence( new Confidence( d, "" ) ); + n.setName( "" ); + } + } } - else if ( order_ext_alphabetically ) { - boolean all_ext = true; - for( final PhylogenyNode i : n.getDescendants() ) { - if ( !i.isExternal() ) { - all_ext = false; + } + } + + final static public void transferNodeNameToField( final Phylogeny phy, + final PhylogenyNodeField field, + final boolean external_only ) throws PhyloXmlDataFormatException { + final PhylogenyNodeIterator it = phy.iteratorPostorder(); + while ( it.hasNext() ) { + final PhylogenyNode n = it.next(); + if ( external_only && n.isInternal() ) { + continue; + } + final String name = n.getName().trim(); + if ( !ForesterUtil.isEmpty( name ) ) { + switch ( field ) { + case TAXONOMY_CODE: + n.setName( "" ); + setTaxonomyCode( n, name ); + break; + case TAXONOMY_SCIENTIFIC_NAME: + n.setName( "" ); + if ( !n.getNodeData().isHasTaxonomy() ) { + n.getNodeData().setTaxonomy( new Taxonomy() ); + } + n.getNodeData().getTaxonomy().setScientificName( name ); + break; + case TAXONOMY_COMMON_NAME: + n.setName( "" ); + if ( !n.getNodeData().isHasTaxonomy() ) { + n.getNodeData().setTaxonomy( new Taxonomy() ); + } + n.getNodeData().getTaxonomy().setCommonName( name ); + break; + case SEQUENCE_SYMBOL: + n.setName( "" ); + if ( !n.getNodeData().isHasSequence() ) { + n.getNodeData().setSequence( new Sequence() ); + } + n.getNodeData().getSequence().setSymbol( name ); + break; + case SEQUENCE_NAME: + n.setName( "" ); + if ( !n.getNodeData().isHasSequence() ) { + n.getNodeData().setSequence( new Sequence() ); + } + n.getNodeData().getSequence().setName( name ); + break; + case TAXONOMY_ID_UNIPROT_1: { + if ( !n.getNodeData().isHasTaxonomy() ) { + n.getNodeData().setTaxonomy( new Taxonomy() ); + } + String id = name; + final int i = name.indexOf( '_' ); + if ( i > 0 ) { + id = name.substring( 0, i ); + } + else { + n.setName( "" ); + } + n.getNodeData().getTaxonomy() + .setIdentifier( new Identifier( id, PhyloXmlUtil.UNIPROT_TAX_PROVIDER ) ); + break; + } + case TAXONOMY_ID_UNIPROT_2: { + if ( !n.getNodeData().isHasTaxonomy() ) { + n.getNodeData().setTaxonomy( new Taxonomy() ); + } + String id = name; + final int i = name.indexOf( '_' ); + if ( i > 0 ) { + id = name.substring( i + 1, name.length() ); + } + else { + n.setName( "" ); + } + n.getNodeData().getTaxonomy() + .setIdentifier( new Identifier( id, PhyloXmlUtil.UNIPROT_TAX_PROVIDER ) ); + break; + } + case TAXONOMY_ID: { + if ( !n.getNodeData().isHasTaxonomy() ) { + n.getNodeData().setTaxonomy( new Taxonomy() ); + } + n.getNodeData().getTaxonomy().setIdentifier( new Identifier( name ) ); break; } } - if ( all_ext ) { - PhylogenyMethods.sortNodeDescendents( n, pri ); + } + } + } + + static double addPhylogenyDistances( final double a, final double b ) { + if ( ( a >= 0.0 ) && ( b >= 0.0 ) ) { + return a + b; + } + else if ( a >= 0.0 ) { + return a; + } + else if ( b >= 0.0 ) { + return b; + } + return PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT; + } + + /** + * Deep copies the phylogeny originating from this node. + */ + static PhylogenyNode copySubTree( final PhylogenyNode source ) { + if ( source == null ) { + return null; + } + else { + final PhylogenyNode newnode = source.copyNodeData(); + if ( !source.isExternal() ) { + for( int i = 0; i < source.getNumberOfDescendants(); ++i ) { + newnode.setChildNode( i, PhylogenyMethods.copySubTree( source.getChildNode( i ) ) ); } } - for( int i = 0; i < n.getNumberOfDescendants(); ++i ) { - orderAppearance( n.getChildNode( i ), order, order_ext_alphabetically, pri ); + return newnode; + } + } + + /** + * Shallow copies the phylogeny originating from this node. + */ + static PhylogenyNode copySubTreeShallow( final PhylogenyNode source ) { + if ( source == null ) { + return null; + } + else { + final PhylogenyNode newnode = source.copyNodeDataShallow(); + if ( !source.isExternal() ) { + for( int i = 0; i < source.getNumberOfDescendants(); ++i ) { + newnode.setChildNode( i, PhylogenyMethods.copySubTreeShallow( source.getChildNode( i ) ) ); + } + } + return newnode; + } + } + + /** + * Calculates the distance between PhylogenyNodes n1 and n2. + * PRECONDITION: n1 is a descendant of n2. + * + * @param n1 + * a descendant of n2 + * @param n2 + * @return distance between n1 and n2 + */ + private static double getDistance( PhylogenyNode n1, final PhylogenyNode n2 ) { + double d = 0.0; + while ( n1 != n2 ) { + if ( n1.getDistanceToParent() > 0.0 ) { + d += n1.getDistanceToParent(); } + n1 = n1.getParent(); + } + return d; + } + + private static boolean match( final String s, + final String query, + final boolean case_sensitive, + final boolean partial ) { + if ( ForesterUtil.isEmpty( s ) || ForesterUtil.isEmpty( query ) ) { + return false; + } + String my_s = s.trim(); + String my_query = query.trim(); + if ( !case_sensitive ) { + my_s = my_s.toLowerCase(); + my_query = my_query.toLowerCase(); + } + if ( partial ) { + return my_s.indexOf( my_query ) >= 0; + } + else { + return my_s.equals( my_query ); } } + public static enum DESCENDANT_SORT_PRIORITY { + TAXONOMY, SEQUENCE, NODE_NAME; + } + public static enum PhylogenyNodeField { CLADE_NAME, TAXONOMY_CODE, @@ -1704,8 +1586,4 @@ public class PhylogenyMethods { TAXONOMY_ID_UNIPROT_2, TAXONOMY_ID; } - - public static enum DESCENDANT_SORT_PRIORITY { - TAXONOMY, SEQUENCE, NODE_NAME; - } } diff --git a/forester/java/src/org/forester/rio/RIO.java b/forester/java/src/org/forester/rio/RIO.java index fd8ae3f..84d1d15 100644 --- a/forester/java/src/org/forester/rio/RIO.java +++ b/forester/java/src/org/forester/rio/RIO.java @@ -60,28 +60,28 @@ public final class RIO { private final static boolean ROOT_BY_MINIMIZING_TREE_HEIGHT = true; private Phylogeny[] _analyzed_gene_trees; private List _removed_gene_tree_nodes; - private int _samples; private int _ext_nodes; private TaxonomyComparisonBase _gsdir_tax_comp_base; private StringBuilder _log; private boolean _produce_log; + private boolean _verbose; public RIO( final File gene_trees_file, final Phylogeny species_tree, final ALGORITHM algorithm, - final boolean produce_log ) throws IOException, SDIException, RIOException { - init( produce_log ); + final boolean produce_log, + final boolean verbose ) throws IOException, SDIException, RIOException { + init( produce_log, verbose ); inferOrthologs( gene_trees_file, species_tree, algorithm ); } - private final void init( final boolean produce_log ) { - _produce_log = produce_log; - _samples = -1; - _ext_nodes = -1; - _log = null; - _gsdir_tax_comp_base = null; - _analyzed_gene_trees = null; - _removed_gene_tree_nodes = null; + public RIO( final Phylogeny[] gene_trees, + final Phylogeny species_tree, + final ALGORITHM algorithm, + final boolean produce_log, + final boolean verbose ) throws IOException, SDIException, RIOException { + init( produce_log, verbose ); + inferOrthologs( gene_trees, species_tree, algorithm ); } public final Phylogeny[] getAnalyzedGeneTrees() { @@ -98,8 +98,12 @@ public final class RIO { return _ext_nodes; } - public final int getNumberOfSamples() { - return _samples; + public final TaxonomyComparisonBase getGSDIRtaxCompBase() { + return _gsdir_tax_comp_base; + } + + public final StringBuilder getLog() { + return _log; } public final List getRemovedGeneTreeNodes() { @@ -110,8 +114,6 @@ public final class RIO { final Phylogeny species_tree, final ALGORITHM algorithm ) throws SDIException, RIOException, FileNotFoundException, IOException { - // Read in first tree to get its sequence names - // and strip species_tree. final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); final PhylogenyParser p = ParserUtils.createParserDependingOnFileType( gene_trees_file, true ); if ( p instanceof NHXParser ) { @@ -121,6 +123,13 @@ public final class RIO { nhx.setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.YES ); } final Phylogeny[] gene_trees = factory.create( gene_trees_file, p ); + inferOrthologs( gene_trees, species_tree, algorithm ); + } + + private final void inferOrthologs( final Phylogeny[] gene_trees, + final Phylogeny species_tree, + final ALGORITHM algorithm ) throws SDIException, RIOException, + FileNotFoundException, IOException { if ( algorithm == ALGORITHM.SDIR ) { // Removes from species_tree all species not found in gene_tree. PhylogenyMethods.taxonomyBasedDeletionOfExternalNodes( gene_trees[ 0 ], species_tree ); @@ -130,43 +139,49 @@ public final class RIO { } if ( _produce_log ) { _log = new StringBuilder(); + writeLogSubHeader(); } _analyzed_gene_trees = new Phylogeny[ gene_trees.length ]; - int i = 0; int gene_tree_ext_nodes = 0; - if ( _produce_log ) { - _log.append( "#" ); - _log.append( "\t" ); - _log.append( "with minimal number of duplications" ); - _log.append( "/" ); - _log.append( "root placements" ); - _log.append( "\t[" ); - _log.append( "min" ); - _log.append( "-" ); - _log.append( "max" ); - _log.append( "]" ); - _log.append( ForesterUtil.LINE_SEPARATOR ); + if ( _verbose ) { + System.out.println(); } - for( final Phylogeny gt : gene_trees ) { + for( int i = 0; i < gene_trees.length; ++i ) { + final Phylogeny gt = gene_trees[ i ]; + if ( _verbose ) { + ForesterUtil.updateProgress( ( double ) i / gene_trees.length ); + } + if ( i == 0 ) { + gene_tree_ext_nodes = gt.getNumberOfExternalNodes(); + } + else if ( gene_tree_ext_nodes != gt.getNumberOfExternalNodes() ) { + throw new RIOException( "gene tree #" + ( i + 1 ) + " has a different number of external nodes (" + + gt.getNumberOfExternalNodes() + ") than the preceding gene trees (" + gene_tree_ext_nodes + + ")" ); + } if ( algorithm == ALGORITHM.SDIR ) { // Removes from gene_tree all species not found in species_tree. PhylogenyMethods.taxonomyBasedDeletionOfExternalNodes( species_tree, gt ); if ( gt.isEmpty() ) { throw new RIOException( "failed to establish species based mapping between gene and species trees" ); } - if ( i == 0 ) { - gene_tree_ext_nodes = gt.getNumberOfExternalNodes(); - } - else if ( gene_tree_ext_nodes != gt.getNumberOfExternalNodes() ) { - throw new RIOException( "(cleaned up) gene tree #" + ( i + 1 ) - + " has a different number of external nodes (" + gt.getNumberOfExternalNodes() - + ") than those gene trees preceding it (" + gene_tree_ext_nodes + ")" ); - } } _analyzed_gene_trees[ i ] = performOrthologInference( gt, species_tree, algorithm, i ); - ++i; } - setNumberOfSamples( gene_trees.length ); + if ( _verbose ) { + System.out.println(); + System.out.println(); + } + } + + private final void init( final boolean produce_log, final boolean verbose ) { + _produce_log = produce_log; + _verbose = verbose; + _ext_nodes = -1; + _log = null; + _gsdir_tax_comp_base = null; + _analyzed_gene_trees = null; + _removed_gene_tree_nodes = null; } private final Phylogeny performOrthologInference( final Phylogeny gene_tree, @@ -195,20 +210,15 @@ public final class RIO { assigned_tree = gsdir.getMinDuplicationsSumGeneTrees().get( 0 ); if ( i == 0 ) { _removed_gene_tree_nodes = gsdir.getStrippedExternalGeneTreeNodes(); + for( final PhylogenyNode r : _removed_gene_tree_nodes ) { + if ( !r.getNodeData().isHasTaxonomy() ) { + throw new RIOException( "node with no (appropriate) taxonomic information found in gene tree #1: " + + r.toString() ); + } + } } if ( _produce_log ) { - final BasicDescriptiveStatistics stats = gsdir.getDuplicationsSumStats(); - _log.append( i ); - _log.append( "\t" ); - _log.append( gsdir.getMinDuplicationsSumGeneTrees().size() ); - _log.append( "/" ); - _log.append( stats.getN() ); - _log.append( "\t[" ); - _log.append( ( int ) stats.getMin() ); - _log.append( "-" ); - _log.append( ( int ) stats.getMax() ); - _log.append( "]" ); - _log.append( ForesterUtil.LINE_SEPARATOR ); + writeStatsToLog( i, gsdir ); } _gsdir_tax_comp_base = gsdir.getTaxCompBase(); break; @@ -217,16 +227,44 @@ public final class RIO { throw new IllegalArgumentException( "illegal algorithm: " + algorithm ); } } - setExtNodesOfAnalyzedGeneTrees( assigned_tree.getNumberOfExternalNodes() ); + if ( i == 0 ) { + _ext_nodes = assigned_tree.getNumberOfExternalNodes(); + } + else if ( _ext_nodes != assigned_tree.getNumberOfExternalNodes() ) { + throw new RIOException( "after stripping gene tree #" + ( i + 1 ) + + " has a different number of external nodes (" + assigned_tree.getNumberOfExternalNodes() + + ") than the preceding gene trees (" + _ext_nodes + ")" ); + } return assigned_tree; } - private final void setExtNodesOfAnalyzedGeneTrees( final int i ) { - _ext_nodes = i; + private void writeLogSubHeader() { + _log.append( "#" ); + _log.append( "\t" ); + _log.append( "with minimal number of duplications" ); + _log.append( "/" ); + _log.append( "root placements" ); + _log.append( "\t[" ); + _log.append( "min" ); + _log.append( "-" ); + _log.append( "max" ); + _log.append( "]" ); + _log.append( ForesterUtil.LINE_SEPARATOR ); } - private final void setNumberOfSamples( final int i ) { - _samples = i; + private final void writeStatsToLog( final int i, final GSDIR gsdir ) { + final BasicDescriptiveStatistics stats = gsdir.getDuplicationsSumStats(); + _log.append( i ); + _log.append( "\t" ); + _log.append( gsdir.getMinDuplicationsSumGeneTrees().size() ); + _log.append( "/" ); + _log.append( stats.getN() ); + _log.append( "\t[" ); + _log.append( ( int ) stats.getMin() ); + _log.append( "-" ); + _log.append( ( int ) stats.getMax() ); + _log.append( "]" ); + _log.append( ForesterUtil.LINE_SEPARATOR ); } public final static IntMatrix calculateOrthologTable( final Phylogeny[] analyzed_gene_trees, final boolean sort ) @@ -246,10 +284,10 @@ public final class RIO { label = n.getName(); } else { - throw new IllegalArgumentException( "node " + n + " has no appropriate label" ); + throw new RIOException( "node " + n + " has no appropriate label" ); } if ( labels_set.contains( label ) ) { - throw new IllegalArgumentException( "label " + label + " is not unique" ); + throw new RIOException( "label " + label + " is not unique" ); } labels_set.add( label ); labels.add( label ); @@ -285,12 +323,4 @@ public final class RIO { } return m; } - - public final TaxonomyComparisonBase getGSDIRtaxCompBase() { - return _gsdir_tax_comp_base; - } - - public final StringBuilder getLog() { - return _log; - } } diff --git a/forester/java/src/org/forester/rio/TestRIO.java b/forester/java/src/org/forester/rio/TestRIO.java new file mode 100644 index 0000000..00eab1d --- /dev/null +++ b/forester/java/src/org/forester/rio/TestRIO.java @@ -0,0 +1,119 @@ + +package org.forester.rio; + +import org.forester.datastructures.IntMatrix; +import org.forester.io.parsers.nhx.NHXParser; +import org.forester.phylogeny.Phylogeny; +import org.forester.phylogeny.PhylogenyMethods; +import org.forester.phylogeny.PhylogenyMethods.PhylogenyNodeField; +import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory; +import org.forester.phylogeny.factories.PhylogenyFactory; +import org.forester.sdi.SDI.ALGORITHM; +import org.forester.sdi.SDI.TaxonomyComparisonBase; +import org.forester.util.ForesterUtil; + +public final class TestRIO { + + private final static String PATH_TO_TEST_DATA = System.getProperty( "user.dir" ) + ForesterUtil.getFileSeparator() + + "test_data" + ForesterUtil.getFileSeparator(); + + public static boolean test() { + if ( !testRIO_GSDIR() ) { + return false; + } + return true; + } + + private static boolean testRIO_GSDIR() { + try { + final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); + final NHXParser nhx = new NHXParser(); + nhx.setReplaceUnderscores( false ); + nhx.setIgnoreQuotes( true ); + nhx.setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.YES ); + final String gene_trees_1_str = "(((((MOUSE,RAT),HUMAN),CAEEL),YEAST),ARATH);" + + "((((MOUSE,RAT),HUMAN),(ARATH,YEAST)),CAEEL);" + "((MOUSE,RAT),(((ARATH,YEAST),CAEEL),HUMAN));" + + "(((((MOUSE,HUMAN),RAT),CAEEL),YEAST),ARATH);" + "((((HUMAN,MOUSE),RAT),(ARATH,YEAST)),CAEEL);"; + final Phylogeny[] gene_trees_1 = factory.create( gene_trees_1_str, nhx ); + final String species_trees_1_str = "(((((MOUSE,RAT),HUMAN),CAEEL),YEAST),ARATH);"; + final Phylogeny species_tree_1 = factory.create( species_trees_1_str, new NHXParser() )[ 0 ]; + species_tree_1.setRooted( true ); + PhylogenyMethods.transferNodeNameToField( species_tree_1, PhylogenyNodeField.TAXONOMY_CODE, true ); + //Archaeopteryx.createApplication( species_trees_1 ); + RIO rio = new RIO( gene_trees_1, species_tree_1, ALGORITHM.GSDIR, true, false ); + if ( rio.getAnalyzedGeneTrees().length != 5 ) { + return false; + } + if ( rio.getExtNodesOfAnalyzedGeneTrees() != 6 ) { + return false; + } + if ( rio.getGSDIRtaxCompBase() != TaxonomyComparisonBase.CODE ) { + return false; + } + if ( rio.getRemovedGeneTreeNodes().size() != 0 ) { + return false; + } + IntMatrix m = RIO.calculateOrthologTable( rio.getAnalyzedGeneTrees(), true ); + // System.out.println( m.toString() ); + if ( !m.getRowAsString( 0, ',' ).equals( "ARATH,5,5,5,5,5,5" ) ) { + return false; + } + if ( !m.getRowAsString( 1, ',' ).equals( "CAEEL,5,5,5,5,5,5" ) ) { + return false; + } + if ( !m.getRowAsString( 2, ',' ).equals( "HUMAN,5,5,5,5,3,5" ) ) { + return false; + } + if ( !m.getRowAsString( 3, ',' ).equals( "MOUSE,5,5,5,5,3,5" ) ) { + return false; + } + if ( !m.getRowAsString( 4, ',' ).equals( "RAT,5,5,3,3,5,5" ) ) { + return false; + } + if ( !m.getRowAsString( 5, ',' ).equals( "YEAST,5,5,5,5,5,5" ) ) { + return false; + } + // + final Phylogeny[] gene_trees_2 = factory.create( gene_trees_1_str, nhx ); + final String species_trees_2_str = "((((MOUSE,RAT,HUMAN),CAEEL),YEAST),ARATH);"; + final Phylogeny species_tree_2 = factory.create( species_trees_2_str, new NHXParser() )[ 0 ]; + species_tree_2.setRooted( true ); + PhylogenyMethods.transferNodeNameToField( species_tree_2, PhylogenyNodeField.TAXONOMY_CODE, true ); + rio = new RIO( gene_trees_2, species_tree_2, ALGORITHM.GSDIR, true, false ); + m = RIO.calculateOrthologTable( rio.getAnalyzedGeneTrees(), true ); + // System.out.println( m.toString() ); + if ( !m.getRowAsString( 0, ',' ).equals( "ARATH,5,5,5,5,5,5" ) ) { + return false; + } + if ( !m.getRowAsString( 1, ',' ).equals( "CAEEL,5,5,5,5,5,5" ) ) { + return false; + } + if ( !m.getRowAsString( 2, ',' ).equals( "HUMAN,5,5,5,5,5,5" ) ) { + return false; + } + if ( !m.getRowAsString( 3, ',' ).equals( "MOUSE,5,5,5,5,5,5" ) ) { + return false; + } + if ( !m.getRowAsString( 4, ',' ).equals( "RAT,5,5,5,5,5,5" ) ) { + return false; + } + if ( !m.getRowAsString( 5, ',' ).equals( "YEAST,5,5,5,5,5,5" ) ) { + return false; + } + } + catch ( final Exception e ) { + e.printStackTrace( System.out ); + return false; + } + return true; + } + + public static void main( final String[] args ) { + if ( !testRIO_GSDIR() ) { + System.out.println( "testRIO GSDIR failed" ); + } + else { + System.out.println( "OK" ); + } + } +} \ No newline at end of file diff --git a/forester/java/src/org/forester/sdi/GSDIR.java b/forester/java/src/org/forester/sdi/GSDIR.java index 70cc009..5fd0a3b 100644 --- a/forester/java/src/org/forester/sdi/GSDIR.java +++ b/forester/java/src/org/forester/sdi/GSDIR.java @@ -49,7 +49,7 @@ public class GSDIR extends GSDI { final List gene_tree_branches_post_order = new ArrayList(); for( final PhylogenyNodeIterator it = _gene_tree.iteratorPostorder(); it.hasNext(); ) { final PhylogenyNode n = it.next(); - if ( !n.isRoot() ) { + if ( !n.isRoot() && !( n.getParent().isRoot() && n.isFirstChildNode() ) ) { gene_tree_branches_post_order.add( new PhylogenyBranch( n, n.getParent() ) ); } } @@ -62,12 +62,12 @@ public class GSDIR extends GSDI { _gene_tree.reRoot( branch ); PhylogenyMethods.preOrderReId( getSpeciesTree() ); //TEST, remove later - for( final PhylogenyNodeIterator it = _gene_tree.iteratorPostorder(); it.hasNext(); ) { - final PhylogenyNode g = it.next(); - if ( g.isInternal() ) { - g.setLink( null ); - } - } + // for( final PhylogenyNodeIterator it = _gene_tree.iteratorPostorder(); it.hasNext(); ) { + // final PhylogenyNode g = it.next(); + // if ( g.isInternal() ) { + // g.setLink( null ); + // } + // } geneTreePostOrderTraversal(); if ( _duplications_sum < _min_duplications_sum ) { _min_duplications_sum = _duplications_sum; @@ -79,7 +79,6 @@ public class GSDIR extends GSDI { } _duplications_sum_stats.addValue( _duplications_sum ); } - //System.out.println( _duplications_sum_stats.getSummaryAsString() ); } public int getMinDuplicationsSum() { diff --git a/forester/java/src/org/forester/sdi/TestGSDI.java b/forester/java/src/org/forester/sdi/TestGSDI.java index 62ab0f8..a9dfa66 100644 --- a/forester/java/src/org/forester/sdi/TestGSDI.java +++ b/forester/java/src/org/forester/sdi/TestGSDI.java @@ -27,7 +27,6 @@ package org.forester.sdi; import java.io.IOException; -import org.forester.archaeopteryx.Archaeopteryx; import org.forester.development.DevelopmentTools; import org.forester.io.parsers.nhx.NHXParser; import org.forester.io.parsers.util.ParserUtils; @@ -61,6 +60,9 @@ public final class TestGSDI { if ( !TestGSDI.testGSDI_against_binary_gene_tree() ) { return false; } + if ( !TestGSDI.testGSDIR_general() ) { + return false; + } return true; } @@ -1429,60 +1431,30 @@ public final class TestGSDI { final String s1str = "(((([&&NHX:S=HUMAN],([&&NHX:S=MOUSE],[&&NHX:S=RAT])),([&&NHX:S=CAEEL],[&&NHX:S=CAEBR])),[&&NHX:S=YEAST]),[&&NHX:S=ARATH])"; final Phylogeny s1 = ParserBasedPhylogenyFactory.getInstance().create( s1str, new NHXParser() )[ 0 ]; s1.setRooted( true ); - // Archaeopteryx.createApplication( s1.copy() ); final Phylogeny g1 = TestGSDI .createPhylogeny( "(HUMAN[&&NHX:S=HUMAN],(RAT[&&NHX:S=RAT],(CAEEL[&&NHX:T=:S=CAEEL],YEAST[&&NHX:S=YEAST])))" ); final GSDIR sdi1 = new GSDIR( g1.copy(), s1.copy(), false, false ); if ( sdi1.getMinDuplicationsSum() != 0 ) { return false; } - System.out.println( sdi1.getDuplicationsSumStats().getSummaryAsString() ); - // Archaeopteryx.createApplication( sdi1.getMinDuplicationsSumGeneTrees().get( 0 ) ); - // final Phylogeny g2 = TestGSDI .createPhylogeny( "(((HUMAN[&&NHX:S=HUMAN],RAT[&&NHX:S=RAT]),CAEEL[&&NHX:T=:S=CAEEL]),YEAST[&&NHX:S=YEAST])" ); final GSDIR sdi2 = new GSDIR( g2.copy(), s1.copy(), false, false ); if ( sdi2.getMinDuplicationsSum() != 0 ) { return false; } - System.out.println( sdi2.getDuplicationsSumStats().getSummaryAsString() ); - // Archaeopteryx.createApplication( sdi2.getMinDuplicationsSumGeneTrees().get( 0 ) ); - // final Phylogeny g3 = TestGSDI .createPhylogeny( "(RAT[&&NHX:S=RAT],HUMAN[&&NHX:S=HUMAN],(YEAST[&&NHX:S=YEAST],CAEEL[&&NHX:T=:S=CAEEL]))" ); - // Archaeopteryx.createApplication( g3 ); final GSDIR sdi3 = new GSDIR( g3.copy(), s1.copy(), false, false ); if ( sdi3.getMinDuplicationsSum() != 0 ) { return false; } - System.out.println( sdi3.getDuplicationsSumStats().getSummaryAsString() ); - // Archaeopteryx.createApplication( sdi3.getMinDuplicationsSumGeneTrees().get( 0 ) ); - // final Phylogeny g4 = TestGSDI .createPhylogeny( "(((((MOUSE[&&NHX:S=MOUSE],[&&NHX:S=RAT]),[&&NHX:S=HUMAN]),([&&NHX:S=ARATH],[&&NHX:S=YEAST])),[&&NHX:S=CAEEL]),[&&NHX:S=CAEBR])" ); - Archaeopteryx.createApplication( g4 ); final GSDIR sdi4 = new GSDIR( g4.copy(), s1.copy(), false, false ); if ( sdi4.getMinDuplicationsSum() != 0 ) { return false; } - System.out.println( sdi4.getDuplicationsSumStats().getSummaryAsString() ); - Archaeopteryx.createApplication( sdi4.getMinDuplicationsSumGeneTrees().get( 0 ) ); - // if ( !PhylogenyMethods.calculateLCA( g1.getNode( "B" ), g1.getNode( "A1" ) ).getNodeData().getEvent() - // .isSpeciation() ) { - // return false; - // } - // if ( !PhylogenyMethods.calculateLCA( g1.getNode( "C" ), g1.getNode( "A1" ) ).getNodeData().getEvent() - // .isSpeciationOrDuplication() ) { - // return false; - // } - // if ( !( PhylogenyMethods.calculateLCA( g1.getNode( "A2" ), g1.getNode( "A1" ) ).getNodeData().getEvent() - // .isDuplication() ) ) { - // return false; - // } - // if ( !PhylogenyMethods.calculateLCA( g1.getNode( "D" ), g1.getNode( "A1" ) ).getNodeData().getEvent() - // .isSpeciation() ) { - // return false; - // } } catch ( final Exception e ) { e.printStackTrace( System.out ); diff --git a/forester/java/src/org/forester/test/Test.java b/forester/java/src/org/forester/test/Test.java index be7bee3..0ab8c91 100644 --- a/forester/java/src/org/forester/test/Test.java +++ b/forester/java/src/org/forester/test/Test.java @@ -38,7 +38,6 @@ import java.util.Locale; import java.util.Set; import org.forester.application.support_transfer; -import org.forester.datastructures.IntMatrix; import org.forester.development.DevelopmentTools; import org.forester.evoinference.TestPhylogenyReconstruction; import org.forester.evoinference.matrix.character.CharacterStateMatrix; @@ -87,8 +86,7 @@ import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory; import org.forester.phylogeny.factories.PhylogenyFactory; import org.forester.phylogeny.iterators.PhylogenyNodeIterator; import org.forester.protein.Protein; -import org.forester.rio.RIO; -import org.forester.sdi.GSDI; +import org.forester.rio.TestRIO; import org.forester.sdi.SDI; import org.forester.sdi.SDIR; import org.forester.sdi.SDIse; @@ -508,8 +506,8 @@ public final class Test { System.out.println( "failed." ); failed++; } - System.out.print( "SDIse: " ); - if ( Test.testSDIse() ) { + System.out.print( "Descriptive statistics: " ); + if ( Test.testDescriptiveStatistics() ) { System.out.println( "OK." ); succeeded++; } @@ -517,8 +515,8 @@ public final class Test { System.out.println( "failed." ); failed++; } - System.out.print( "SDIunrooted: " ); - if ( Test.testSDIunrooted() ) { + System.out.print( "Data objects and methods: " ); + if ( Test.testDataObjects() ) { System.out.println( "OK." ); succeeded++; } @@ -526,8 +524,8 @@ public final class Test { System.out.println( "failed." ); failed++; } - System.out.print( "GSDI: " ); - if ( TestGSDI.test() ) { + System.out.print( "Properties map: " ); + if ( Test.testPropertiesMap() ) { System.out.println( "OK." ); succeeded++; } @@ -535,8 +533,8 @@ public final class Test { System.out.println( "failed." ); failed++; } - System.out.print( "Ortholog table: " ); - if ( Test.testOrthologTable() ) { + System.out.print( "SDIse: " ); + if ( Test.testSDIse() ) { System.out.println( "OK." ); succeeded++; } @@ -544,8 +542,8 @@ public final class Test { System.out.println( "failed." ); failed++; } - System.out.print( "Descriptive statistics: " ); - if ( Test.testDescriptiveStatistics() ) { + System.out.print( "SDIunrooted: " ); + if ( Test.testSDIunrooted() ) { System.out.println( "OK." ); succeeded++; } @@ -553,8 +551,8 @@ public final class Test { System.out.println( "failed." ); failed++; } - System.out.print( "Data objects and methods: " ); - if ( Test.testDataObjects() ) { + System.out.print( "GSDI: " ); + if ( TestGSDI.test() ) { System.out.println( "OK." ); succeeded++; } @@ -562,8 +560,8 @@ public final class Test { System.out.println( "failed." ); failed++; } - System.out.print( "Properties map: " ); - if ( Test.testPropertiesMap() ) { + System.out.print( "RIO: " ); + if ( TestRIO.test() ) { System.out.println( "OK." ); succeeded++; } @@ -6876,28 +6874,6 @@ public final class Test { return true; } - private static boolean testOrthologTable() { - try { - final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); - final Phylogeny s1 = factory.create( Test.PATH_TO_TEST_DATA + "rio_species.xml", new PhyloXmlParser() )[ 0 ]; - final NHXParser p = new NHXParser(); - p.setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.YES ); - final Phylogeny g1[] = factory.create( new File( Test.PATH_TO_TEST_DATA - + "rio_Bcl-2_e1_20_mafft_05_40_fme.mlt" ), p ); - for( final Phylogeny gt : g1 ) { - gt.setRooted( true ); - final GSDI sdi = new GSDI( gt, s1, true, true, true ); - } - final IntMatrix m = RIO.calculateOrthologTable( g1, true ); - // System.out.println( m.toString() ); - } - catch ( final Exception e ) { - e.printStackTrace(); - return false; - } - return true; - } - private static boolean testSplit() { try { final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); diff --git a/forester/java/src/org/forester/util/ForesterUtil.java b/forester/java/src/org/forester/util/ForesterUtil.java index 336c887..1ca9ba2 100644 --- a/forester/java/src/org/forester/util/ForesterUtil.java +++ b/forester/java/src/org/forester/util/ForesterUtil.java @@ -95,55 +95,12 @@ public final class ForesterUtil { private ForesterUtil() { } - public static void ensurePresenceOfTaxonomy( final PhylogenyNode node ) { - if ( !node.getNodeData().isHasTaxonomy() ) { - node.getNodeData().setTaxonomy( new Taxonomy() ); - } - } - - public static void ensurePresenceOfSequence( final PhylogenyNode node ) { - if ( !node.getNodeData().isHasSequence() ) { - node.getNodeData().setSequence( new Sequence() ); - } - } - - final public static void ensurePresenceOfDistribution( final PhylogenyNode node ) { - if ( !node.getNodeData().isHasDistribution() ) { - node.getNodeData().setDistribution( new Distribution( "" ) ); - } - } - - final public static void ensurePresenceOfDate( final PhylogenyNode node ) { - if ( !node.getNodeData().isHasDate() ) { - node.getNodeData().setDate( new org.forester.phylogeny.data.Date() ); - } - } - final public static void appendSeparatorIfNotEmpty( final StringBuffer sb, final char separator ) { if ( sb.length() > 0 ) { sb.append( separator ); } } - public static boolean isWindowns() { - return ForesterUtil.OS_NAME.toLowerCase().indexOf( "win" ) > -1; - } - - final public static String getForesterLibraryInformation() { - return "forester " + ForesterConstants.FORESTER_VERSION + " (" + ForesterConstants.FORESTER_DATE + ")"; - } - - public static boolean seqIsLikelyToBeAa( final String s ) { - final String seq = s.toLowerCase(); - if ( ( seq.indexOf( 'r' ) > -1 ) || ( seq.indexOf( 'd' ) > -1 ) || ( seq.indexOf( 'e' ) > -1 ) - || ( seq.indexOf( 'q' ) > -1 ) || ( seq.indexOf( 'h' ) > -1 ) || ( seq.indexOf( 'k' ) > -1 ) - || ( seq.indexOf( 'w' ) > -1 ) || ( seq.indexOf( 's' ) > -1 ) || ( seq.indexOf( 'm' ) > -1 ) - || ( seq.indexOf( 'p' ) > -1 ) || ( seq.indexOf( 'v' ) > -1 ) ) { - return true; - } - return false; - } - /** * This calculates a color. If value is equal to min the returned color is * minColor, if value is equal to max the returned color is maxColor, @@ -234,39 +191,6 @@ public final class ForesterUtil { } } - /** - * Helper method for calcColor methods. - * - * @param smallercolor_component_x - * color component the smaller color - * @param largercolor_component_x - * color component the larger color - * @param x - * factor - * @return an int representing a color component - */ - final private static int calculateColorComponent( final double smallercolor_component_x, - final double largercolor_component_x, - final double x ) { - return ( int ) ( smallercolor_component_x + ( ( x * ( largercolor_component_x - smallercolor_component_x ) ) / 255.0 ) ); - } - - /** - * Helper method for calcColor methods. - * - * - * @param value - * the value - * @param larger - * the largest value - * @param smaller - * the smallest value - * @return a normalized value between larger and smaller - */ - final private static double calculateColorFactor( final double value, final double larger, final double smaller ) { - return ( 255.0 * ( value - smaller ) ) / ( larger - smaller ); - } - final public static String collapseWhiteSpace( final String s ) { return s.replaceAll( "[\\s]+", " " ); } @@ -337,6 +261,10 @@ public final class ForesterUtil { return new BufferedWriter( new FileWriter( file ) ); } + final public static BufferedWriter createBufferedWriter( final String name ) throws IOException { + return new BufferedWriter( new FileWriter( createFileForWriting( name ) ) ); + } + final public static EasyWriter createEasyWriter( final File file ) throws IOException { return new EasyWriter( createBufferedWriter( file ) ); } @@ -345,10 +273,6 @@ public final class ForesterUtil { return createEasyWriter( createFileForWriting( name ) ); } - final public static BufferedWriter createBufferedWriter( final String name ) throws IOException { - return new BufferedWriter( new FileWriter( createFileForWriting( name ) ) ); - } - final public static File createFileForWriting( final String name ) throws IOException { final File file = new File( name ); if ( file.exists() ) { @@ -357,6 +281,30 @@ public final class ForesterUtil { return file; } + final public static void ensurePresenceOfDate( final PhylogenyNode node ) { + if ( !node.getNodeData().isHasDate() ) { + node.getNodeData().setDate( new org.forester.phylogeny.data.Date() ); + } + } + + final public static void ensurePresenceOfDistribution( final PhylogenyNode node ) { + if ( !node.getNodeData().isHasDistribution() ) { + node.getNodeData().setDistribution( new Distribution( "" ) ); + } + } + + public static void ensurePresenceOfSequence( final PhylogenyNode node ) { + if ( !node.getNodeData().isHasSequence() ) { + node.getNodeData().setSequence( new Sequence() ); + } + } + + public static void ensurePresenceOfTaxonomy( final PhylogenyNode node ) { + if ( !node.getNodeData().isHasTaxonomy() ) { + node.getNodeData().setTaxonomy( new Taxonomy() ); + } + } + public static void fatalError( final String prg_name, final String message ) { System.err.println(); System.err.println( "[" + prg_name + "] > " + message ); @@ -471,6 +419,10 @@ public final class ForesterUtil { return line; } + final public static String getForesterLibraryInformation() { + return "forester " + ForesterConstants.FORESTER_VERSION + " (" + ForesterConstants.FORESTER_DATE + ")"; + } + final public static String getLineSeparator() { return ForesterUtil.LINE_SEPARATOR; } @@ -587,6 +539,10 @@ public final class ForesterUtil { return isReadableFile( new File( s ) ); } + public static boolean isWindowns() { + return ForesterUtil.OS_NAME.toLowerCase().indexOf( "win" ) > -1; + } + final public static String isWritableFile( final File f ) { if ( f.isDirectory() ) { return "[" + f + "] is a directory"; @@ -785,6 +741,14 @@ public final class ForesterUtil { } final public static void printProgramInformation( final String prg_name, + final String prg_version, + final String date, + final String email, + final String www ) { + printProgramInformation( prg_name, null, prg_version, date, email, www, null ); + } + + final public static void printProgramInformation( final String prg_name, final String desc, final String prg_version, final String date, @@ -815,14 +779,6 @@ public final class ForesterUtil { System.out.println(); } - final public static void printProgramInformation( final String prg_name, - final String prg_version, - final String date, - final String email, - final String www ) { - printProgramInformation( prg_name, null, prg_version, date, email, www, null ); - } - final public static void printWarningMessage( final String prg_name, final String message ) { System.out.println( "[" + prg_name + "] > warning: " + message ); } @@ -900,9 +856,15 @@ public final class ForesterUtil { } } - final private static String[] splitString( final String str ) { - final String regex = "[\\s;,]+"; - return str.split( regex ); + public static boolean seqIsLikelyToBeAa( final String s ) { + final String seq = s.toLowerCase(); + if ( ( seq.indexOf( 'r' ) > -1 ) || ( seq.indexOf( 'd' ) > -1 ) || ( seq.indexOf( 'e' ) > -1 ) + || ( seq.indexOf( 'q' ) > -1 ) || ( seq.indexOf( 'h' ) > -1 ) || ( seq.indexOf( 'k' ) > -1 ) + || ( seq.indexOf( 'w' ) > -1 ) || ( seq.indexOf( 's' ) > -1 ) || ( seq.indexOf( 'm' ) > -1 ) + || ( seq.indexOf( 'p' ) > -1 ) || ( seq.indexOf( 'v' ) > -1 ) ) { + return true; + } + return false; } final public static String stringArrayToString( final String[] a ) { @@ -980,6 +942,19 @@ public final class ForesterUtil { System.exit( -1 ); } + public final static void updateProgress( final double progress_percentage ) { + final int width = 50; + System.out.print( "\r[" ); + int i = 0; + for( ; i <= ( int ) ( progress_percentage * width ); i++ ) { + System.out.print( "." ); + } + for( ; i < width; i++ ) { + System.out.print( " " ); + } + System.out.print( "]" ); + } + public final static String wordWrap( final String str, final int width ) { final StringBuilder sb = new StringBuilder( str ); int start = 0; @@ -1008,4 +983,42 @@ public final class ForesterUtil { } return sb.toString(); } + + /** + * Helper method for calcColor methods. + * + * @param smallercolor_component_x + * color component the smaller color + * @param largercolor_component_x + * color component the larger color + * @param x + * factor + * @return an int representing a color component + */ + final private static int calculateColorComponent( final double smallercolor_component_x, + final double largercolor_component_x, + final double x ) { + return ( int ) ( smallercolor_component_x + ( ( x * ( largercolor_component_x - smallercolor_component_x ) ) / 255.0 ) ); + } + + /** + * Helper method for calcColor methods. + * + * + * @param value + * the value + * @param larger + * the largest value + * @param smaller + * the smallest value + * @return a normalized value between larger and smaller + */ + final private static double calculateColorFactor( final double value, final double larger, final double smaller ) { + return ( 255.0 * ( value - smaller ) ) / ( larger - smaller ); + } + + final private static String[] splitString( final String str ) { + final String regex = "[\\s;,]+"; + return str.split( regex ); + } } -- 1.7.10.2