- }
-
- /**
-  * Moves numeric names of internal nodes into bootstrap confidence values.
-  * <p>
-  * Nodes are visited in postorder. For every non-external node whose name is not
-  * empty, the name is parsed as a double. A value of zero or more is added to the
-  * node's branch data as a {@code Confidence} of type "bootstrap" and the name is
-  * reset to the empty string; a negative value leaves the node unchanged.
-  *
-  * @param phy the phylogeny to modify in place
-  * @throws IllegalArgumentException if a non-empty internal node name cannot be
-  *             parsed as a number; the original {@link NumberFormatException} is
-  *             attached as the cause
-  */
- final static public void transferInternalNamesToBootstrapSupport( final Phylogeny phy ) {
-     final PhylogenyNodeIterator it = phy.iteratorPostorder();
-     while ( it.hasNext() ) {
-         final PhylogenyNode n = it.next();
-         if ( n.isExternal() || ForesterUtil.isEmpty( n.getName() ) ) {
-             continue;
-         }
-         final double value;
-         try {
-             value = Double.parseDouble( n.getName() );
-         }
-         catch ( final NumberFormatException e ) {
-             // Keep the parse failure as the cause so its stack trace is not lost.
-             throw new IllegalArgumentException( "failed to parse number from [" + n.getName() + "]: "
-                     + e.getLocalizedMessage(), e );
-         }
-         if ( value >= 0.0 ) {
-             n.getBranchData().addConfidence( new Confidence( value, "bootstrap" ) );
-             n.setName( "" );
-         }
-     }
- }
-
- final static public void sortNodeDescendents( final PhylogenyNode node, final DESCENDANT_SORT_PRIORITY pri ) {
- class PhylogenyNodeSortTaxonomyPriority implements Comparator<PhylogenyNode> {
-
- @Override
- public int compare( final PhylogenyNode n1, final PhylogenyNode n2 ) {
- if ( n1.getNodeData().isHasTaxonomy() && n2.getNodeData().isHasTaxonomy() ) {
- if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getScientificName() ) )
- && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getScientificName() ) ) ) {
- return n1.getNodeData().getTaxonomy().getScientificName().toLowerCase()
- .compareTo( n2.getNodeData().getTaxonomy().getScientificName().toLowerCase() );
- }
- if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getTaxonomyCode() ) )
- && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getTaxonomyCode() ) ) ) {
- return n1.getNodeData().getTaxonomy().getTaxonomyCode()
- .compareTo( n2.getNodeData().getTaxonomy().getTaxonomyCode() );
- }
- if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getCommonName() ) )
- && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getCommonName() ) ) ) {
- return n1.getNodeData().getTaxonomy().getCommonName().toLowerCase()
- .compareTo( n2.getNodeData().getTaxonomy().getCommonName().toLowerCase() );
- }
- }
- if ( n1.getNodeData().isHasSequence() && n2.getNodeData().isHasSequence() ) {
- if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getName() ) )
- && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getName() ) ) ) {
- return n1.getNodeData().getSequence().getName().toLowerCase()
- .compareTo( n2.getNodeData().getSequence().getName().toLowerCase() );
- }
- if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getSymbol() ) )
- && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getSymbol() ) ) ) {
- return n1.getNodeData().getSequence().getSymbol()
- .compareTo( n2.getNodeData().getSequence().getSymbol() );
- }
- if ( ( n1.getNodeData().getSequence().getAccession() != null )
- && ( n2.getNodeData().getSequence().getAccession() != null )
- && !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getAccession().getValue() )
- && !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getAccession().getValue() ) ) {
- return n1.getNodeData().getSequence().getAccession().getValue()
- .compareTo( n2.getNodeData().getSequence().getAccession().getValue() );
- }
- }
- if ( ( !ForesterUtil.isEmpty( n1.getName() ) ) && ( !ForesterUtil.isEmpty( n2.getName() ) ) ) {
- return n1.getName().toLowerCase().compareTo( n2.getName().toLowerCase() );
- }
- return 0;
- }
- }
- class PhylogenyNodeSortSequencePriority implements Comparator<PhylogenyNode> {
-
- @Override
- public int compare( final PhylogenyNode n1, final PhylogenyNode n2 ) {
- if ( n1.getNodeData().isHasSequence() && n2.getNodeData().isHasSequence() ) {
- if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getName() ) )
- && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getName() ) ) ) {
- return n1.getNodeData().getSequence().getName().toLowerCase()
- .compareTo( n2.getNodeData().getSequence().getName().toLowerCase() );
- }
- if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getSymbol() ) )
- && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getSymbol() ) ) ) {
- return n1.getNodeData().getSequence().getSymbol()
- .compareTo( n2.getNodeData().getSequence().getSymbol() );
- }
- if ( ( n1.getNodeData().getSequence().getAccession() != null )
- && ( n2.getNodeData().getSequence().getAccession() != null )
- && !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getAccession().getValue() )
- && !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getAccession().getValue() ) ) {
- return n1.getNodeData().getSequence().getAccession().getValue()
- .compareTo( n2.getNodeData().getSequence().getAccession().getValue() );
- }
- }
- if ( n1.getNodeData().isHasTaxonomy() && n2.getNodeData().isHasTaxonomy() ) {
- if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getScientificName() ) )
- && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getScientificName() ) ) ) {
- return n1.getNodeData().getTaxonomy().getScientificName().toLowerCase()
- .compareTo( n2.getNodeData().getTaxonomy().getScientificName().toLowerCase() );
- }
- if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getTaxonomyCode() ) )
- && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getTaxonomyCode() ) ) ) {
- return n1.getNodeData().getTaxonomy().getTaxonomyCode()
- .compareTo( n2.getNodeData().getTaxonomy().getTaxonomyCode() );
- }
- if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getCommonName() ) )
- && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getCommonName() ) ) ) {
- return n1.getNodeData().getTaxonomy().getCommonName().toLowerCase()
- .compareTo( n2.getNodeData().getTaxonomy().getCommonName().toLowerCase() );
- }
- }
- if ( ( !ForesterUtil.isEmpty( n1.getName() ) ) && ( !ForesterUtil.isEmpty( n2.getName() ) ) ) {
- return n1.getName().toLowerCase().compareTo( n2.getName().toLowerCase() );
- }
- return 0;
- }
- }
- class PhylogenyNodeSortNodeNamePriority implements Comparator<PhylogenyNode> {
-
- @Override
- public int compare( final PhylogenyNode n1, final PhylogenyNode n2 ) {
- if ( ( !ForesterUtil.isEmpty( n1.getName() ) ) && ( !ForesterUtil.isEmpty( n2.getName() ) ) ) {
- return n1.getName().toLowerCase().compareTo( n2.getName().toLowerCase() );
- }
- if ( n1.getNodeData().isHasTaxonomy() && n2.getNodeData().isHasTaxonomy() ) {
- if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getScientificName() ) )
- && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getScientificName() ) ) ) {
- return n1.getNodeData().getTaxonomy().getScientificName().toLowerCase()
- .compareTo( n2.getNodeData().getTaxonomy().getScientificName().toLowerCase() );
- }
- if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getTaxonomyCode() ) )
- && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getTaxonomyCode() ) ) ) {
- return n1.getNodeData().getTaxonomy().getTaxonomyCode()
- .compareTo( n2.getNodeData().getTaxonomy().getTaxonomyCode() );
- }
- if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getCommonName() ) )
- && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getCommonName() ) ) ) {
- return n1.getNodeData().getTaxonomy().getCommonName().toLowerCase()
- .compareTo( n2.getNodeData().getTaxonomy().getCommonName().toLowerCase() );
- }
- }
- if ( n1.getNodeData().isHasSequence() && n2.getNodeData().isHasSequence() ) {
- if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getName() ) )
- && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getName() ) ) ) {
- return n1.getNodeData().getSequence().getName().toLowerCase()
- .compareTo( n2.getNodeData().getSequence().getName().toLowerCase() );
- }
- if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getSymbol() ) )
- && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getSymbol() ) ) ) {
- return n1.getNodeData().getSequence().getSymbol()
- .compareTo( n2.getNodeData().getSequence().getSymbol() );
- }
- if ( ( n1.getNodeData().getSequence().getAccession() != null )
- && ( n2.getNodeData().getSequence().getAccession() != null )
- && !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getAccession().getValue() )
- && !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getAccession().getValue() ) ) {
- return n1.getNodeData().getSequence().getAccession().getValue()
- .compareTo( n2.getNodeData().getSequence().getAccession().getValue() );
- }
- }
- return 0;
- }
- }
- Comparator<PhylogenyNode> c;
- switch ( pri ) {
- case SEQUENCE:
- c = new PhylogenyNodeSortSequencePriority();
- break;
- case NODE_NAME:
- c = new PhylogenyNodeSortNodeNamePriority();
- break;
- default:
- c = new PhylogenyNodeSortTaxonomyPriority();
- }
- final List<PhylogenyNode> descs = node.getDescendants();
- Collections.sort( descs, c );
- int i = 0;
- for( final PhylogenyNode desc : descs ) {
- node.setChildNode( i++, desc );
- }
- }
-
- /**
-  * Copies the trimmed name of each visited node into another field of that node.
-  * <p>
-  * Nodes are visited in postorder; nodes whose trimmed name is empty are left
-  * alone. A missing taxonomy or sequence object is created on demand. For the
-  * taxonomy code, taxonomy scientific/common name and sequence symbol/name
-  * targets the node name is cleared. For the two UniProt id targets the part
-  * before ({@code _1}) or after ({@code _2}) the first underscore becomes the
-  * identifier and the node name is kept; if there is no underscore past the first
-  * character the whole name becomes the identifier and the node name is cleared.
-  * For {@code TAXONOMY_ID} the name becomes the identifier and is never cleared.
-  *
-  * @param phy           the phylogeny to modify in place
-  * @param field         the target field
-  * @param external_only if true, internal nodes are skipped
-  * @throws PhyloXmlDataFormatException declared for the taxonomy-code target,
-  *             which delegates to {@code setTaxonomyCode}
-  */
- final static public void transferNodeNameToField( final Phylogeny phy,
-                                                    final PhylogenyMethods.PhylogenyNodeField field,
-                                                    final boolean external_only ) throws PhyloXmlDataFormatException {
-     for( final PhylogenyNodeIterator nodes = phy.iteratorPostorder(); nodes.hasNext(); ) {
-         final PhylogenyNode current = nodes.next();
-         if ( external_only && current.isInternal() ) {
-             continue;
-         }
-         final String label = current.getName().trim();
-         if ( ForesterUtil.isEmpty( label ) ) {
-             continue;
-         }
-         switch ( field ) {
-             case TAXONOMY_CODE:
-                 current.setName( "" );
-                 setTaxonomyCode( current, label );
-                 break;
-             case TAXONOMY_SCIENTIFIC_NAME:
-                 current.setName( "" );
-                 if ( !current.getNodeData().isHasTaxonomy() ) {
-                     current.getNodeData().setTaxonomy( new Taxonomy() );
-                 }
-                 current.getNodeData().getTaxonomy().setScientificName( label );
-                 break;
-             case TAXONOMY_COMMON_NAME:
-                 current.setName( "" );
-                 if ( !current.getNodeData().isHasTaxonomy() ) {
-                     current.getNodeData().setTaxonomy( new Taxonomy() );
-                 }
-                 current.getNodeData().getTaxonomy().setCommonName( label );
-                 break;
-             case SEQUENCE_SYMBOL:
-                 current.setName( "" );
-                 if ( !current.getNodeData().isHasSequence() ) {
-                     current.getNodeData().setSequence( new Sequence() );
-                 }
-                 current.getNodeData().getSequence().setSymbol( label );
-                 break;
-             case SEQUENCE_NAME:
-                 current.setName( "" );
-                 if ( !current.getNodeData().isHasSequence() ) {
-                     current.getNodeData().setSequence( new Sequence() );
-                 }
-                 current.getNodeData().getSequence().setName( label );
-                 break;
-             case TAXONOMY_ID_UNIPROT_1: {
-                 if ( !current.getNodeData().isHasTaxonomy() ) {
-                     current.getNodeData().setTaxonomy( new Taxonomy() );
-                 }
-                 final int underscore = label.indexOf( '_' );
-                 final String id;
-                 if ( underscore > 0 ) {
-                     // Identifier is the part in front of the underscore; name is kept.
-                     id = label.substring( 0, underscore );
-                 }
-                 else {
-                     id = label;
-                     current.setName( "" );
-                 }
-                 current.getNodeData().getTaxonomy()
-                         .setIdentifier( new Identifier( id, PhyloXmlUtil.UNIPROT_TAX_PROVIDER ) );
-                 break;
-             }
-             case TAXONOMY_ID_UNIPROT_2: {
-                 if ( !current.getNodeData().isHasTaxonomy() ) {
-                     current.getNodeData().setTaxonomy( new Taxonomy() );
-                 }
-                 final int underscore = label.indexOf( '_' );
-                 final String id;
-                 if ( underscore > 0 ) {
-                     // Identifier is the part behind the underscore; name is kept.
-                     id = label.substring( underscore + 1 );
-                 }
-                 else {
-                     id = label;
-                     current.setName( "" );
-                 }
-                 current.getNodeData().getTaxonomy()
-                         .setIdentifier( new Identifier( id, PhyloXmlUtil.UNIPROT_TAX_PROVIDER ) );
-                 break;
-             }
-             case TAXONOMY_ID: {
-                 if ( !current.getNodeData().isHasTaxonomy() ) {
-                     current.getNodeData().setTaxonomy( new Taxonomy() );
-                 }
-                 current.getNodeData().getTaxonomy().setIdentifier( new Identifier( label ) );
-                 break;
-             }
-         }
-     }
- }
-
- static double addPhylogenyDistances( final double a, final double b ) {
- if ( ( a >= 0.0 ) && ( b >= 0.0 ) ) {
- return a + b;
- }
- else if ( a >= 0.0 ) {
- return a;
- }
- else if ( b >= 0.0 ) {
- return b;
- }
- return PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT;
- }
-
- // Helper for getUltraParalogousNodes( PhylogenyNode ).
- public static boolean areAllChildrenDuplications( final PhylogenyNode n ) {
- if ( n.isExternal() ) {
- return false;
- }
- else {
- if ( n.isDuplication() ) {
- //FIXME test me!
- for( final PhylogenyNode desc : n.getDescendants() ) {
- if ( !areAllChildrenDuplications( desc ) ) {
- return false;
- }
- }
- return true;
- }
- else {
- return false;
- }
- }
- }
-
- public static short calculateMaxBranchesToLeaf( final PhylogenyNode node ) {
- if ( node.isExternal() ) {
- return 0;
- }
- short max = 0;
- for( PhylogenyNode d : node.getAllExternalDescendants() ) {
- short steps = 0;
- while ( d != node ) {
- if ( d.isCollapse() ) {
- steps = 0;
- }
- else {
- steps++;
- }
- d = d.getParent();
- }
- if ( max < steps ) {
- max = steps;
- }
- }
- return max;
- }
-
- /**
-  * Returns the largest depth, as reported by {@code calculateDepth()}, over all
-  * external nodes of the phylogeny.
-  *
-  * @param phy the phylogeny to inspect
-  * @return the largest depth found, never less than 0
-  */
- public static int calculateMaxDepth( final Phylogeny phy ) {
-     int deepest = 0;
-     final PhylogenyNodeIterator externals = phy.iteratorExternalForward();
-     while ( externals.hasNext() ) {
-         deepest = Math.max( deepest, externals.next().calculateDepth() );
-     }
-     return deepest;
- }
-
- /**
-  * Returns the largest distance to the root, as reported by
-  * {@code calculateDistanceToRoot()}, over all external nodes of the phylogeny.
-  *
-  * @param phy the phylogeny to inspect
-  * @return the largest distance found, never less than 0.0
-  */
- public static double calculateMaxDistanceToRoot( final Phylogeny phy ) {
-     double farthest = 0.0;
-     final PhylogenyNodeIterator externals = phy.iteratorExternalForward();
-     while ( externals.hasNext() ) {
-         final double distance = externals.next().calculateDistanceToRoot();
-         // Plain comparison on purpose: a NaN distance must not replace the maximum.
-         if ( distance > farthest ) {
-             farthest = distance;
-         }
-     }
-     return farthest;
- }
-
- /**
-  * Counts the non-external nodes that have more than two descendants.
-  *
-  * @param phy the phylogeny to inspect
-  * @return the number of such nodes
-  */
- public static int countNumberOfPolytomies( final Phylogeny phy ) {
-     int polytomies = 0;
-     final PhylogenyNodeIterator nodes = phy.iteratorPreorder();
-     while ( nodes.hasNext() ) {
-         final PhylogenyNode current = nodes.next();
-         if ( current.isExternal() ) {
-             continue;
-         }
-         if ( current.getNumberOfDescendants() > 2 ) {
-             ++polytomies;
-         }
-     }
-     return polytomies;
- }
-
- /**
-  * Collects the number of descendants of every non-external node.
-  *
-  * @param phy the phylogeny to inspect
-  * @return statistics holding one value per non-external node
-  */
- public static DescriptiveStatistics calculatNumberOfDescendantsPerNodeStatistics( final Phylogeny phy ) {
-     final DescriptiveStatistics result = new BasicDescriptiveStatistics();
-     final PhylogenyNodeIterator nodes = phy.iteratorPreorder();
-     while ( nodes.hasNext() ) {
-         final PhylogenyNode current = nodes.next();
-         if ( current.isExternal() ) {
-             continue;
-         }
-         result.addValue( current.getNumberOfDescendants() );
-     }
-     return result;
- }
-
- /**
-  * Collects the distance to parent of every non-root node whose distance is
-  * zero or more.
-  *
-  * @param phy the phylogeny to inspect
-  * @return statistics holding one value per qualifying node
-  */
- public static DescriptiveStatistics calculatBranchLengthStatistics( final Phylogeny phy ) {
-     final DescriptiveStatistics result = new BasicDescriptiveStatistics();
-     final PhylogenyNodeIterator nodes = phy.iteratorPreorder();
-     while ( nodes.hasNext() ) {
-         final PhylogenyNode current = nodes.next();
-         if ( current.isRoot() ) {
-             continue;
-         }
-         if ( current.getDistanceToParent() >= 0.0 ) {
-             result.addValue( current.getDistanceToParent() );
-         }
-     }
-     return result;
- }
-
- /**
-  * Collects confidence values of internal, non-root nodes, one statistics object
-  * per position in a node's confidence list.
-  * <p>
-  * The i-th confidence of every qualifying node goes into the i-th statistics
-  * object. A confidence that is null or has a negative value is recorded as 0.
-  * The description of a statistics object is set from the confidence type; a
-  * missing confidence is skipped for this check instead of causing a
-  * NullPointerException, consistent with the null handling of the value.
-  *
-  * @param phy the phylogeny to inspect
-  * @return the statistics, indexed by confidence position; empty if no
-  *         qualifying node has confidences
-  * @throws IllegalArgumentException if the confidence type at some position
-  *             differs (ignoring case) from the type seen earlier at that
-  *             position
-  */
- public static List<DescriptiveStatistics> calculatConfidenceStatistics( final Phylogeny phy ) {
-     final List<DescriptiveStatistics> stats = new ArrayList<DescriptiveStatistics>();
-     for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
-         final PhylogenyNode n = iter.next();
-         if ( n.isExternal() || n.isRoot() || !n.getBranchData().isHasConfidences() ) {
-             continue;
-         }
-         for( int i = 0; i < n.getBranchData().getConfidences().size(); ++i ) {
-             final Confidence c = n.getBranchData().getConfidences().get( i );
-             if ( ( i > ( stats.size() - 1 ) ) || ( stats.get( i ) == null ) ) {
-                 stats.add( i, new BasicDescriptiveStatistics() );
-             }
-             final DescriptiveStatistics slot = stats.get( i );
-             // Guard c here as well: the value below is already null-tolerant.
-             if ( ( c != null ) && !ForesterUtil.isEmpty( c.getType() ) ) {
-                 if ( !ForesterUtil.isEmpty( slot.getDescription() )
-                         && !slot.getDescription().equalsIgnoreCase( c.getType() ) ) {
-                     throw new IllegalArgumentException( "support values in node [" + n.toString()
-                             + "] appear inconsistently ordered" );
-                 }
-                 slot.setDescription( c.getType() );
-             }
-             slot.addValue( ( ( c != null ) && ( c.getValue() >= 0 ) ) ? c.getValue() : 0 );
-         }
-     }
-     return stats;
- }
-
- /**
-  * Returns the set of distinct taxonomies found on the external
-  * descendants of a node.
-  * If at least one of the external descendants has no taxonomy, or an
-  * empty one, null is returned.
-  *
-  * @param node the node whose external descendants are inspected
-  * @return the distinct taxonomies, or null as described above
-  */
- public static Set<Taxonomy> obtainDistinctTaxonomies( final PhylogenyNode node ) {
-     final List<PhylogenyNode> externals = node.getAllExternalDescendants();
-     final Set<Taxonomy> distinct = new HashSet<Taxonomy>();
-     for( final PhylogenyNode ext : externals ) {
-         final boolean usable = ext.getNodeData().isHasTaxonomy() && !ext.getNodeData().getTaxonomy().isEmpty();
-         if ( !usable ) {
-             return null;
-         }
-         distinct.add( ext.getNodeData().getTaxonomy() );
-     }
-     return distinct;
- }
-
- /**
- * Returns a map of distinct taxonomies of
- * all external nodes of node.
- * If at least one of the external nodes has no taxonomy,
- * null is returned.
- *
- */
- public static SortedMap<Taxonomy, Integer> obtainDistinctTaxonomyCounts( final PhylogenyNode node ) {
- final List<PhylogenyNode> descs = node.getAllExternalDescendants();
- final SortedMap<Taxonomy, Integer> tax_map = new TreeMap<Taxonomy, Integer>();
- for( final PhylogenyNode n : descs ) {
- if ( !n.getNodeData().isHasTaxonomy() || n.getNodeData().getTaxonomy().isEmpty() ) {
- return null;
- }
- final Taxonomy t = n.getNodeData().getTaxonomy();
- if ( tax_map.containsKey( t ) ) {
- tax_map.put( t, tax_map.get( t ) + 1 );
- }
- else {
- tax_map.put( t, 1 );
- }
- }
- return tax_map;
- }
-
- /**
-  * Counts the external descendants of a node that have no taxonomy or an empty
-  * one.
-  *
-  * @param node the node whose external descendants are inspected
-  * @return the number of external descendants without usable taxonomy
-  */
- public static int calculateNumberOfExternalNodesWithoutTaxonomy( final PhylogenyNode node ) {
-     int missing = 0;
-     for( final PhylogenyNode ext : node.getAllExternalDescendants() ) {
-         final boolean usable = ext.getNodeData().isHasTaxonomy() && !ext.getNodeData().getTaxonomy().isEmpty();
-         if ( !usable ) {
-             ++missing;
-         }
-     }
-     return missing;
- }
-
- /**
-  * Copies the subtree rooted at the given node, using {@code copyNodeData()}
-  * for every node.
-  *
-  * @param source root of the subtree to copy, may be null
-  * @return root of the copy, or null if source is null
-  */
- static PhylogenyNode copySubTree( final PhylogenyNode source ) {
-     if ( source == null ) {
-         return null;
-     }
-     final PhylogenyNode copy = source.copyNodeData();
-     if ( source.isExternal() ) {
-         return copy;
-     }
-     final int child_count = source.getNumberOfDescendants();
-     for( int child = 0; child < child_count; ++child ) {
-         copy.setChildNode( child, copySubTree( source.getChildNode( child ) ) );
-     }
-     return copy;
- }
-
- /**
-  * Copies the subtree rooted at the given node, using
-  * {@code copyNodeDataShallow()} for every node.
-  *
-  * @param source root of the subtree to copy, may be null
-  * @return root of the copy, or null if source is null
-  */
- static PhylogenyNode copySubTreeShallow( final PhylogenyNode source ) {
-     if ( source == null ) {
-         return null;
-     }
-     final PhylogenyNode copy = source.copyNodeDataShallow();
-     if ( source.isExternal() ) {
-         return copy;
-     }
-     final int child_count = source.getNumberOfDescendants();
-     for( int child = 0; child < child_count; ++child ) {
-         copy.setChildNode( child, copySubTreeShallow( source.getChildNode( child ) ) );
-     }
-     return copy;
- }
-
- /**
-  * Deletes the nodes with the given ids from the phylogeny.
-  * <p>
-  * The id-to-node map of the phylogeny is cleared before and after the
-  * deletions, and the phylogeny is then told that its external nodes changed.
-  *
-  * @param to_delete ids of the nodes to delete
-  * @param phy       the phylogeny to modify in place
-  */
- public static void deleteExternalNodesNegativeSelection( final Set<Integer> to_delete, final Phylogeny phy ) {
-     phy.clearHashIdToNodeMap();
-     final Iterator<Integer> ids = to_delete.iterator();
-     while ( ids.hasNext() ) {
-         final Integer node_id = ids.next();
-         phy.deleteSubtree( phy.getNode( node_id ), true );
-     }
-     phy.clearHashIdToNodeMap();
-     phy.externalNodesHaveChanged();
- }
-
- /**
-  * Deletes all external nodes carrying one of the given names.
-  * <p>
-  * Empty entries in the name array are skipped. Afterwards the id-to-node map
-  * of the phylogeny is cleared and the phylogeny is told that its external
-  * nodes changed.
-  *
-  * @param node_names_to_delete names of the external nodes to delete
-  * @param p                    the phylogeny to modify in place
-  * @throws IllegalArgumentException if a name matches a node that is not
-  *             external; nodes deleted before that point stay deleted
-  */
- public static void deleteExternalNodesNegativeSelection( final String[] node_names_to_delete, final Phylogeny p )
-         throws IllegalArgumentException {
-     for( int idx = 0; idx < node_names_to_delete.length; ++idx ) {
-         final String element = node_names_to_delete[ idx ];
-         if ( ForesterUtil.isEmpty( element ) ) {
-             continue;
-         }
-         final List<PhylogenyNode> matches = p.getNodes( element );
-         for( final PhylogenyNode match : matches ) {
-             if ( !match.isExternal() ) {
-                 throw new IllegalArgumentException( "attempt to delete non-external node \"" + element + "\"" );
-             }
-             p.deleteSubtree( match, true );
-         }
-     }
-     p.clearHashIdToNodeMap();
-     p.externalNodesHaveChanged();
- }
-
- public static void deleteExternalNodesPositiveSelection( final Set<Taxonomy> species_to_keep, final Phylogeny phy ) {
- // final Set<Integer> to_delete = new HashSet<Integer>();
- for( final PhylogenyNodeIterator it = phy.iteratorExternalForward(); it.hasNext(); ) {
- final PhylogenyNode n = it.next();
- if ( n.getNodeData().isHasTaxonomy() ) {
- if ( !species_to_keep.contains( n.getNodeData().getTaxonomy() ) ) {
- //to_delete.add( n.getNodeId() );
- phy.deleteSubtree( n, true );
- }
- }
- else {
- throw new IllegalArgumentException( "node " + n.getId() + " has no taxonomic data" );
- }
- }
- phy.clearHashIdToNodeMap();
- phy.externalNodesHaveChanged();