+ else if ( !( ref_ext_taxo.contains( n.getNodeData().getTaxonomy().getScientificName() ) )
+ && !( ref_ext_taxo.contains( n.getNodeData().getTaxonomy().getTaxonomyCode() ) )
+ && !( ( n.getNodeData().getTaxonomy().getIdentifier() != null ) && ref_ext_taxo
+ .contains( n.getNodeData().getTaxonomy().getIdentifier().getValuePlusProvider() ) ) ) {
+ nodes_to_delete.add( n );
+ }
+ }
+ for( final PhylogenyNode n : nodes_to_delete ) {
+ to_be_stripped.deleteSubtree( n, true );
+ }
+ to_be_stripped.clearHashIdToNodeMap();
+ to_be_stripped.externalNodesHaveChanged();
+ return nodes_to_delete;
+ }
+
+ final static public void transferInternalNamesToBootstrapSupport( final Phylogeny phy ) {
+ final PhylogenyNodeIterator it = phy.iteratorPostorder();
+ while ( it.hasNext() ) {
+ final PhylogenyNode n = it.next();
+ if ( !n.isExternal() && !ForesterUtil.isEmpty( n.getName() ) ) {
+ double value = -1;
+ try {
+ value = Double.parseDouble( n.getName() );
+ }
+ catch ( final NumberFormatException e ) {
+ throw new IllegalArgumentException( "failed to parse number from [" + n.getName() + "]: "
+ + e.getLocalizedMessage() );
+ }
+ if ( value >= 0.0 ) {
+ n.getBranchData().addConfidence( new Confidence( value, "bootstrap" ) );
+ n.setName( "" );
+ }
+ }
+ }
+ }
+
+ final static public boolean isInternalNamesLookLikeConfidences( final Phylogeny phy ) {
+ final PhylogenyNodeIterator it = phy.iteratorPostorder();
+ while ( it.hasNext() ) {
+ final PhylogenyNode n = it.next();
+ if ( !n.isExternal() && !n.isRoot() ) {
+ if ( !ForesterUtil.isEmpty( n.getName() ) ) {
+ double value = -1;
+ try {
+ value = Double.parseDouble( n.getName() );
+ }
+ catch ( final NumberFormatException e ) {
+ return false;
+ }
+ if ( ( value < 0.0 ) || ( value > 100 ) ) {
+ return false;
+ }
+ }
+ }
+ }
+ return true;
+ }
+
+ final static public void transferInternalNodeNamesToConfidence( final Phylogeny phy,
+ final String confidence_type ) {
+ final PhylogenyNodeIterator it = phy.iteratorPostorder();
+ while ( it.hasNext() ) {
+ transferInternalNodeNameToConfidence( confidence_type, it.next() );
+ }
+ }
+
+ private static void transferInternalNodeNameToConfidence( final String confidence_type, final PhylogenyNode n ) {
+ if ( !n.isExternal() && !n.getBranchData().isHasConfidences() ) {
+ if ( !ForesterUtil.isEmpty( n.getName() ) ) {
+ double d = -1.0;
+ try {
+ d = Double.parseDouble( n.getName() );
+ }
+ catch ( final Exception e ) {
+ d = -1.0;
+ }
+ if ( d >= 0.0 ) {
+ n.getBranchData().addConfidence( new Confidence( d, confidence_type ) );
+ n.setName( "" );
+ }
+ }
+ }
+ }
+
+ final static public void transferNodeNameToField( final Phylogeny phy,
+ final PhylogenyNodeField field,
+ final boolean external_only )
+ throws PhyloXmlDataFormatException {
+ final PhylogenyNodeIterator it = phy.iteratorPostorder();
+ while ( it.hasNext() ) {
+ final PhylogenyNode n = it.next();
+ if ( external_only && n.isInternal() ) {
+ continue;
+ }
+ final String name = n.getName().trim();
+ if ( !ForesterUtil.isEmpty( name ) ) {
+ switch ( field ) {
+ case TAXONOMY_CODE:
+ n.setName( "" );
+ setTaxonomyCode( n, name );
+ break;
+ case TAXONOMY_SCIENTIFIC_NAME:
+ n.setName( "" );
+ if ( !n.getNodeData().isHasTaxonomy() ) {
+ n.getNodeData().setTaxonomy( new Taxonomy() );
+ }
+ n.getNodeData().getTaxonomy().setScientificName( name );
+ break;
+ case TAXONOMY_COMMON_NAME:
+ n.setName( "" );
+ if ( !n.getNodeData().isHasTaxonomy() ) {
+ n.getNodeData().setTaxonomy( new Taxonomy() );
+ }
+ n.getNodeData().getTaxonomy().setCommonName( name );
+ break;
+ case SEQUENCE_SYMBOL:
+ n.setName( "" );
+ if ( !n.getNodeData().isHasSequence() ) {
+ n.getNodeData().setSequence( new Sequence() );
+ }
+ n.getNodeData().getSequence().setSymbol( name );
+ break;
+ case SEQUENCE_NAME:
+ n.setName( "" );
+ if ( !n.getNodeData().isHasSequence() ) {
+ n.getNodeData().setSequence( new Sequence() );
+ }
+ n.getNodeData().getSequence().setName( name );
+ break;
+ case TAXONOMY_ID_UNIPROT_1: {
+ if ( !n.getNodeData().isHasTaxonomy() ) {
+ n.getNodeData().setTaxonomy( new Taxonomy() );
+ }
+ String id = name;
+ final int i = name.indexOf( '_' );
+ if ( i > 0 ) {
+ id = name.substring( 0, i );
+ }
+ else {
+ n.setName( "" );
+ }
+ n.getNodeData().getTaxonomy()
+ .setIdentifier( new Identifier( id, PhyloXmlUtil.UNIPROT_TAX_PROVIDER ) );
+ break;
+ }
+ case TAXONOMY_ID_UNIPROT_2: {
+ if ( !n.getNodeData().isHasTaxonomy() ) {
+ n.getNodeData().setTaxonomy( new Taxonomy() );
+ }
+ String id = name;
+ final int i = name.indexOf( '_' );
+ if ( i > 0 ) {
+ id = name.substring( i + 1, name.length() );
+ }
+ else {
+ n.setName( "" );
+ }
+ n.getNodeData().getTaxonomy()
+ .setIdentifier( new Identifier( id, PhyloXmlUtil.UNIPROT_TAX_PROVIDER ) );
+ break;
+ }
+ case TAXONOMY_ID: {
+ if ( !n.getNodeData().isHasTaxonomy() ) {
+ n.getNodeData().setTaxonomy( new Taxonomy() );
+ }
+ n.getNodeData().getTaxonomy().setIdentifier( new Identifier( name ) );
+ break;
+ }
+ case CLADE_NAME:
+ n.setName( name );
+ break;
+ default: {
+ throw new IllegalArgumentException( "don't know what to do with " + field );
+ }
+ }
+ }
+ }
+ }
+
+ static double addPhylogenyDistances( final double a, final double b ) {
+ if ( ( a >= 0.0 ) && ( b >= 0.0 ) ) {
+ return a + b;
+ }
+ else if ( a >= 0.0 ) {
+ return a;
+ }
+ else if ( b >= 0.0 ) {
+ return b;
+ }
+ return PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT;
+ }
+
+ static double calculateDistanceToAncestor( final PhylogenyNode anc, PhylogenyNode desc ) {
+ double d = 0;
+ boolean all_default = true;
+ while ( anc != desc ) {
+ if ( desc.getDistanceToParent() != PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT ) {
+ d += desc.getDistanceToParent();
+ if ( all_default ) {
+ all_default = false;
+ }
+ }
+ desc = desc.getParent();
+ }
+ if ( all_default ) {
+ return PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT;
+ }
+ return d;
+ }
+
+ public static double calculateAverageTreeHeight( final PhylogenyNode node ) {
+ final List<PhylogenyNode> ext = node.getAllExternalDescendants();
+ double s = 0;
+ for( PhylogenyNode n : ext ) {
+ while ( n != node ) {
+ if ( n.getDistanceToParent() > 0 ) {
+ s += n.getDistanceToParent();
+ }
+ n = n.getParent();
+ }
+ }
+ return s / ext.size();
+ }
+
+ /**
+ * Deep copies the phylogeny originating from this node.
+ */
+ static PhylogenyNode copySubTree( final PhylogenyNode source ) {
+ if ( source == null ) {
+ return null;
+ }
+ else {
+ final PhylogenyNode newnode = source.copyNodeData();
+ if ( !source.isExternal() ) {
+ for( int i = 0; i < source.getNumberOfDescendants(); ++i ) {
+ newnode.setChildNode( i, PhylogenyMethods.copySubTree( source.getChildNode( i ) ) );
+ }
+ }
+ return newnode;
+ }
+ }
+
+ /**
+ * Shallow copies the phylogeny originating from this node.
+ */
+ static PhylogenyNode copySubTreeShallow( final PhylogenyNode source ) {
+ if ( source == null ) {
+ return null;
+ }
+ else {
+ final PhylogenyNode newnode = source.copyNodeDataShallow();
+ if ( !source.isExternal() ) {
+ for( int i = 0; i < source.getNumberOfDescendants(); ++i ) {
+ newnode.setChildNode( i, PhylogenyMethods.copySubTreeShallow( source.getChildNode( i ) ) );
+ }
+ }
+ return newnode;
+ }
+ }
+
+ private final static List<PhylogenyNode> divideIntoSubTreesHelper( final PhylogenyNode node,
+ final double min_distance_to_root ) {
+ final List<PhylogenyNode> l = new ArrayList<PhylogenyNode>();
+ final PhylogenyNode r = moveTowardsRoot( node, min_distance_to_root );
+ for( final PhylogenyNode ext : r.getAllExternalDescendants() ) {
+ if ( ext.getIndicator() != 0 ) {
+ throw new RuntimeException( "this should not have happened" );
+ }
+ ext.setIndicator( ( byte ) 1 );
+ l.add( ext );
+ }
+ return l;
+ }
+
+ /**
+ * Calculates the distance between PhylogenyNodes n1 and n2.
+ * PRECONDITION: n1 is a descendant of n2.
+ *
+ * @param n1
+ * a descendant of n2
+ * @param n2
+ * @return distance between n1 and n2
+ */
+ private static double getDistance( PhylogenyNode n1, final PhylogenyNode n2 ) {
+ double d = 0.0;
+ while ( n1 != n2 ) {
+ if ( n1.getDistanceToParent() > 0.0 ) {
+ d += n1.getDistanceToParent();
+ }
+ n1 = n1.getParent();
+ }
+ return d;
+ }
+
+ private static boolean match( final String s,
+ final String query,
+ final boolean case_sensitive,
+ final boolean partial,
+ final boolean regex ) {
+ if ( ForesterUtil.isEmpty( s ) || ForesterUtil.isEmpty( query ) ) {
+ return false;
+ }
+ String my_s = s.trim();
+ String my_query = query.trim();
+ if ( !case_sensitive && !regex ) {
+ my_s = my_s.toLowerCase();
+ my_query = my_query.toLowerCase();
+ }
+ if ( regex ) {
+ Pattern p = null;
+ try {
+ if ( case_sensitive ) {
+ p = Pattern.compile( my_query );
+ }
+ else {
+ p = Pattern.compile( my_query, Pattern.CASE_INSENSITIVE );
+ }
+ }
+ catch ( final PatternSyntaxException e ) {
+ return false;
+ }
+ if ( p != null ) {
+ return p.matcher( my_s ).find();
+ }
+ else {
+ return false;
+ }
+ }
+ else if ( partial ) {
+ return my_s.indexOf( my_query ) >= 0;
+ }
+ else {
+ Pattern p = null;
+ try {
+ p = Pattern.compile( "(\\b|_)" + Pattern.quote( my_query ) + "(\\b|_)" );
+ }
+ catch ( final PatternSyntaxException e ) {
+ return false;
+ }
+ if ( p != null ) {
+ return p.matcher( my_s ).find();
+ }
+ else {
+ return false;
+ }
+ }
+ }
+
+ private final static PhylogenyNode moveTowardsRoot( final PhylogenyNode node, final double min_distance_to_root ) {
+ PhylogenyNode n = node;
+ PhylogenyNode prev = node;
+ while ( min_distance_to_root < n.calculateDistanceToRoot() ) {
+ prev = n;
+ n = n.getParent();
+ }
+ return prev;
+ }
+
/**
 * Names the node attribute that takes precedence when descendants are sorted.
 * NOTE(review): presumably selects between the node-name, sequence and
 * taxonomy priority comparators; the consuming code is outside this excerpt.
 */
public enum DESCENDANT_SORT_PRIORITY {
    NODE_NAME, SEQUENCE, TAXONOMY
}
+
/**
 * Target fields into which {@code transferNodeNameToField} can move a node's
 * name.
 */
public enum PhylogenyNodeField {
    CLADE_NAME,
    SEQUENCE_NAME,
    SEQUENCE_SYMBOL,
    TAXONOMY_CODE,
    TAXONOMY_COMMON_NAME,
    TAXONOMY_ID,
    TAXONOMY_ID_UNIPROT_1,
    TAXONOMY_ID_UNIPROT_2,
    TAXONOMY_SCIENTIFIC_NAME
}
+
+ public static void addMolecularSeqsToTree( final Phylogeny phy, final Msa msa ) {
+ for( int s = 0; s < msa.getNumberOfSequences(); ++s ) {
+ final org.forester.sequence.MolecularSequence seq = msa.getSequence( s );
+ final PhylogenyNode node = phy.getNode( seq.getIdentifier() );
+ final org.forester.phylogeny.data.Sequence new_seq = new Sequence();
+ new_seq.setMolecularSequenceAligned( true );
+ new_seq.setMolecularSequence( seq.getMolecularSequenceAsString() );
+ new_seq.setName( seq.getIdentifier() );
+ try {
+ new_seq.setType( PhyloXmlUtil.SEQ_TYPE_PROTEIN );
+ }
+ catch ( final PhyloXmlDataFormatException ignore ) {
+ // do nothing
+ }
+ node.getNodeData().addSequence( new_seq );
+ }
+ }
+
+ final private static class PhylogenyNodeSortTaxonomyPriority implements Comparator<PhylogenyNode> {
+
+ @Override
+ public int compare( final PhylogenyNode n1, final PhylogenyNode n2 ) {
+ if ( n1.getNodeData().isHasTaxonomy() && n2.getNodeData().isHasTaxonomy() ) {
+ if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getScientificName() ) )
+ && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getScientificName() ) ) ) {
+ return n1.getNodeData().getTaxonomy().getScientificName().toLowerCase()
+ .compareTo( n2.getNodeData().getTaxonomy().getScientificName().toLowerCase() );
+ }
+ if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getTaxonomyCode() ) )
+ && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getTaxonomyCode() ) ) ) {
+ return n1.getNodeData().getTaxonomy().getTaxonomyCode()
+ .compareTo( n2.getNodeData().getTaxonomy().getTaxonomyCode() );
+ }
+ }
+ if ( n1.getNodeData().isHasSequence() && n2.getNodeData().isHasSequence() ) {
+ if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getName() ) )
+ && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getName() ) ) ) {
+ return n1.getNodeData().getSequence().getName().toLowerCase()
+ .compareTo( n2.getNodeData().getSequence().getName().toLowerCase() );
+ }
+ if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getGeneName() ) )
+ && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getGeneName() ) ) ) {
+ return n1.getNodeData().getSequence().getGeneName()
+ .compareTo( n2.getNodeData().getSequence().getGeneName() );
+ }
+ if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getSymbol() ) )
+ && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getSymbol() ) ) ) {
+ return n1.getNodeData().getSequence().getSymbol()
+ .compareTo( n2.getNodeData().getSequence().getSymbol() );
+ }
+ }
+ if ( ( !ForesterUtil.isEmpty( n1.getName() ) ) && ( !ForesterUtil.isEmpty( n2.getName() ) ) ) {
+ return n1.getName().toLowerCase().compareTo( n2.getName().toLowerCase() );
+ }
+ return 0;
+ }
+ }
+
+ final private static class PhylogenyNodeSortSequencePriority implements Comparator<PhylogenyNode> {
+
+ @Override
+ public int compare( final PhylogenyNode n1, final PhylogenyNode n2 ) {
+ if ( n1.getNodeData().isHasSequence() && n2.getNodeData().isHasSequence() ) {
+ if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getName() ) )
+ && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getName() ) ) ) {
+ return n1.getNodeData().getSequence().getName().toLowerCase()
+ .compareTo( n2.getNodeData().getSequence().getName().toLowerCase() );
+ }
+ if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getGeneName() ) )
+ && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getGeneName() ) ) ) {
+ return n1.getNodeData().getSequence().getGeneName()
+ .compareTo( n2.getNodeData().getSequence().getGeneName() );
+ }
+ if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getSymbol() ) )
+ && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getSymbol() ) ) ) {
+ return n1.getNodeData().getSequence().getSymbol()
+ .compareTo( n2.getNodeData().getSequence().getSymbol() );
+ }
+ }
+ if ( n1.getNodeData().isHasTaxonomy() && n2.getNodeData().isHasTaxonomy() ) {
+ if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getScientificName() ) )
+ && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getScientificName() ) ) ) {
+ return n1.getNodeData().getTaxonomy().getScientificName().toLowerCase()
+ .compareTo( n2.getNodeData().getTaxonomy().getScientificName().toLowerCase() );
+ }
+ if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getTaxonomyCode() ) )
+ && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getTaxonomyCode() ) ) ) {
+ return n1.getNodeData().getTaxonomy().getTaxonomyCode()
+ .compareTo( n2.getNodeData().getTaxonomy().getTaxonomyCode() );
+ }
+ }
+ if ( ( !ForesterUtil.isEmpty( n1.getName() ) ) && ( !ForesterUtil.isEmpty( n2.getName() ) ) ) {
+ return n1.getName().toLowerCase().compareTo( n2.getName().toLowerCase() );
+ }
+ return 0;
+ }
+ }
+
+ final private static class PhylogenyNodeSortNodeNamePriority implements Comparator<PhylogenyNode> {
+
+ @Override
+ public int compare( final PhylogenyNode n1, final PhylogenyNode n2 ) {
+ if ( ( !ForesterUtil.isEmpty( n1.getName() ) ) && ( !ForesterUtil.isEmpty( n2.getName() ) ) ) {
+ return n1.getName().toLowerCase().compareTo( n2.getName().toLowerCase() );
+ }
+ if ( n1.getNodeData().isHasTaxonomy() && n2.getNodeData().isHasTaxonomy() ) {
+ if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getScientificName() ) )
+ && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getScientificName() ) ) ) {
+ return n1.getNodeData().getTaxonomy().getScientificName().toLowerCase()
+ .compareTo( n2.getNodeData().getTaxonomy().getScientificName().toLowerCase() );
+ }
+ if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getTaxonomy().getTaxonomyCode() ) )
+ && ( !ForesterUtil.isEmpty( n2.getNodeData().getTaxonomy().getTaxonomyCode() ) ) ) {
+ return n1.getNodeData().getTaxonomy().getTaxonomyCode()
+ .compareTo( n2.getNodeData().getTaxonomy().getTaxonomyCode() );
+ }
+ }
+ if ( n1.getNodeData().isHasSequence() && n2.getNodeData().isHasSequence() ) {
+ if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getName() ) )
+ && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getName() ) ) ) {
+ return n1.getNodeData().getSequence().getName().toLowerCase()
+ .compareTo( n2.getNodeData().getSequence().getName().toLowerCase() );
+ }
+ if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getGeneName() ) )
+ && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getGeneName() ) ) ) {
+ return n1.getNodeData().getSequence().getGeneName()
+ .compareTo( n2.getNodeData().getSequence().getGeneName() );
+ }
+ if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getSymbol() ) )
+ && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getSymbol() ) ) ) {
+ return n1.getNodeData().getSequence().getSymbol()
+ .compareTo( n2.getNodeData().getSequence().getSymbol() );
+ }
+ }
+ return 0;