X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fjava%2Fsrc%2Forg%2Fforester%2Fphylogeny%2FPhylogenyMethods.java;h=9ca908617e7de2893f0a1246e4913b89981c1840;hb=8cb65713b89737f529cedce7bcd39f2b9f9fc8a1;hp=f53e41a2d16ca7c430f5789f1411ced1815944de;hpb=eee996a6476a1e3d84c07f8f690dcde3ff4b2ef5;p=jalview.git diff --git a/forester/java/src/org/forester/phylogeny/PhylogenyMethods.java b/forester/java/src/org/forester/phylogeny/PhylogenyMethods.java index f53e41a..9ca9086 100644 --- a/forester/java/src/org/forester/phylogeny/PhylogenyMethods.java +++ b/forester/java/src/org/forester/phylogeny/PhylogenyMethods.java @@ -26,6 +26,8 @@ package org.forester.phylogeny; import java.awt.Color; +import java.io.File; +import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.HashSet; @@ -35,12 +37,22 @@ import java.util.Set; import java.util.SortedMap; import java.util.TreeMap; +import org.forester.io.parsers.PhylogenyParser; +import org.forester.io.parsers.phyloxml.PhyloXmlUtil; +import org.forester.io.parsers.util.PhylogenyParserException; import org.forester.phylogeny.data.BranchColor; import org.forester.phylogeny.data.BranchWidth; import org.forester.phylogeny.data.Confidence; import org.forester.phylogeny.data.DomainArchitecture; +import org.forester.phylogeny.data.Identifier; +import org.forester.phylogeny.data.PhylogenyDataUtil; +import org.forester.phylogeny.data.Sequence; import org.forester.phylogeny.data.Taxonomy; +import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory; +import org.forester.phylogeny.factories.PhylogenyFactory; import org.forester.phylogeny.iterators.PhylogenyNodeIterator; +import org.forester.util.BasicDescriptiveStatistics; +import org.forester.util.DescriptiveStatistics; import org.forester.util.FailedConditionCheckException; import org.forester.util.ForesterUtil; @@ -168,6 +180,145 @@ public class PhylogenyMethods { return !obtainLCA( node1, node2 ).isDuplication(); } + public final static Phylogeny[] readPhylogenies( final PhylogenyParser parser, final File file ) throws IOException { + final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); + final Phylogeny[] trees = factory.create( file, parser ); + if ( ( trees == null ) || ( trees.length == 0 ) ) { + throw new PhylogenyParserException( "Unable to parse phylogeny from file: " + file ); + } + return trees; + } + + final static public void transferInternalNodeNamesToConfidence( final Phylogeny phy ) { + final PhylogenyNodeIterator it = phy.iteratorPostorder(); + while ( it.hasNext() ) { + final PhylogenyNode n = it.next(); + if ( !n.isExternal() && !n.getBranchData().isHasConfidences() ) { + if ( !ForesterUtil.isEmpty( n.getName() ) ) { + double d = -1.0; + try { + d = Double.parseDouble( n.getName() ); + } + catch ( final Exception e ) { + d = -1.0; + } + if ( d >= 0.0 ) { + n.getBranchData().addConfidence( new Confidence( d, "" ) ); + n.setName( "" ); + } + } + } + } + } + + final static public void transferInternalNamesToBootstrapSupport( final Phylogeny phy ) { + final PhylogenyNodeIterator it = phy.iteratorPostorder(); + while ( it.hasNext() ) { + final PhylogenyNode n = it.next(); + if ( !n.isExternal() && !ForesterUtil.isEmpty( n.getName() ) ) { + double value = -1; + try { + value = Double.parseDouble( n.getName() ); + } + catch ( final NumberFormatException e ) { + throw new IllegalArgumentException( "failed to parse number from [" + n.getName() + "]: " + + e.getLocalizedMessage() ); + } + if ( value >= 0.0 ) { + n.getBranchData().addConfidence( new Confidence( value, "bootstrap" ) ); + n.setName( "" ); + } + } + } + } + + final static public void transferNodeNameToField( final Phylogeny phy, + final PhylogenyMethods.PhylogenyNodeField field ) { + final PhylogenyNodeIterator it = phy.iteratorPostorder(); + while ( it.hasNext() ) { + final PhylogenyNode n = it.next(); + final String name = n.getName().trim(); + if ( !ForesterUtil.isEmpty( name ) ) { + switch ( field ) { + case TAXONOMY_CODE: + //temp hack + // if ( name.length() > 5 ) { + // n.setName( "" ); + // if ( !n.getNodeData().isHasTaxonomy() ) { + // n.getNodeData().setTaxonomy( new Taxonomy() ); + // } + // n.getNodeData().getTaxonomy().setScientificName( name ); + // break; + // } + // + n.setName( "" ); + setTaxonomyCode( n, name ); + break; + case TAXONOMY_SCIENTIFIC_NAME: + n.setName( "" ); + if ( !n.getNodeData().isHasTaxonomy() ) { + n.getNodeData().setTaxonomy( new Taxonomy() ); + } + n.getNodeData().getTaxonomy().setScientificName( name ); + break; + case TAXONOMY_COMMON_NAME: + n.setName( "" ); + if ( !n.getNodeData().isHasTaxonomy() ) { + n.getNodeData().setTaxonomy( new Taxonomy() ); + } + n.getNodeData().getTaxonomy().setCommonName( name ); + break; + case SEQUENCE_SYMBOL: + n.setName( "" ); + if ( !n.getNodeData().isHasSequence() ) { + n.getNodeData().setSequence( new Sequence() ); + } + n.getNodeData().getSequence().setSymbol( name ); + break; + case SEQUENCE_NAME: + n.setName( "" ); + if ( !n.getNodeData().isHasSequence() ) { + n.getNodeData().setSequence( new Sequence() ); + } + n.getNodeData().getSequence().setName( name ); + break; + case TAXONOMY_ID_UNIPROT_1: { + if ( !n.getNodeData().isHasTaxonomy() ) { + n.getNodeData().setTaxonomy( new Taxonomy() ); + } + String id = name; + final int i = name.indexOf( '_' ); + if ( i > 0 ) { + id = name.substring( 0, i ); + } + else { + n.setName( "" ); + } + n.getNodeData().getTaxonomy() + .setIdentifier( new Identifier( id, PhyloXmlUtil.UNIPROT_TAX_PROVIDER ) ); + break; + } + case TAXONOMY_ID_UNIPROT_2: { + if ( !n.getNodeData().isHasTaxonomy() ) { + n.getNodeData().setTaxonomy( new Taxonomy() ); + } + String id = name; + final int i = name.indexOf( '_' ); + if ( i > 0 ) { + id = name.substring( i + 1, name.length() ); + } + else { + n.setName( "" ); + } + n.getNodeData().getTaxonomy() + .setIdentifier( new Identifier( id, PhyloXmlUtil.UNIPROT_TAX_PROVIDER ) ); + break; + } + } + } + } + } + static double addPhylogenyDistances( final double a, final double b ) { if ( ( a >= 0.0 ) && ( b >= 0.0 ) ) { return a + b; @@ -178,7 +329,7 @@ public class PhylogenyMethods { else if ( b >= 0.0 ) { return b; } - return PhylogenyNode.DISTANCE_DEFAULT; + return PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT; } // Helper for getUltraParalogousNodes( PhylogenyNode ). @@ -271,15 +422,28 @@ public class PhylogenyMethods { return max; } - public static int calculateMaximumNumberOfDescendantsPerNode( final Phylogeny phy ) { - int max = 0; + public static DescriptiveStatistics calculatNumberOfDescendantsPerNodeStatistics( final Phylogeny phy ) { + final DescriptiveStatistics stats = new BasicDescriptiveStatistics(); for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) { - final PhylogenyNode node = iter.next(); - if ( node.getNumberOfDescendants() > max ) { - max = node.getNumberOfDescendants(); + final PhylogenyNode n = iter.next(); + if ( !n.isExternal() ) { + stats.addValue( n.getNumberOfDescendants() ); } } - return max; + return stats; + } + + public static DescriptiveStatistics calculatConfidenceStatistics( final Phylogeny phy ) { + final DescriptiveStatistics stats = new BasicDescriptiveStatistics(); + for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) { + final PhylogenyNode n = iter.next(); + if ( !n.isExternal() ) { + if ( n.getBranchData().isHasConfidences() ) { + stats.addValue( n.getBranchData().getConfidence( 0 ).getValue() ); + } + } + } + return stats; } /** @@ -970,15 +1134,16 @@ public class PhylogenyMethods { } } } - else if ( node.getNodeData().isHasSequence() + if ( !match && node.getNodeData().isHasSequence() && match( node.getNodeData().getSequence().getName(), query, case_sensitive, partial ) ) { match = true; } - else if ( node.getNodeData().isHasSequence() + if ( !match && node.getNodeData().isHasSequence() && match( node.getNodeData().getSequence().getSymbol(), query, case_sensitive, partial ) ) { match = true; } - else if ( node.getNodeData().isHasSequence() + if ( !match + && node.getNodeData().isHasSequence() && ( node.getNodeData().getSequence().getAccession() != null ) && match( node.getNodeData().getSequence().getAccession().getValue(), query, @@ -986,7 +1151,7 @@ public class PhylogenyMethods { partial ) ) { match = true; } - else if ( node.getNodeData().isHasSequence() + if ( !match && node.getNodeData().isHasSequence() && ( node.getNodeData().getSequence().getDomainArchitecture() != null ) ) { final DomainArchitecture da = node.getNodeData().getSequence().getDomainArchitecture(); I: for( int i = 0; i < da.getNumberOfDomains(); ++i ) { @@ -996,6 +1161,22 @@ public class PhylogenyMethods { } } } + if ( !match && ( node.getNodeData().getBinaryCharacters() != null ) ) { + Iterator it = node.getNodeData().getBinaryCharacters().getPresentCharacters().iterator(); + I: while ( it.hasNext() ) { + if ( match( it.next(), query, case_sensitive, partial ) ) { + match = true; + break I; + } + } + it = node.getNodeData().getBinaryCharacters().getGainedCharacters().iterator(); + I: while ( it.hasNext() ) { + if ( match( it.next(), query, case_sensitive, partial ) ) { + match = true; + break I; + } + } + } if ( match ) { nodes.add( node ); } @@ -1052,15 +1233,16 @@ public class PhylogenyMethods { } } } - else if ( node.getNodeData().isHasSequence() + if ( !match && node.getNodeData().isHasSequence() && match( node.getNodeData().getSequence().getName(), query, case_sensitive, partial ) ) { match = true; } - else if ( node.getNodeData().isHasSequence() + if ( !match && node.getNodeData().isHasSequence() && match( node.getNodeData().getSequence().getSymbol(), query, case_sensitive, partial ) ) { match = true; } - else if ( node.getNodeData().isHasSequence() + if ( !match + && node.getNodeData().isHasSequence() && ( node.getNodeData().getSequence().getAccession() != null ) && match( node.getNodeData().getSequence().getAccession().getValue(), query, @@ -1068,7 +1250,7 @@ public class PhylogenyMethods { partial ) ) { match = true; } - else if ( node.getNodeData().isHasSequence() + if ( !match && node.getNodeData().isHasSequence() && ( node.getNodeData().getSequence().getDomainArchitecture() != null ) ) { final DomainArchitecture da = node.getNodeData().getSequence().getDomainArchitecture(); I: for( int i = 0; i < da.getNumberOfDomains(); ++i ) { @@ -1078,6 +1260,38 @@ public class PhylogenyMethods { } } } + if ( !match && ( node.getNodeData().getBinaryCharacters() != null ) ) { + Iterator it = node.getNodeData().getBinaryCharacters().getPresentCharacters().iterator(); + I: while ( it.hasNext() ) { + if ( match( it.next(), query, case_sensitive, partial ) ) { + match = true; + break I; + } + } + it = node.getNodeData().getBinaryCharacters().getGainedCharacters().iterator(); + I: while ( it.hasNext() ) { + if ( match( it.next(), query, case_sensitive, partial ) ) { + match = true; + break I; + } + } + // final String[] bcp_ary = node.getNodeData().getBinaryCharacters() + // .getPresentCharactersAsStringArray(); + // I: for( final String bc : bcp_ary ) { + // if ( match( bc, query, case_sensitive, partial ) ) { + // match = true; + // break I; + // } + // } + // final String[] bcg_ary = node.getNodeData().getBinaryCharacters() + // .getGainedCharactersAsStringArray(); + // I: for( final String bc : bcg_ary ) { + // if ( match( bc, query, case_sensitive, partial ) ) { + // match = true; + // break I; + // } + // } + } if ( !match ) { all_matched = false; break; @@ -1185,4 +1399,19 @@ public class PhylogenyMethods { } return nodes_to_delete.size(); } + + public static enum PhylogenyNodeField { + CLADE_NAME, + TAXONOMY_CODE, + TAXONOMY_SCIENTIFIC_NAME, + TAXONOMY_COMMON_NAME, + SEQUENCE_SYMBOL, + SEQUENCE_NAME, + TAXONOMY_ID_UNIPROT_1, + TAXONOMY_ID_UNIPROT_2; + } + + public static enum TAXONOMY_EXTRACTION { + NO, YES, PFAM_STYLE_ONLY; + } }