X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fjava%2Fsrc%2Forg%2Fforester%2Farchaeopteryx%2FAptxUtil.java;h=e35596765ffbf79ac368e1f9fd7846f21eded4d1;hb=7ad84913635263ddc191fcbebd8a9bd46a9ac7e3;hp=cb3a2c623accdf9c13fe8202762b157f1544abfe;hpb=0feab495c350ff488c86253826bca4b202c46f4d;p=jalview.git diff --git a/forester/java/src/org/forester/archaeopteryx/AptxUtil.java b/forester/java/src/org/forester/archaeopteryx/AptxUtil.java index cb3a2c6..e355967 100644 --- a/forester/java/src/org/forester/archaeopteryx/AptxUtil.java +++ b/forester/java/src/org/forester/archaeopteryx/AptxUtil.java @@ -50,6 +50,8 @@ import java.util.Map; import java.util.Set; import java.util.SortedSet; import java.util.TreeSet; +import java.util.regex.Matcher; +import java.util.regex.Pattern; import javax.imageio.IIOImage; import javax.imageio.ImageIO; @@ -60,7 +62,7 @@ import javax.swing.JApplet; import javax.swing.JOptionPane; import javax.swing.text.MaskFormatter; -import org.forester.analysis.AncestralTaxonomyInference; +import org.forester.analysis.TaxonomyDataManager; import org.forester.io.parsers.PhylogenyParser; import org.forester.io.parsers.phyloxml.PhyloXmlUtil; import org.forester.io.parsers.tol.TolParser; @@ -68,6 +70,7 @@ import org.forester.io.parsers.util.ParserUtils; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyMethods; import org.forester.phylogeny.PhylogenyNode; +import org.forester.phylogeny.data.Accession; import org.forester.phylogeny.data.BranchColor; import org.forester.phylogeny.data.Distribution; import org.forester.phylogeny.data.Sequence; @@ -76,18 +79,45 @@ import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory; import org.forester.phylogeny.factories.PhylogenyFactory; import org.forester.phylogeny.iterators.PhylogenyNodeIterator; import org.forester.phylogeny.iterators.PreorderTreeIterator; +import org.forester.util.AsciiHistogram; import org.forester.util.DescriptiveStatistics; import org.forester.util.ForesterUtil; -import org.forester.ws.uniprot.UniProtTaxonomy; +import org.forester.ws.seqdb.UniProtTaxonomy; public final class AptxUtil { + private final static Pattern seq_identifier_pattern_1 = Pattern + .compile( "^([A-Za-z]{2,5})[|=:]([0-9A-Za-z_\\.]{5,40})\\s*$" ); + private final static Pattern seq_identifier_pattern_2 = Pattern + .compile( "^([A-Za-z]{2,5})[|=:]([0-9A-Za-z_\\.]{5,40})[|,; ].*$" ); private final static String[] AVAILABLE_FONT_FAMILIES_SORTED = GraphicsEnvironment.getLocalGraphicsEnvironment() .getAvailableFontFamilyNames(); static { Arrays.sort( AVAILABLE_FONT_FAMILIES_SORTED ); } + public final static Accession obtainSequenceAccessionFromName( final String sequence_name ) { + final String n = sequence_name.trim(); + final Matcher matcher1 = seq_identifier_pattern_1.matcher( n ); + String group1 = ""; + String group2 = ""; + if ( matcher1.matches() ) { + group1 = matcher1.group( 1 ); + group2 = matcher1.group( 2 ); + } + else { + final Matcher matcher2 = seq_identifier_pattern_2.matcher( n ); + if ( matcher2.matches() ) { + group1 = matcher2.group( 1 ); + group2 = matcher2.group( 2 ); + } + } + if ( ForesterUtil.isEmpty( group1 ) || ForesterUtil.isEmpty( group2 ) ) { + return null; + } + return new Accession( group2, group1 ); + } + public static void ensurePresenceOfTaxonomy( final PhylogenyNode node ) { if ( !node.getNodeData().isHasTaxonomy() ) { node.getNodeData().setTaxonomy( new Taxonomy() ); @@ -434,7 +464,7 @@ public final class AptxUtil { else { UniProtTaxonomy up = null; try { - up = AncestralTaxonomyInference.obtainUniProtTaxonomy( temp_tax, null, null ); + up = TaxonomyDataManager.obtainUniProtTaxonomy( temp_tax, null, null ); } catch ( final Exception e ) { e.printStackTrace(); @@ -507,7 +537,7 @@ public final class AptxUtil { } } - final static String crateBasicInformation( final Phylogeny phy ) { + final static String createBasicInformation( final Phylogeny phy ) { final StringBuilder desc = new StringBuilder(); if ( ( phy != null ) && !phy.isEmpty() ) { if ( !ForesterUtil.isEmpty( phy.getName() ) ) { @@ -544,42 +574,88 @@ public final class AptxUtil { desc.append( "Maximum distance to root: " ); desc.append( ForesterUtil.round( PhylogenyMethods.calculateMaxDistanceToRoot( phy ), 6 ) ); desc.append( "\n" ); - desc.append( "Descendants per node statistics: " ); - final DescriptiveStatistics ds = PhylogenyMethods.calculatNumberOfDescendantsPerNodeStatistics( phy ); - desc.append( "\n" ); - desc.append( " Median: " + ForesterUtil.round( ds.median(), 2 ) ); - desc.append( "\n" ); - desc.append( " Mean: " + ForesterUtil.round( ds.arithmeticMean(), 2 ) ); - desc.append( "\n" ); - desc.append( " SD: " + ForesterUtil.round( ds.sampleStandardDeviation(), 2 ) ); - desc.append( "\n" ); - desc.append( " Minimum: " + ForesterUtil.roundToInt( ds.getMin() ) ); - desc.append( "\n" ); - desc.append( " Maximum: " + ForesterUtil.roundToInt( ds.getMax() ) ); + final Set taxs = PhylogenyMethods.obtainDistinctTaxonomies( phy.getRoot() ); + if ( taxs != null ) { + desc.append( "Distinct external taxonomies: " ); + desc.append( taxs.size() ); + } desc.append( "\n" ); - final DescriptiveStatistics cs = PhylogenyMethods.calculatConfidenceStatistics( phy ); - if ( cs.getN() > 1 ) { - desc.append( "Support statistics: " ); + final DescriptiveStatistics bs = PhylogenyMethods.calculatBranchLengthStatistics( phy ); + if ( bs.getN() > 2 ) { desc.append( "\n" ); - desc.append( " Branches with support: " + cs.getN() ); + desc.append( "Branch-length statistics: " ); desc.append( "\n" ); - desc.append( " Median: " + ForesterUtil.round( cs.median(), 6 ) ); + desc.append( " Number of branches with non-negative branch-lengths: " + bs.getN() ); desc.append( "\n" ); - desc.append( " Mean: " + ForesterUtil.round( cs.arithmeticMean(), 6 ) ); + desc.append( " Median: " + ForesterUtil.round( bs.median(), 6 ) ); desc.append( "\n" ); - if ( cs.getN() > 2 ) { - desc.append( " SD: " + ForesterUtil.round( cs.sampleStandardDeviation(), 6 ) ); - desc.append( "\n" ); - } - desc.append( " Minimum: " + ForesterUtil.roundToInt( cs.getMin() ) ); + desc.append( " Mean: " + ForesterUtil.round( bs.arithmeticMean(), 6 ) ); + desc.append( "\n" ); + desc.append( " SD: " + ForesterUtil.round( bs.sampleStandardDeviation(), 6 ) ); + desc.append( "\n" ); + desc.append( " Minimum: " + ForesterUtil.round( bs.getMin(), 6 ) ); desc.append( "\n" ); - desc.append( " Maximum: " + ForesterUtil.roundToInt( cs.getMax() ) ); + desc.append( " Maximum: " + ForesterUtil.round( bs.getMax(), 6 ) ); desc.append( "\n" ); + desc.append( "\n" ); + final AsciiHistogram histo = new AsciiHistogram( bs ); + desc.append( histo.toStringBuffer( 12, '#', 40, 7, " " ) ); } - final Set taxs = PhylogenyMethods.obtainDistinctTaxonomies( phy.getRoot() ); - if ( taxs != null ) { - desc.append( "Distinct external taxonomies: " ); - desc.append( taxs.size() ); + final DescriptiveStatistics ds = PhylogenyMethods.calculatNumberOfDescendantsPerNodeStatistics( phy ); + if ( ds.getN() > 2 ) { + desc.append( "\n" ); + desc.append( "Descendants per node statistics: " ); + desc.append( "\n" ); + desc.append( " Median: " + ForesterUtil.round( ds.median(), 2 ) ); + desc.append( "\n" ); + desc.append( " Mean: " + ForesterUtil.round( ds.arithmeticMean(), 2 ) ); + desc.append( "\n" ); + desc.append( " SD: " + ForesterUtil.round( ds.sampleStandardDeviation(), 2 ) ); + desc.append( "\n" ); + desc.append( " Minimum: " + ForesterUtil.roundToInt( ds.getMin() ) ); + desc.append( "\n" ); + desc.append( " Maximum: " + ForesterUtil.roundToInt( ds.getMax() ) ); + desc.append( "\n" ); + } + List css = null; + try { + css = PhylogenyMethods.calculatConfidenceStatistics( phy ); + } + catch ( final IllegalArgumentException e ) { + ForesterUtil.printWarningMessage( Constants.PRG_NAME, e.getMessage() ); + } + if ( ( css != null ) && ( css.size() > 0 ) ) { + desc.append( "\n" ); + for( int i = 0; i < css.size(); ++i ) { + final DescriptiveStatistics cs = css.get( i ); + if ( ( cs != null ) && ( cs.getN() > 1 ) ) { + if ( css.size() > 1 ) { + desc.append( "Support statistics " + ( i + 1 ) + ": " ); + } + else { + desc.append( "Support statistics: " ); + } + if ( !ForesterUtil.isEmpty( cs.getDescription() ) ) { + desc.append( "\n" ); + desc.append( " Type: " + cs.getDescription() ); + } + desc.append( "\n" ); + desc.append( " Branches with support: " + cs.getN() ); + desc.append( "\n" ); + desc.append( " Median: " + ForesterUtil.round( cs.median(), 6 ) ); + desc.append( "\n" ); + desc.append( " Mean: " + ForesterUtil.round( cs.arithmeticMean(), 6 ) ); + desc.append( "\n" ); + if ( cs.getN() > 2 ) { + desc.append( " SD: " + ForesterUtil.round( cs.sampleStandardDeviation(), 6 ) ); + desc.append( "\n" ); + } + desc.append( " Minimum: " + ForesterUtil.roundToInt( cs.getMin() ) ); + desc.append( "\n" ); + desc.append( " Maximum: " + ForesterUtil.roundToInt( cs.getMax() ) ); + desc.append( "\n" ); + } + } } } return desc.toString();