search of domains only when domains are shown!
[jalview.git] / forester / java / src / org / forester / archaeopteryx / AptxUtil.java
index cb3a2c6..e355967 100644 (file)
@@ -50,6 +50,8 @@ import java.util.Map;
 import java.util.Set;
 import java.util.SortedSet;
 import java.util.TreeSet;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 
 import javax.imageio.IIOImage;
 import javax.imageio.ImageIO;
@@ -60,7 +62,7 @@ import javax.swing.JApplet;
 import javax.swing.JOptionPane;
 import javax.swing.text.MaskFormatter;
 
-import org.forester.analysis.AncestralTaxonomyInference;
+import org.forester.analysis.TaxonomyDataManager;
 import org.forester.io.parsers.PhylogenyParser;
 import org.forester.io.parsers.phyloxml.PhyloXmlUtil;
 import org.forester.io.parsers.tol.TolParser;
@@ -68,6 +70,7 @@ import org.forester.io.parsers.util.ParserUtils;
 import org.forester.phylogeny.Phylogeny;
 import org.forester.phylogeny.PhylogenyMethods;
 import org.forester.phylogeny.PhylogenyNode;
+import org.forester.phylogeny.data.Accession;
 import org.forester.phylogeny.data.BranchColor;
 import org.forester.phylogeny.data.Distribution;
 import org.forester.phylogeny.data.Sequence;
@@ -76,18 +79,45 @@ import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
 import org.forester.phylogeny.factories.PhylogenyFactory;
 import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
 import org.forester.phylogeny.iterators.PreorderTreeIterator;
+import org.forester.util.AsciiHistogram;
 import org.forester.util.DescriptiveStatistics;
 import org.forester.util.ForesterUtil;
-import org.forester.ws.uniprot.UniProtTaxonomy;
+import org.forester.ws.seqdb.UniProtTaxonomy;
 
 public final class AptxUtil {
 
+    private final static Pattern  seq_identifier_pattern_1       = Pattern
+                                                                         .compile( "^([A-Za-z]{2,5})[|=:]([0-9A-Za-z_\\.]{5,40})\\s*$" );
+    private final static Pattern  seq_identifier_pattern_2       = Pattern
+                                                                         .compile( "^([A-Za-z]{2,5})[|=:]([0-9A-Za-z_\\.]{5,40})[|,; ].*$" );
     private final static String[] AVAILABLE_FONT_FAMILIES_SORTED = GraphicsEnvironment.getLocalGraphicsEnvironment()
                                                                          .getAvailableFontFamilyNames();
     static {
         Arrays.sort( AVAILABLE_FONT_FAMILIES_SORTED );
     }
 
+    public final static Accession obtainSequenceAccessionFromName( final String sequence_name ) {
+        final String n = sequence_name.trim();
+        final Matcher matcher1 = seq_identifier_pattern_1.matcher( n );
+        String group1 = "";
+        String group2 = "";
+        if ( matcher1.matches() ) {
+            group1 = matcher1.group( 1 );
+            group2 = matcher1.group( 2 );
+        }
+        else {
+            final Matcher matcher2 = seq_identifier_pattern_2.matcher( n );
+            if ( matcher2.matches() ) {
+                group1 = matcher2.group( 1 );
+                group2 = matcher2.group( 2 );
+            }
+        }
+        if ( ForesterUtil.isEmpty( group1 ) || ForesterUtil.isEmpty( group2 ) ) {
+            return null;
+        }
+        return new Accession( group2, group1 );
+    }
+
     public static void ensurePresenceOfTaxonomy( final PhylogenyNode node ) {
         if ( !node.getNodeData().isHasTaxonomy() ) {
             node.getNodeData().setTaxonomy( new Taxonomy() );
@@ -434,7 +464,7 @@ public final class AptxUtil {
                         else {
                             UniProtTaxonomy up = null;
                             try {
-                                up = AncestralTaxonomyInference.obtainUniProtTaxonomy( temp_tax, null, null );
+                                up = TaxonomyDataManager.obtainUniProtTaxonomy( temp_tax, null, null );
                             }
                             catch ( final Exception e ) {
                                 e.printStackTrace();
@@ -507,7 +537,7 @@ public final class AptxUtil {
         }
     }
 
-    final static String crateBasicInformation( final Phylogeny phy ) {
+    final static String createBasicInformation( final Phylogeny phy ) {
         final StringBuilder desc = new StringBuilder();
         if ( ( phy != null ) && !phy.isEmpty() ) {
             if ( !ForesterUtil.isEmpty( phy.getName() ) ) {
@@ -544,42 +574,88 @@ public final class AptxUtil {
             desc.append( "Maximum distance to root: " );
             desc.append( ForesterUtil.round( PhylogenyMethods.calculateMaxDistanceToRoot( phy ), 6 ) );
             desc.append( "\n" );
-            desc.append( "Descendants per node statistics: " );
-            final DescriptiveStatistics ds = PhylogenyMethods.calculatNumberOfDescendantsPerNodeStatistics( phy );
-            desc.append( "\n" );
-            desc.append( "    Median: " + ForesterUtil.round( ds.median(), 2 ) );
-            desc.append( "\n" );
-            desc.append( "    Mean: " + ForesterUtil.round( ds.arithmeticMean(), 2 ) );
-            desc.append( "\n" );
-            desc.append( "    SD: " + ForesterUtil.round( ds.sampleStandardDeviation(), 2 ) );
-            desc.append( "\n" );
-            desc.append( "    Minimum: " + ForesterUtil.roundToInt( ds.getMin() ) );
-            desc.append( "\n" );
-            desc.append( "    Maximum: " + ForesterUtil.roundToInt( ds.getMax() ) );
+            final Set<Taxonomy> taxs = PhylogenyMethods.obtainDistinctTaxonomies( phy.getRoot() );
+            if ( taxs != null ) {
+                desc.append( "Distinct external taxonomies: " );
+                desc.append( taxs.size() );
+            }
             desc.append( "\n" );
-            final DescriptiveStatistics cs = PhylogenyMethods.calculatConfidenceStatistics( phy );
-            if ( cs.getN() > 1 ) {
-                desc.append( "Support statistics: " );
+            final DescriptiveStatistics bs = PhylogenyMethods.calculatBranchLengthStatistics( phy );
+            if ( bs.getN() > 2 ) {
                 desc.append( "\n" );
-                desc.append( "    Branches with support: " + cs.getN() );
+                desc.append( "Branch-length statistics: " );
                 desc.append( "\n" );
-                desc.append( "    Median: " + ForesterUtil.round( cs.median(), 6 ) );
+                desc.append( "    Number of branches with non-negative branch-lengths: " + bs.getN() );
                 desc.append( "\n" );
-                desc.append( "    Mean: " + ForesterUtil.round( cs.arithmeticMean(), 6 ) );
+                desc.append( "    Median: " + ForesterUtil.round( bs.median(), 6 ) );
                 desc.append( "\n" );
-                if ( cs.getN() > 2 ) {
-                    desc.append( "    SD: " + ForesterUtil.round( cs.sampleStandardDeviation(), 6 ) );
-                    desc.append( "\n" );
-                }
-                desc.append( "    Minimum: " + ForesterUtil.roundToInt( cs.getMin() ) );
+                desc.append( "    Mean: " + ForesterUtil.round( bs.arithmeticMean(), 6 ) );
+                desc.append( "\n" );
+                desc.append( "    SD: " + ForesterUtil.round( bs.sampleStandardDeviation(), 6 ) );
+                desc.append( "\n" );
+                desc.append( "    Minimum: " + ForesterUtil.round( bs.getMin(), 6 ) );
                 desc.append( "\n" );
-                desc.append( "    Maximum: " + ForesterUtil.roundToInt( cs.getMax() ) );
+                desc.append( "    Maximum: " + ForesterUtil.round( bs.getMax(), 6 ) );
                 desc.append( "\n" );
+                desc.append( "\n" );
+                final AsciiHistogram histo = new AsciiHistogram( bs );
+                desc.append( histo.toStringBuffer( 12, '#', 40, 7, "    " ) );
             }
-            final Set<Taxonomy> taxs = PhylogenyMethods.obtainDistinctTaxonomies( phy.getRoot() );
-            if ( taxs != null ) {
-                desc.append( "Distinct external taxonomies: " );
-                desc.append( taxs.size() );
+            final DescriptiveStatistics ds = PhylogenyMethods.calculatNumberOfDescendantsPerNodeStatistics( phy );
+            if ( ds.getN() > 2 ) {
+                desc.append( "\n" );
+                desc.append( "Descendants per node statistics: " );
+                desc.append( "\n" );
+                desc.append( "    Median: " + ForesterUtil.round( ds.median(), 2 ) );
+                desc.append( "\n" );
+                desc.append( "    Mean: " + ForesterUtil.round( ds.arithmeticMean(), 2 ) );
+                desc.append( "\n" );
+                desc.append( "    SD: " + ForesterUtil.round( ds.sampleStandardDeviation(), 2 ) );
+                desc.append( "\n" );
+                desc.append( "    Minimum: " + ForesterUtil.roundToInt( ds.getMin() ) );
+                desc.append( "\n" );
+                desc.append( "    Maximum: " + ForesterUtil.roundToInt( ds.getMax() ) );
+                desc.append( "\n" );
+            }
+            List<DescriptiveStatistics> css = null;
+            try {
+                css = PhylogenyMethods.calculatConfidenceStatistics( phy );
+            }
+            catch ( final IllegalArgumentException e ) {
+                ForesterUtil.printWarningMessage( Constants.PRG_NAME, e.getMessage() );
+            }
+            if ( ( css != null ) && ( css.size() > 0 ) ) {
+                desc.append( "\n" );
+                for( int i = 0; i < css.size(); ++i ) {
+                    final DescriptiveStatistics cs = css.get( i );
+                    if ( ( cs != null ) && ( cs.getN() > 1 ) ) {
+                        if ( css.size() > 1 ) {
+                            desc.append( "Support statistics " + ( i + 1 ) + ": " );
+                        }
+                        else {
+                            desc.append( "Support statistics: " );
+                        }
+                        if ( !ForesterUtil.isEmpty( cs.getDescription() ) ) {
+                            desc.append( "\n" );
+                            desc.append( "    Type: " + cs.getDescription() );
+                        }
+                        desc.append( "\n" );
+                        desc.append( "    Branches with support: " + cs.getN() );
+                        desc.append( "\n" );
+                        desc.append( "    Median: " + ForesterUtil.round( cs.median(), 6 ) );
+                        desc.append( "\n" );
+                        desc.append( "    Mean: " + ForesterUtil.round( cs.arithmeticMean(), 6 ) );
+                        desc.append( "\n" );
+                        if ( cs.getN() > 2 ) {
+                            desc.append( "    SD: " + ForesterUtil.round( cs.sampleStandardDeviation(), 6 ) );
+                            desc.append( "\n" );
+                        }
+                        desc.append( "    Minimum: " + ForesterUtil.roundToInt( cs.getMin() ) );
+                        desc.append( "\n" );
+                        desc.append( "    Maximum: " + ForesterUtil.roundToInt( cs.getMax() ) );
+                        desc.append( "\n" );
+                    }
+                }
             }
         }
         return desc.toString();