inprogress
[jalview.git] / forester / java / src / org / forester / phylogeny / PhylogenyMethods.java
index a916d89..01c84c3 100644 (file)
@@ -38,8 +38,10 @@ import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
+import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
+import org.forester.io.parsers.FastaParser;
 import org.forester.io.parsers.PhylogenyParser;
 import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException;
 import org.forester.io.parsers.phyloxml.PhyloXmlUtil;
@@ -73,6 +75,36 @@ public class PhylogenyMethods {
         throw new CloneNotSupportedException();
     }
 
+    public static boolean extractFastaInformation( final Phylogeny phy ) {
+        boolean could_extract = false;
+        for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) {
+            final PhylogenyNode node = iter.next();
+            if ( !ForesterUtil.isEmpty( node.getName() ) ) {
+                final Matcher name_m = FastaParser.FASTA_DESC_LINE.matcher( node.getName() );
+                if ( name_m.lookingAt() ) {
+                    could_extract = true;
+                    final String acc_source = name_m.group( 1 );
+                    final String acc = name_m.group( 2 );
+                    final String seq_name = name_m.group( 3 );
+                    final String tax_sn = name_m.group( 4 );
+                    if ( !ForesterUtil.isEmpty( acc_source ) && !ForesterUtil.isEmpty( acc ) ) {
+                        ForesterUtil.ensurePresenceOfSequence( node );
+                        node.getNodeData().getSequence( 0 ).setAccession( new Accession( acc, acc_source ) );
+                    }
+                    if ( !ForesterUtil.isEmpty( seq_name ) ) {
+                        ForesterUtil.ensurePresenceOfSequence( node );
+                        node.getNodeData().getSequence( 0 ).setName( seq_name );
+                    }
+                    if ( !ForesterUtil.isEmpty( tax_sn ) ) {
+                        ForesterUtil.ensurePresenceOfTaxonomy( node );
+                        node.getNodeData().getTaxonomy( 0 ).setScientificName( tax_sn );
+                    }
+                }
+            }
+        }
+        return could_extract;
+    }
+
     public static DescriptiveStatistics calculatBranchLengthStatistics( final Phylogeny phy ) {
         final DescriptiveStatistics stats = new BasicDescriptiveStatistics();
         for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
@@ -1482,26 +1514,51 @@ public class PhylogenyMethods {
         }
     }
 
-    final static public void transferInternalNodeNamesToConfidence( final Phylogeny phy ) {
+    final static public boolean isInternalNamesLookLikeConfidences( final Phylogeny phy ) {
         final PhylogenyNodeIterator it = phy.iteratorPostorder();
         while ( it.hasNext() ) {
             final PhylogenyNode n = it.next();
-            if ( !n.isExternal() && !n.getBranchData().isHasConfidences() ) {
+            if ( !n.isExternal() && !n.isRoot() ) {
                 if ( !ForesterUtil.isEmpty( n.getName() ) ) {
-                    double d = -1.0;
+                    double value = -1;
                     try {
-                        d = Double.parseDouble( n.getName() );
+                        value = Double.parseDouble( n.getName() );
                     }
-                    catch ( final Exception e ) {
-                        d = -1.0;
+                    catch ( final NumberFormatException e ) {
+                        return false;
                     }
-                    if ( d >= 0.0 ) {
-                        n.getBranchData().addConfidence( new Confidence( d, "" ) );
-                        n.setName( "" );
+                    if ( ( value < 0.0 ) || ( value > 100 ) ) {
+                        return false;
                     }
                 }
             }
         }
+        return true;
+    }
+
+    final static public void transferInternalNodeNamesToConfidence( final Phylogeny phy, final String confidence_type ) {
+        final PhylogenyNodeIterator it = phy.iteratorPostorder();
+        while ( it.hasNext() ) {
+            transferInternalNodeNameToConfidence( confidence_type, it.next() );
+        }
+    }
+
+    private static void transferInternalNodeNameToConfidence( final String confidence_type, final PhylogenyNode n ) {
+        if ( !n.isExternal() && !n.getBranchData().isHasConfidences() ) {
+            if ( !ForesterUtil.isEmpty( n.getName() ) ) {
+                double d = -1.0;
+                try {
+                    d = Double.parseDouble( n.getName() );
+                }
+                catch ( final Exception e ) {
+                    d = -1.0;
+                }
+                if ( d >= 0.0 ) {
+                    n.getBranchData().addConfidence( new Confidence( d, confidence_type ) );
+                    n.setName( "" );
+                }
+            }
+        }
     }
 
     final static public void transferNodeNameToField( final Phylogeny phy,