in progress
[jalview.git] / forester / java / src / org / forester / phylogeny / PhylogenyMethods.java
index a916d89..806f031 100644 (file)
@@ -38,8 +38,10 @@ import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
+import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
+import org.forester.io.parsers.FastaParser;
 import org.forester.io.parsers.PhylogenyParser;
 import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException;
 import org.forester.io.parsers.phyloxml.PhyloXmlUtil;
@@ -73,6 +75,38 @@ public class PhylogenyMethods {
         throw new CloneNotSupportedException();
     }
 
+    public static void extractFastaInformation( final Phylogeny phy ) {
+        for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) {
+            final PhylogenyNode node = iter.next();
+            if ( !ForesterUtil.isEmpty( node.getName() ) ) {
+                final Matcher name_m = FastaParser.FASTA_DESC_LINE.matcher( node.getName() );
+                if ( name_m.lookingAt() ) {
+                    System.out.println();
+                    // System.out.println( name_m.group( 1 ) );
+                    // System.out.println( name_m.group( 2 ) );
+                    // System.out.println( name_m.group( 3 ) );
+                    // System.out.println( name_m.group( 4 ) );
+                    final String acc_source = name_m.group( 1 );
+                    final String acc = name_m.group( 2 );
+                    final String seq_name = name_m.group( 3 );
+                    final String tax_sn = name_m.group( 4 );
+                    if ( !ForesterUtil.isEmpty( acc_source ) && !ForesterUtil.isEmpty( acc ) ) {
+                        ForesterUtil.ensurePresenceOfSequence( node );
+                        node.getNodeData().getSequence( 0 ).setAccession( new Accession( acc, acc_source ) );
+                    }
+                    if ( !ForesterUtil.isEmpty( seq_name ) ) {
+                        ForesterUtil.ensurePresenceOfSequence( node );
+                        node.getNodeData().getSequence( 0 ).setName( seq_name );
+                    }
+                    if ( !ForesterUtil.isEmpty( tax_sn ) ) {
+                        ForesterUtil.ensurePresenceOfTaxonomy( node );
+                        node.getNodeData().getTaxonomy( 0 ).setScientificName( tax_sn );
+                    }
+                }
+            }
+        }
+    }
+
     public static DescriptiveStatistics calculatBranchLengthStatistics( final Phylogeny phy ) {
         final DescriptiveStatistics stats = new BasicDescriptiveStatistics();
         for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {