inprogress
author cmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Tue, 8 Apr 2014 02:31:55 +0000 (02:31 +0000)
committer cmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Tue, 8 Apr 2014 02:31:55 +0000 (02:31 +0000)
forester/java/src/org/forester/archaeopteryx/AptxUtil.java
forester/java/src/org/forester/archaeopteryx/UrlTreeReader.java
forester/java/src/org/forester/archaeopteryx/webservices/WebserviceUtil.java
forester/java/src/org/forester/archaeopteryx/webservices/WebservicesManager.java
forester/java/src/org/forester/io/parsers/nexus/NexusPhylogeniesParser.java
forester/java/src/org/forester/phylogeny/PhylogenyMethods.java
forester/java/src/org/forester/phylogeny/data/Accession.java
forester/java/src/org/forester/test/Test.java
forester/java/src/org/forester/util/SequenceAccessionTools.java

index 73c2c4f..0e217c4 100644 (file)
@@ -48,8 +48,6 @@ import java.util.Locale;
 import java.util.Set;
 import java.util.SortedSet;
 import java.util.TreeSet;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
 
 import javax.imageio.IIOImage;
 import javax.imageio.ImageIO;
@@ -71,7 +69,6 @@ import org.forester.phylogeny.Phylogeny;
 import org.forester.phylogeny.PhylogenyMethods;
 import org.forester.phylogeny.PhylogenyMethods.DESCENDANT_SORT_PRIORITY;
 import org.forester.phylogeny.PhylogenyNode;
-import org.forester.phylogeny.data.Accession;
 import org.forester.phylogeny.data.Taxonomy;
 import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
 import org.forester.phylogeny.factories.PhylogenyFactory;
@@ -84,10 +81,6 @@ public final class AptxUtil {
 
     private final static String[] AVAILABLE_FONT_FAMILIES_SORTED = GraphicsEnvironment.getLocalGraphicsEnvironment()
                                                                          .getAvailableFontFamilyNames();
-    private final static Pattern  seq_identifier_pattern_1       = Pattern
-                                                                         .compile( "^([A-Za-z]{2,5})[|=:]([0-9A-Za-z_\\.]{5,40})\\s*$" );
-    private final static Pattern  seq_identifier_pattern_2       = Pattern
-                                                                         .compile( "^([A-Za-z]{2,5})[|=:]([0-9A-Za-z_\\.]{5,40})[|,; ].*$" );
     static {
         Arrays.sort( AVAILABLE_FONT_FAMILIES_SORTED );
     }
@@ -191,28 +184,6 @@ public final class AptxUtil {
         return tax_set;
     }
 
-    public final static Accession obtainSequenceAccessionFromName( final String sequence_name ) {
-        final String n = sequence_name.trim();
-        final Matcher matcher1 = seq_identifier_pattern_1.matcher( n );
-        String group1 = "";
-        String group2 = "";
-        if ( matcher1.matches() ) {
-            group1 = matcher1.group( 1 );
-            group2 = matcher1.group( 2 );
-        }
-        else {
-            final Matcher matcher2 = seq_identifier_pattern_2.matcher( n );
-            if ( matcher2.matches() ) {
-                group1 = matcher2.group( 1 );
-                group2 = matcher2.group( 2 );
-            }
-        }
-        if ( ForesterUtil.isEmpty( group1 ) || ForesterUtil.isEmpty( group2 ) ) {
-            return null;
-        }
-        return new Accession( group2, group1 );
-    }
-
     public final static void printWarningMessage( final String name, final String message ) {
         System.out.println( "[" + name + "] > " + message );
     }
index dad02df..caf9ad8 100644 (file)
@@ -107,7 +107,12 @@ public class UrlTreeReader implements Runnable {
                         parser = new NexusPhylogeniesParser();
                         ( ( NexusPhylogeniesParser ) parser ).setReplaceUnderscores( true );
                         break;
-                    case TREEBASE:
+                    case TREEBASE_TREE:
+                        parser = new NexusPhylogeniesParser();
+                        ( ( NexusPhylogeniesParser ) parser ).setReplaceUnderscores( true );
+                        ( ( NexusPhylogeniesParser ) parser ).setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.NO );
+                        break;
+                    case TREEBASE_STUDY:
                         parser = new NexusPhylogeniesParser();
                         ( ( NexusPhylogeniesParser ) parser ).setReplaceUnderscores( true );
                         ( ( NexusPhylogeniesParser ) parser ).setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.NO );
index 18bc84f..5efee81 100644 (file)
@@ -35,21 +35,25 @@ import org.forester.phylogeny.PhylogenyNode;
 import org.forester.phylogeny.data.Accession;
 import org.forester.phylogeny.data.Identifier;
 import org.forester.phylogeny.data.Sequence;
+import org.forester.phylogeny.data.Taxonomy;
 import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
 import org.forester.phylogeny.iterators.PreorderTreeIterator;
 import org.forester.util.ForesterUtil;
+import org.forester.util.SequenceAccessionTools;
 
 public final class WebserviceUtil {
 
-    public static final String TREE_FAM_INST  = "tree_fam";
     public static final String PFAM_INST      = "pfam";
+    public static final String PFAM_NAME      = "Pfam";
+    public static final String PFAM_SERVER    = "http://pfam.janelia.org";
+    public static final String TOL_NAME       = "Tree of Life";
     public static final String TOL_WEBSERVER  = "http://tolweb.org/onlinecontributors/app?service=external&page=xml/TreeStructureService&node_id="
                                                       + PhylogeniesWebserviceClient.QUERY_PLACEHOLDER;
-    public static final String TOL_NAME       = "Tree of Life";
+    public static final String TREE_BASE_DESC = "This data set was downloaded from TreeBASE, a relational database of phylogenetic knowledge. TreeBASE has been supported by the NSF, Harvard University, Yale University, SDSC and UC Davis. Please do not remove this acknowledgment.";
+    public static final String TREE_BASE_INST = "treebase";
     public static final String TREE_BASE_NAME = "TreeBASE";
+    public static final String TREE_FAM_INST  = "tree_fam";
     public static final String TREE_FAM_NAME  = "TreeFam";
-    public static final String PFAM_NAME      = "Pfam";
-    public static final String PFAM_SERVER    = "http://pfam.janelia.org";
 
     public static List<PhylogeniesWebserviceClient> createDefaultClients() {
         final List<PhylogeniesWebserviceClient> clients = new ArrayList<PhylogeniesWebserviceClient>();
@@ -65,17 +69,29 @@ public final class WebserviceUtil {
                                                            "http://tolweb.org",
                                                            null ) );
         clients.add( new BasicPhylogeniesWebserviceClient( TREE_BASE_NAME,
+                                                           "Read Tree(s) from TreeBASE Study...",
+                                                           "Use TreeBASE to obtain evolutionary tree(s) from a study",
+                                                           "Please enter a TreeBASE study (\"S\") identifier (without the \"S\")\n(Examples: 15613, 15632, 14525, 14909)",
+                                                           WsPhylogenyFormat.TREEBASE_STUDY,
+                                                           null,
+                                                           "http://purl.org/phylo/treebase/phylows/study/TB2:S"
+                                                                   + PhylogeniesWebserviceClient.QUERY_PLACEHOLDER
+                                                                   + "?format=nexus",
+                                                           true,
+                                                           "http://www.treebase.org",
+                                                           TREE_BASE_INST ) );
+        clients.add( new BasicPhylogeniesWebserviceClient( TREE_BASE_NAME,
                                                            "Read Tree from TreeBASE...",
                                                            "Use TreeBASE to obtain a evolutionary tree",
-                                                           "Please enter a TreeBASE tree identifier\n(Examples: 2654, 825, 4931, 2518, 2406, 4934)",
-                                                           WsPhylogenyFormat.TREEBASE,
+                                                           "Please enter a TreeBASE tree (\"Tr\") identifier (without the \"Tr\")\n(Examples: 422, 2654, 825, 4931, 2518, 2406, 4934)",
+                                                           WsPhylogenyFormat.TREEBASE_TREE,
                                                            null,
                                                            "http://purl.org/phylo/treebase/phylows/tree/TB2:Tr"
                                                                    + PhylogeniesWebserviceClient.QUERY_PLACEHOLDER
                                                                    + "?format=nexus",
                                                            true,
                                                            "http://www.treebase.org",
-                                                           null ) );
+                                                           TREE_BASE_INST ) );
         clients.add( new BasicPhylogeniesWebserviceClient( PFAM_NAME,
                                                            "Read Gene Tree from Pfam...",
                                                            "Use  Pfam to obtain gene trees for seed alignments",
@@ -103,6 +119,23 @@ public final class WebserviceUtil {
         return clients;
     }
 
+    public static void processInstructions( final PhylogeniesWebserviceClient client, final Phylogeny phylogeny )
+            throws PhyloXmlDataFormatException {
+        if ( client.getProcessingInstructions().equals( WebserviceUtil.TREE_FAM_INST ) ) {
+            WebserviceUtil.processTreeFamTrees( phylogeny );
+        }
+        else if ( client.getProcessingInstructions().equals( WebserviceUtil.PFAM_INST ) ) {
+            WebserviceUtil.extractSpTremblAccFromNodeName( phylogeny, "sptrembl" );
+            PhylogenyMethods.transferInternalNodeNamesToConfidence( phylogeny, "bootstrap" );
+        }
+        else if ( client.getProcessingInstructions().equals( WebserviceUtil.TREE_BASE_INST ) ) {
+            if ( PhylogenyMethods.isInternalNamesLookLikeConfidences( phylogeny ) ) {
+                PhylogenyMethods.transferInternalNodeNamesToConfidence( phylogeny, "" );
+            }
+            WebserviceUtil.processTreeBaseTrees( phylogeny );
+        }
+    }
+
     static void extractSpTremblAccFromNodeName( final Phylogeny phy, final String source ) {
         final PreorderTreeIterator it = new PreorderTreeIterator( phy );
         while ( it.hasNext() ) {
@@ -123,15 +156,24 @@ public final class WebserviceUtil {
         }
     }
 
-    public static void processInstructions( final PhylogeniesWebserviceClient client, final Phylogeny phylogeny )
-            throws PhyloXmlDataFormatException {
-        if ( client.getProcessingInstructions().equals( WebserviceUtil.TREE_FAM_INST ) ) {
-            
-            WebserviceUtil.processTreeFamTrees( phylogeny );
+    static void processTreeBaseTrees( final Phylogeny phy ) {
+        phy.setDescription( TREE_BASE_DESC );
+        final PhylogenyNodeIterator it = phy.iteratorExternalForward();
+        while ( it.hasNext() ) {
+            final PhylogenyNode n = it.next();
+            if ( !ForesterUtil.isEmpty( n.getName() ) ) {
+                final Accession acc = SequenceAccessionTools.parseAccessorFromString( n.getName() );
+                if ( acc != null ) {
+                    if ( !n.getNodeData().isHasSequence() ) {
+                        n.getNodeData().addSequence( new Sequence() );
+                    }
+                    final Sequence s = n.getNodeData().getSequence();
+                    if ( s.getAccession() == null ) {
+                        s.setAccession( acc );
+                    }
+                }
+            }
         }
-        else if ( client.getProcessingInstructions().equals( WebserviceUtil.PFAM_INST ) ) {
-            WebserviceUtil.extractSpTremblAccFromNodeName( phylogeny, "sptrembl" );
-            PhylogenyMethods.transferInternalNodeNamesToConfidence( phylogeny, "bootstrap" );        }
     }
 
     static void processTreeFamTrees( final Phylogeny phy ) {
@@ -140,15 +182,41 @@ public final class WebserviceUtil {
             final PhylogenyNode n = it.next();
             if ( n.isExternal() ) {
                 n.getNodeData().setEvent( null );
+                if ( !ForesterUtil.isEmpty( n.getName() ) ) {
+                    final Accession acc = SequenceAccessionTools.parseAccessorFromString( n.getName() );
+                    if ( acc != null ) {
+                        if ( !n.getNodeData().isHasSequence() ) {
+                            n.getNodeData().addSequence( new Sequence() );
+                        }
+                        final Sequence s = n.getNodeData().getSequence();
+                        if ( s.getAccession() == null ) {
+                            s.setAccession( acc );
+                        }
+                    }
+                }
+            }
+            else {
+                if ( ( n.getBranchData() != null ) && n.getBranchData().isHasConfidences()
+                        && ( n.getBranchData().getConfidence( 0 ) != null ) ) {
+                    n.getBranchData().getConfidence( 0 ).setType( "bootstrap" );
+                }
+                if ( !ForesterUtil.isEmpty( n.getName() ) ) {
+                    if ( !n.getNodeData().isHasTaxonomy() ) {
+                        n.getNodeData().addTaxonomy( new Taxonomy() );
+                    }
+                    final Taxonomy t = n.getNodeData().getTaxonomy();
+                    if ( ForesterUtil.isEmpty( t.getScientificName() ) ) {
+                        t.setScientificName( n.getName() );
+                        n.setName( "" );
+                    }
+                }
             }
-            
             if ( n.getNodeData().isHasTaxonomy() && ( n.getNodeData().getTaxonomy().getIdentifier() != null ) ) {
                 n.getNodeData()
                         .getTaxonomy()
-                        .setIdentifier( new Identifier( n.getNodeData().getTaxonomy().getIdentifier().getValue(), "ncbi" ) );
+                        .setIdentifier( new Identifier( n.getNodeData().getTaxonomy().getIdentifier().getValue(),
+                                                        "ncbi" ) );
             }
         }
     }
-
-    
 }
index e60528d..42ad0b1 100644 (file)
@@ -58,6 +58,6 @@ public final class WebservicesManager {
     }
 
     public enum WsPhylogenyFormat {
-        NH, NHX, NEXUS, TOL_XML_RESPONSE, PHYLOXML, NH_EXTRACT_TAXONOMY, PFAM, TREEBASE
+        NEXUS, NH, NH_EXTRACT_TAXONOMY, NHX, PFAM, PHYLOXML, TOL_XML_RESPONSE, TREEBASE_STUDY, TREEBASE_TREE
     }
 }
index 46f6548..4a25f4d 100644 (file)
@@ -51,32 +51,40 @@ import org.forester.util.ForesterUtil;
 public final class NexusPhylogeniesParser implements IteratingPhylogenyParser, PhylogenyParser {
 
     final private static String  begin_trees               = NexusConstants.BEGIN_TREES.toLowerCase();
+    final private static String  end                       = NexusConstants.END.toLowerCase();
+    final private static String  endblock                  = "endblock";
+    final private static Pattern ROOTEDNESS_PATTERN        = Pattern.compile( ".+=\\s*\\[&([R|U])\\].*" );
     final private static String  taxlabels                 = NexusConstants.TAXLABELS.toLowerCase();
+    final private static Pattern TITLE_PATTERN             = Pattern.compile( "TITLE.?\\s+([^;]+)",
+                                                                              Pattern.CASE_INSENSITIVE );
     final private static String  translate                 = NexusConstants.TRANSLATE.toLowerCase();
     final private static String  tree                      = NexusConstants.TREE.toLowerCase();
-    final private static String  utree                     = NexusConstants.UTREE.toLowerCase();
-    final private static String  end                       = NexusConstants.END.toLowerCase();
-    final private static String  endblock                  = "endblock";
     final private static Pattern TREE_NAME_PATTERN         = Pattern.compile( "\\s*.?Tree\\s+(.+?)\\s*=.+",
                                                                               Pattern.CASE_INSENSITIVE );
-    final private static Pattern ROOTEDNESS_PATTERN        = Pattern.compile( ".+=\\s*\\[&([R|U])\\].*" );
-    private Object               _nexus_source;
-    private List<String>         _taxlabels;
-    private Map<String, String>  _translate_map;
-    private boolean              _replace_underscores      = NHXParser.REPLACE_UNDERSCORES_DEFAULT;
-    private boolean              _ignore_quotes_in_nh_data = Constants.NH_PARSING_IGNORE_QUOTES_DEFAULT;
-    private TAXONOMY_EXTRACTION  _taxonomy_extraction      = TAXONOMY_EXTRACTION.NO;
-    private Phylogeny            _next;
+    final private static String  utree                     = NexusConstants.UTREE.toLowerCase();
     private BufferedReader       _br;
-    private boolean              _in_trees_block;
-    private StringBuilder        _nh;
-    private String               _name;
-    private StringBuilder        _translate_sb;
+    private boolean              _ignore_quotes_in_nh_data = Constants.NH_PARSING_IGNORE_QUOTES_DEFAULT;
     private boolean              _in_taxalabels;
     private boolean              _in_translate;
+    private boolean              _in_tree;
+    private boolean              _in_trees_block;
     private boolean              _is_rooted;
+    private String               _name;
+    private Phylogeny            _next;
+    private Object               _nexus_source;
+    private StringBuilder        _nh;
+    private boolean              _replace_underscores      = NHXParser.REPLACE_UNDERSCORES_DEFAULT;
     private boolean              _rooted_info_present;
-    private boolean              _in_tree;
+    private List<String>         _taxlabels;
+    private TAXONOMY_EXTRACTION  _taxonomy_extraction      = TAXONOMY_EXTRACTION.NO;
+    private String               _title;
+    private Map<String, String>  _translate_map;
+    private StringBuilder        _translate_sb;
+
+    @Override
+    public String getName() {
+        return "Nexus Phylogenies Parser";
+    }
 
     @Override
     public final boolean hasNext() {
@@ -110,7 +118,8 @@ public final class NexusPhylogeniesParser implements IteratingPhylogenyParser, P
         _translate_map = new HashMap<String, String>();
         _nh = new StringBuilder();
         _name = "";
-        _translate_sb = new StringBuilder();
+        _title = "";
+        _translate_sb = null;
         _next = null;
         _in_trees_block = false;
         _in_taxalabels = false;
@@ -143,7 +152,8 @@ public final class NexusPhylogeniesParser implements IteratingPhylogenyParser, P
         _taxonomy_extraction = taxonomy_extraction;
     }
 
-    private final void createPhylogeny( final String name,
+    private final void createPhylogeny( final String title,
+                                        final String name,
                                         final StringBuilder nhx,
                                         final boolean rooted_info_present,
                                         final boolean is_rooted ) throws IOException {
@@ -160,7 +170,19 @@ public final class NexusPhylogeniesParser implements IteratingPhylogenyParser, P
         if ( p == null ) {
             throw new PhylogenyParserException( "failed to create phylogeny" );
         }
-        p.setName( name );
+        String myname = null;
+        if ( !ForesterUtil.isEmpty( title ) && !ForesterUtil.isEmpty( name ) ) {
+            myname = title.replace( '_', ' ' ).trim() + " (" + name.trim() + ")";
+        }
+        else if ( !ForesterUtil.isEmpty( title ) ) {
+            myname = title.replace( '_', ' ' ).trim();
+        }
+        else if ( !ForesterUtil.isEmpty( name ) ) {
+            myname = name.trim();
+        }
+        if ( !ForesterUtil.isEmpty( myname ) ) {
+            p.setName( myname );
+        }
         if ( rooted_info_present ) {
             p.setRooted( is_rooted );
         }
@@ -186,6 +208,11 @@ public final class NexusPhylogeniesParser implements IteratingPhylogenyParser, P
                 if ( !_replace_underscores && ( ( _taxonomy_extraction != TAXONOMY_EXTRACTION.NO ) ) ) {
                     ParserUtils.extractTaxonomyDataFromNodeName( node, _taxonomy_extraction );
                 }
+                else if ( _replace_underscores ) {
+                    if ( !ForesterUtil.isEmpty( node.getName() ) ) {
+                        node.setName( node.getName().replace( '_', ' ' ).trim() );
+                    }
+                }
             }
         }
         _next = p;
@@ -204,6 +231,7 @@ public final class NexusPhylogeniesParser implements IteratingPhylogenyParser, P
                     _in_trees_block = true;
                     _in_taxalabels = false;
                     _in_translate = false;
+                    _title = "";
                 }
                 else if ( line_lc.startsWith( taxlabels ) ) {
                     _in_trees_block = false;
@@ -211,20 +239,25 @@ public final class NexusPhylogeniesParser implements IteratingPhylogenyParser, P
                     _in_translate = false;
                 }
                 else if ( line_lc.startsWith( translate ) ) {
+                    _translate_sb = new StringBuilder();
                     _in_taxalabels = false;
                     _in_translate = true;
                 }
                 else if ( _in_trees_block ) {
-                    //FIXME TODO need to work on this "title" and "link"
-                    if ( line_lc.startsWith( "title" ) || line_lc.startsWith( "link" ) ) {
-                        // Do nothing.
+                    if ( line_lc.startsWith( "title" ) ) {
+                        final Matcher title_m = TITLE_PATTERN.matcher( line );
+                        if ( title_m.lookingAt() ) {
+                            _title = title_m.group( 1 );
+                        }
+                    }
+                    else if ( line_lc.startsWith( "link" ) ) {
                     }
                     else if ( line_lc.startsWith( end ) || line_lc.startsWith( endblock ) ) {
                         _in_trees_block = false;
                         _in_tree = false;
                         _in_translate = false;
                         if ( _nh.length() > 0 ) {
-                            createPhylogeny( _name, _nh, _rooted_info_present, _is_rooted );
+                            createPhylogeny( _title, _name, _nh, _rooted_info_present, _is_rooted );
                             _nh = new StringBuilder();
                             _name = "";
                             _rooted_info_present = false;
@@ -238,7 +271,7 @@ public final class NexusPhylogeniesParser implements IteratingPhylogenyParser, P
                         boolean might = false;
                         if ( _nh.length() > 0 ) {
                             might = true;
-                            createPhylogeny( _name, _nh, _rooted_info_present, _is_rooted );
+                            createPhylogeny( _title, _name, _nh, _rooted_info_present, _is_rooted );
                             _nh = new StringBuilder();
                             _name = "";
                             _rooted_info_present = false;
@@ -271,7 +304,7 @@ public final class NexusPhylogeniesParser implements IteratingPhylogenyParser, P
                             && !line_lc.startsWith( end ) && !line_lc.startsWith( endblock ) && line_lc.endsWith( ";" ) ) {
                         _in_tree = false;
                         _in_translate = false;
-                        createPhylogeny( _name, _nh, _rooted_info_present, _is_rooted );
+                        createPhylogeny( _title, _name, _nh, _rooted_info_present, _is_rooted );
                         _nh = new StringBuilder();
                         _name = "";
                         _rooted_info_present = false;
@@ -316,7 +349,7 @@ public final class NexusPhylogeniesParser implements IteratingPhylogenyParser, P
             }
         }
         if ( _nh.length() > 0 ) {
-            createPhylogeny( _name, _nh, _rooted_info_present, _is_rooted );
+            createPhylogeny( _title, _name, _nh, _rooted_info_present, _is_rooted );
             if ( _next != null ) {
                 return;
             }
@@ -331,10 +364,10 @@ public final class NexusPhylogeniesParser implements IteratingPhylogenyParser, P
         for( final String pair : s.split( "," ) ) {
             final String[] kv = pair.trim().split( "\\s+" );
             if ( ( kv.length < 2 ) || ( kv.length > 3 ) ) {
-                throw new IOException( "ill-formatted translate values: " + translate_sb );
+                throw new IOException( "ill-formatted translate values: " + pair );
             }
             if ( ( kv.length == 3 ) && !kv[ 0 ].toLowerCase().trim().equals( translate ) ) {
-                throw new IOException( "ill-formatted translate values: " + translate_sb );
+                throw new IOException( "ill-formatted translate values: " + pair );
             }
             String key = "";
             String value = "";
@@ -356,9 +389,4 @@ public final class NexusPhylogeniesParser implements IteratingPhylogenyParser, P
     private final static String removeWhiteSpaceBeforeSemicolon( final String s ) {
         return s.replaceAll( "\\s+;", ";" );
     }
-
-    @Override
-    public String getName() {
-        return "Nexus Phylogenies Parser";
-    }
 }
index 1f872cf..c085575 100644 (file)
@@ -1516,26 +1516,51 @@ public class PhylogenyMethods {
         }
     }
 
-    final static public void transferInternalNodeNamesToConfidence( final Phylogeny phy, final String confidence_type ) {
+    final static public boolean isInternalNamesLookLikeConfidences( final Phylogeny phy ) {
         final PhylogenyNodeIterator it = phy.iteratorPostorder();
         while ( it.hasNext() ) {
             final PhylogenyNode n = it.next();
-            if ( !n.isExternal() && !n.getBranchData().isHasConfidences() ) {
+            if ( !n.isExternal() && !n.isRoot() ) {
                 if ( !ForesterUtil.isEmpty( n.getName() ) ) {
-                    double d = -1.0;
+                    double value = -1;
                     try {
-                        d = Double.parseDouble( n.getName() );
+                        value = Double.parseDouble( n.getName() );
                     }
-                    catch ( final Exception e ) {
-                        d = -1.0;
+                    catch ( final NumberFormatException e ) {
+                        return false;
                     }
-                    if ( d >= 0.0 ) {
-                        n.getBranchData().addConfidence( new Confidence( d, confidence_type ) );
-                        n.setName( "" );
+                    if ( ( value < 0.0 ) || ( value > 100 ) ) {
+                        return false;
                     }
                 }
             }
         }
+        return true;
+    }
+
+    final static public void transferInternalNodeNamesToConfidence( final Phylogeny phy, final String confidence_type ) {
+        final PhylogenyNodeIterator it = phy.iteratorPostorder();
+        while ( it.hasNext() ) {
+            transferInternalNodeNameToConfidence( confidence_type, it.next() );
+        }
+    }
+
+    private static void transferInternalNodeNameToConfidence( final String confidence_type, final PhylogenyNode n ) {
+        if ( !n.isExternal() && !n.getBranchData().isHasConfidences() ) {
+            if ( !ForesterUtil.isEmpty( n.getName() ) ) {
+                double d = -1.0;
+                try {
+                    d = Double.parseDouble( n.getName() );
+                }
+                catch ( final Exception e ) {
+                    d = -1.0;
+                }
+                if ( d >= 0.0 ) {
+                    n.getBranchData().addConfidence( new Confidence( d, confidence_type ) );
+                    n.setName( "" );
+                }
+            }
+        }
     }
 
     final static public void transferNodeNameToField( final Phylogeny phy,
index b3d99a0..fe11d8d 100644 (file)
@@ -40,7 +40,7 @@ public final class Accession implements PhylogenyData, Comparable<Accession> {
     final private String _value;
 
     public enum Source {
-        NCBI, REFSEQ, UNIPROT, GI, EMBL, UNKNOWN;
+        NCBI, REFSEQ, UNIPROT, GI, EMBL, ENSEMBL, UNKNOWN;
 
         @Override
         public String toString() {
@@ -55,6 +55,8 @@ public final class Accession implements PhylogenyData, Comparable<Accession> {
                     return "gi";
                 case EMBL:
                     return "embl";
+                case ENSEMBL:
+                    return "ensembl";
                 case UNKNOWN:
                     return "unknown";
                 default:
index 5103e7d..a7a7c8c 100644 (file)
@@ -6656,6 +6656,35 @@ public final class Test {
             if ( phylogenies[ 17 ].getNumberOfExternalNodes() != 10 ) {
                 return false;
             }
+            final NexusPhylogeniesParser p2 = new NexusPhylogeniesParser();
+            phylogenies = null;
+            phylogenies = factory.create( Test.PATH_TO_TEST_DATA + "S15613.nex", p2 );
+            if ( phylogenies.length != 9 ) {
+                return false;
+            }
+            if ( !isEqual( 0.48039661496919533, phylogenies[ 0 ].getNode( "Diadocidia_spinosula" )
+                    .getDistanceToParent() ) ) {
+                return false;
+            }
+            if ( !isEqual( 0.3959796191512233, phylogenies[ 0 ].getNode( "Diadocidia_stanfordensis" )
+                    .getDistanceToParent() ) ) {
+                return false;
+            }
+            if ( !phylogenies[ 0 ].getName().equals( "Family Diadocidiidae MLT (Imported_tree_0)" ) ) {
+                return false;
+            }
+            if ( !phylogenies[ 1 ].getName().equals( "Family Diadocidiidae BAT (con_50_majrule)" ) ) {
+                return false;
+            }
+            if ( !phylogenies[ 2 ].getName().equals( "Family Diadocidiidae BAT (con_50_majrule)" ) ) {
+                return false;
+            }
+            if ( !isEqual( 0.065284, phylogenies[ 7 ].getNode( "Bradysia_amoena" ).getDistanceToParent() ) ) {
+                return false;
+            }
+            if ( !isEqual( 0.065284, phylogenies[ 8 ].getNode( "Bradysia_amoena" ).getDistanceToParent() ) ) {
+                return false;
+            }
         }
         catch ( final Exception e ) {
             e.printStackTrace( System.out );
@@ -7218,6 +7247,82 @@ public final class Test {
             if ( phy.isRooted() ) {
                 return false;
             }
+            //
+            final NexusPhylogeniesParser p2 = new NexusPhylogeniesParser();
+            p2.setSource( Test.PATH_TO_TEST_DATA + "S15613.nex" );
+            // 0
+            if ( !p2.hasNext() ) {
+                return false;
+            }
+            phy = p2.next();
+            if ( !isEqual( 0.48039661496919533, phy.getNode( "Diadocidia_spinosula" ).getDistanceToParent() ) ) {
+                return false;
+            }
+            if ( !isEqual( 0.3959796191512233, phy.getNode( "Diadocidia_stanfordensis" ).getDistanceToParent() ) ) {
+                return false;
+            }
+            // 1
+            if ( !p2.hasNext() ) {
+                return false;
+            }
+            phy = p2.next();
+            // 2
+            if ( !p2.hasNext() ) {
+                return false;
+            }
+            phy = p2.next();
+            // 3
+            if ( !p2.hasNext() ) {
+                return false;
+            }
+            phy = p2.next();
+            // 4
+            if ( !p2.hasNext() ) {
+                return false;
+            }
+            phy = p2.next();
+            // 5
+            if ( !p2.hasNext() ) {
+                return false;
+            }
+            phy = p2.next();
+            // 6
+            if ( !p2.hasNext() ) {
+                return false;
+            }
+            phy = p2.next();
+            // 7
+            if ( !p2.hasNext() ) {
+                return false;
+            }
+            phy = p2.next();
+            // 8
+            if ( !p2.hasNext() ) {
+                return false;
+            }
+            phy = p2.next();
+            if ( !isEqual( 0.065284, phy.getNode( "Bradysia_amoena" ).getDistanceToParent() ) ) {
+                return false;
+            }
+            if ( p2.hasNext() ) {
+                return false;
+            }
+            phy = p2.next();
+            if ( phy != null ) {
+                return false;
+            }
+            // 0
+            p2.reset();
+            if ( !p2.hasNext() ) {
+                return false;
+            }
+            phy = p2.next();
+            if ( !isEqual( 0.48039661496919533, phy.getNode( "Diadocidia_spinosula" ).getDistanceToParent() ) ) {
+                return false;
+            }
+            if ( !isEqual( 0.3959796191512233, phy.getNode( "Diadocidia_stanfordensis" ).getDistanceToParent() ) ) {
+                return false;
+            }
         }
         catch ( final Exception e ) {
             e.printStackTrace( System.out );
index cce1b2e..5ab000e 100644 (file)
@@ -64,6 +64,7 @@ public final class SequenceAccessionTools {
                                                                .compile( "(?:\\b|_)(?:sp|tr)[\\.|\\-_=/\\\\]([A-Z][0-9][A-Z0-9]{3}[0-9])(?:\\b|_)" );\r
     public final static Pattern  UNIPROT_KB_PATTERN_2  = Pattern\r
                                                                .compile( "(?:\\b|_)(?:[A-Z0-9]{2,5}|(?:[A-Z][0-9][A-Z0-9]{3}[0-9]))_(([A-Z9][A-Z]{2}[A-Z0-9]{2})|RAT|PIG|PEA)(?:\\b|_)" );\r
+    public final static Pattern  ENSEMBL_PATTERN       = Pattern.compile( "(?:\\b|_)(ENS[A-Z]*[0-9]+)(?:\\b|_)" );\r
     // RefSeq accession numbers can be distinguished from GenBank accessions \r
     // by their distinct prefix format of 2 characters followed by an\r
     // underscore character ('_'). For example, a RefSeq protein accession is NP_015325. \r
@@ -243,6 +244,10 @@ public final class SequenceAccessionTools {
             if ( !ForesterUtil.isEmpty( v ) ) {\r
                 return new Accession( v, Source.GI );\r
             }\r
+            v = parseEnsemlAccessorFromString( s );\r
+            if ( !ForesterUtil.isEmpty( v ) ) {\r
+                return new Accession( v, Source.ENSEMBL );\r
+            }\r
         }\r
         return null;\r
     }\r
@@ -287,6 +292,14 @@ public final class SequenceAccessionTools {
         return null;\r
     }\r
 \r
+    public final static String parseEnsemlAccessorFromString( final String s ) {\r
+        final Matcher m = ENSEMBL_PATTERN.matcher( s );\r
+        if ( m.find() ) {\r
+            return m.group( 1 );\r
+        }\r
+        return null;\r
+    }\r
+\r
     public final static String parseRefSeqAccessorFromString( final String s ) {\r
         final Matcher m = REFSEQ_PATTERN.matcher( s );\r
         if ( m.lookingAt() ) {\r