gene -> domain
[jalview.git] / forester / java / src / org / forester / archaeopteryx / webservices / WebserviceUtil.java
index 5ba492e..0dad04a 100644 (file)
@@ -20,7 +20,7 @@
 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
 //
 // Contact: phylosoft @ gmail . com
-// WWW: www.phylosoft.org/forester
+// WWW: https://sites.google.com/site/cmzmasek/home/software/forester
 
 package org.forester.archaeopteryx.webservices;
 
@@ -29,93 +29,117 @@ import java.util.List;
 
 import org.forester.archaeopteryx.webservices.WebservicesManager.WsPhylogenyFormat;
 import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException;
-import org.forester.io.parsers.phyloxml.PhyloXmlUtil;
 import org.forester.phylogeny.Phylogeny;
 import org.forester.phylogeny.PhylogenyMethods;
 import org.forester.phylogeny.PhylogenyNode;
 import org.forester.phylogeny.data.Accession;
 import org.forester.phylogeny.data.Identifier;
 import org.forester.phylogeny.data.Sequence;
+import org.forester.phylogeny.data.Taxonomy;
 import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
 import org.forester.phylogeny.iterators.PreorderTreeIterator;
 import org.forester.util.ForesterUtil;
+import org.forester.util.SequenceAccessionTools;
 
 public final class WebserviceUtil {
 
-    public static final String TAX_CODE_TO_SCI_NAME = "tax_code_to_sci_name";
-    public static final String TREE_FAM_INST        = "tree_fam";
-    public static final String PFAM_INST            = "pfam";
-    public static final String TOL_WEBSERVER        = "http://tolweb.org/onlinecontributors/app?service=external&page=xml/TreeStructureService&node_id="
-                                                            + PhylogeniesWebserviceClient.QUERY_PLACEHOLDER;
-    public static final String TOL_NAME             = "Tree of Life";
-    public static final String TREE_BASE_NAME       = "TreeBASE";
-    public static final String TREE_FAM_NAME        = "TreeFam";
-    public static final String PFAM_NAME            = "Pfam";
-    public static final String PFAM_SERVER          = "http://pfam.janelia.org";
+    public static final String PFAM_INST                       = "pfam"; // processing-instruction key; processInstructions() matches it to run the Pfam clean-up
+    public static final String PFAM_NAME                       = "Pfam";
+    public static final String PFAM_SERVER                     = "http://pfam.xfam.org"; // Pfam base URL; createDefaultClients() appends the family/tree path to it
+    public static final String TOL_NAME                        = "Tree of Life (ToL)";
+    public static final String TOL_URL_BASE                    = "http://tolweb.org/onlinecontributors/app?service=external&page=xml/TreeStructureService&node_id="; // ToL service URL up to and including "node_id="
+    public static final String TOL_WEBSERVER                   = TOL_URL_BASE
+                                                                       + PhylogeniesWebserviceClient.QUERY_PLACEHOLDER; // TOL_URL_BASE with the query placeholder appended
+    public static final String TREE_BASE_DESC                  = "This data set was downloaded from TreeBASE, a relational database of phylogenetic knowledge. TreeBASE has been supported by the NSF, Harvard University, Yale University, SDSC and UC Davis. Please do not remove this acknowledgment."; // set as the phylogeny description by processTreeBaseTrees()
+    public static final String TREE_BASE_INST                  = "treebase"; // processing-instruction key passed to both TreeBASE clients in createDefaultClients()
+    public static final String TREE_BASE_NAME                  = "TreeBASE";
+    public static final String TREE_FAM_INST                   = "tree_fam"; // processing-instruction key; processInstructions() matches it to call processTreeFamTrees()
+    public static final String TREE_FAM_NAME                   = "TreeFam";
+    public static final String TREE_FAM_URL_BASE               = "http://www.treefam.org/family/TF"; // TreeFam family URL prefix; the query placeholder follows "TF"
+    public static final String TREEBASE_PHYLOWS_STUDY_URL_BASE = "http://purl.org/phylo/treebase/phylows/study/TB2:S"; // study URL prefix; the query placeholder follows "TB2:S"
+    public static final String TREEBASE_PHYLOWS_TREE_URL_BASE  = "http://purl.org/phylo/treebase/phylows/tree/TB2:Tr"; // tree URL prefix; the query placeholder follows "TB2:Tr"
 
     public static List<PhylogeniesWebserviceClient> createDefaultClients() {
         final List<PhylogeniesWebserviceClient> clients = new ArrayList<PhylogeniesWebserviceClient>();
-        clients.add( new BasicPhylogeniesWebserviceClient( TOL_NAME,
-                                                           "Read Tree from Tree of Life...",
-                                                           "Use ToL webservice to obtain a phylogeny",
-                                                           "Please enter a Tree of Life node identifier\n(Examples: "
-                                                                   + "19386 for Cephalopoda, 2461 for Cnidaria, 2466 for Deuterostomia)",
-                                                           WsPhylogenyFormat.TOL_XML_RESPONSE,
-                                                           PhylogenyMethods.PhylogenyNodeField.TAXONOMY_SCIENTIFIC_NAME,
-                                                           WebserviceUtil.TOL_WEBSERVER,
+        clients.add( new BasicPhylogeniesWebserviceClient( TREE_BASE_NAME,
+                                                           "Read Tree(s) from TreeBASE Study...",
+                                                           "Use TreeBASE to obtain evolutionary tree(s) from a study",
+                                                           "Please enter a TreeBASE study (\"S\") identifier (without the \"S\")\n(Examples: 14909, 14525, 15613, 15632)",
+                                                           WsPhylogenyFormat.TREEBASE_STUDY,
+                                                           null,
+                                                           TREEBASE_PHYLOWS_STUDY_URL_BASE
+                                                                   + PhylogeniesWebserviceClient.QUERY_PLACEHOLDER
+                                                                   + "?format=nexus",
                                                            true,
-                                                           "http://tolweb.org",
-                                                           null ) );
+                                                           "http://www.treebase.org",
+                                                           TREE_BASE_INST ) );
         clients.add( new BasicPhylogeniesWebserviceClient( TREE_BASE_NAME,
                                                            "Read Tree from TreeBASE...",
-                                                           "Use TreeBASE to obtain a phylogeny",
-                                                           "Please enter a TreeBASE tree identifier\n(Examples: 2654, 825, 4931, 2518, 2406, 4934)",
-                                                           WsPhylogenyFormat.NEXUS,
-                                                           PhylogenyMethods.PhylogenyNodeField.TAXONOMY_SCIENTIFIC_NAME,
-                                                           "http://purl.org/phylo/treebase/phylows/tree/TB2:Tr"
+                                                           "Use TreeBASE to obtain a evolutionary tree",
+                                                           "Please enter a TreeBASE tree (\"Tr\") identifier (without the \"Tr\")\n(Examples: 2406, 422, 2654, 825, 4931, 2518, 4934)",
+                                                           WsPhylogenyFormat.TREEBASE_TREE,
+                                                           null,
+                                                           TREEBASE_PHYLOWS_TREE_URL_BASE
                                                                    + PhylogeniesWebserviceClient.QUERY_PLACEHOLDER
                                                                    + "?format=nexus",
                                                            true,
-                                                           "http://treebase.nescent.org",
-                                                           null ) );
+                                                           "http://www.treebase.org",
+                                                           TREE_BASE_INST ) );
         clients.add( new BasicPhylogeniesWebserviceClient( PFAM_NAME,
-                                                           "Read Gene Tree from Pfam...",
-                                                           "Use  Pfam to obtain a (full) gene tree",
+                                                           "Read Domain Tree from Pfam...",
+                                                           "Use  Pfam to obtain gene trees for seed alignments",
                                                            "Please enter a Pfam (PF) accession number\n(Examples: 01849 for NAC, 00452 for Bcl-2, 00046 for Homeobox)",
                                                            WsPhylogenyFormat.PFAM,
                                                            null,
-                                                           PFAM_SERVER + "/family/tree/download?alnType=full&acc=PF"
-                                                                   + PhylogeniesWebserviceClient.QUERY_PLACEHOLDER,
+                                                           PFAM_SERVER + "/family/PF"
+                                                                   + PhylogeniesWebserviceClient.QUERY_PLACEHOLDER
+                                                                   + "/tree/download",
                                                            false,
                                                            PFAM_SERVER,
                                                            PFAM_INST ) );
         clients.add( new BasicPhylogeniesWebserviceClient( TREE_FAM_NAME,
-                                                           "Read Full Gene Tree from TreeFam...",
-                                                           "Use TreeFam to obtain a (full) gene tree",
+                                                           "Read Gene Tree from TreeFam...",
+                                                           "Use TreeFam to obtain a gene tree",
                                                            "Please enter a TreeFam (TF) accession number\n(Examples: 101004 for Cyclin D, 315938 for Hox, 105310 for Wnt)",
                                                            WsPhylogenyFormat.NHX,
                                                            null,
-                                                           "http://www.treefam.org/cgi-bin/getdata.pl?ac=TF"
+                                                           TREE_FAM_URL_BASE
                                                                    + PhylogeniesWebserviceClient.QUERY_PLACEHOLDER
-                                                                   + "&f=full.nhx",
+                                                                   + "/tree/newick",
                                                            true,
                                                            "http://www.treefam.org",
                                                            TREE_FAM_INST ) );
-        clients.add( new BasicPhylogeniesWebserviceClient( TREE_FAM_NAME,
-                                                           "Read Clean Gene Tree from TreeFam...",
-                                                           "Use TreeFam to obtain a (\"clean\") gene tree",
-                                                           "Please enter a TreeFam (TF) accession number\n(Examples: 101004 for Cyclin D, 315938 for Hox, 105310 for Wnt)",
-                                                           WsPhylogenyFormat.NHX,
-                                                           null,
-                                                           "http://www.treefam.org/cgi-bin/getdata.pl?ac=TF"
-                                                                   + PhylogeniesWebserviceClient.QUERY_PLACEHOLDER
-                                                                   + "&f=clean.nhx",
+        clients.add( new BasicPhylogeniesWebserviceClient( TOL_NAME,
+                                                           "Read Tree from Tree of Life (ToL)...",
+                                                           "Use ToL webservice to obtain a evolutionary tree",
+                                                           "Please enter a Tree of Life node identifier\n(Examples: "
+                                                                   + "14923 for ray-finned fishes, 19386 for Cephalopoda, 2461 for Cnidaria)",
+                                                           WsPhylogenyFormat.TOL_XML_RESPONSE,
+                                                           PhylogenyMethods.PhylogenyNodeField.TAXONOMY_SCIENTIFIC_NAME,
+                                                           WebserviceUtil.TOL_WEBSERVER,
                                                            true,
-                                                           "http://www.treefam.org",
-                                                           TREE_FAM_INST ) );
+                                                           "http://tolweb.org",
+                                                           null ) );
         return clients;
     }
 
+    public static void processInstructions( final PhylogeniesWebserviceClient client, final Phylogeny phylogeny )
+            throws PhyloXmlDataFormatException { // post-processes a downloaded phylogeny according to the client's processing-instruction key; a null or unrecognised key is a no-op
+        if ( WebserviceUtil.TREE_FAM_INST.equals( client.getProcessingInstructions() ) ) { // constant-first equals(): a client without instructions (the ToL client is built with null) must not cause a NullPointerException
+            WebserviceUtil.processTreeFamTrees( phylogeny );
+        }
+        else if ( WebserviceUtil.PFAM_INST.equals( client.getProcessingInstructions() ) ) {
+            WebserviceUtil.extractSpTremblAccFromNodeName( phylogeny, "sptrembl" );
+            PhylogenyMethods.transferInternalNodeNamesToConfidence( phylogeny, "bootstrap" ); // internal node names are handed over as confidences labelled "bootstrap"
+        }
+        else if ( WebserviceUtil.TREE_BASE_INST.equals( client.getProcessingInstructions() ) ) {
+            if ( PhylogenyMethods.isInternalNamesLookLikeConfidences( phylogeny ) ) { // convert internal names only when the helper reports they look like confidence values
+                PhylogenyMethods.transferInternalNodeNamesToConfidence( phylogeny, "" ); // empty string as the confidence type
+            }
+            WebserviceUtil.processTreeBaseTrees( phylogeny ); // runs for every TreeBASE tree, whether or not names were converted
+        }
+    }
+
     static void extractSpTremblAccFromNodeName( final Phylogeny phy, final String source ) {
         final PreorderTreeIterator it = new PreorderTreeIterator( phy );
         while ( it.hasNext() ) {
@@ -136,87 +160,67 @@ public final class WebserviceUtil {
         }
     }
 
-    public static void processInstructions( final PhylogeniesWebserviceClient client, final Phylogeny phylogeny )
-            throws PhyloXmlDataFormatException {
-        if ( client.getProcessingInstructions().equals( WebserviceUtil.TAX_CODE_TO_SCI_NAME ) ) {
-            WebserviceUtil.transferTaxonomyCodeToScientificName( phylogeny );
-        }
-        else if ( client.getProcessingInstructions().equals( WebserviceUtil.TREE_FAM_INST ) ) {
-            WebserviceUtil.transferInternalTaxonomyCodeToScientificName( phylogeny );
-            WebserviceUtil.transferExternalScientificNameToTaxonomyCode( phylogeny );
-            WebserviceUtil.transferSequenceNameToSequenceAccession( phylogeny, "ensembl" );
-            WebserviceUtil.setTaxonomyIdentifierType( phylogeny, "ncbi" );
-        }
-        else if ( client.getProcessingInstructions().equals( WebserviceUtil.PFAM_INST ) ) {
-            WebserviceUtil.extractSpTremblAccFromNodeName( phylogeny, "sptrembl" );
-        }
-    }
-
-    static void setTaxonomyIdentifierType( final Phylogeny phy, final String type ) {
-        final PhylogenyNodeIterator it = phy.iteratorPostorder();
-        while ( it.hasNext() ) {
-            final PhylogenyNode n = it.next();
-            if ( n.getNodeData().isHasTaxonomy() && ( n.getNodeData().getTaxonomy().getIdentifier() != null ) ) {
-                n.getNodeData()
-                        .getTaxonomy()
-                        .setIdentifier( new Identifier( n.getNodeData().getTaxonomy().getIdentifier().getValue(), type ) );
-            }
-        }
-    }
-
-    static void transferExternalScientificNameToTaxonomyCode( final Phylogeny phy ) throws PhyloXmlDataFormatException {
-        final PhylogenyNodeIterator it = phy.iteratorPostorder();
+    static void processTreeBaseTrees( final Phylogeny phy ) { // stamps the TreeBASE acknowledgment on the phylogeny and fills in sequence accessions parsed from node names
+        phy.setDescription( TREE_BASE_DESC ); // replaces the phylogeny description with the acknowledgment text
+        final PhylogenyNodeIterator it = phy.iteratorExternalForward(); // presumably visits external nodes only - confirm against Phylogeny
         while ( it.hasNext() ) {
             final PhylogenyNode n = it.next();
-            if ( n.isExternal() && n.getNodeData().isHasTaxonomy() ) {
-                final String name = n.getNodeData().getTaxonomy().getScientificName();
-                if ( !ForesterUtil.isEmpty( name ) && PhyloXmlUtil.TAXOMONY_CODE_PATTERN_STRICT.matcher( name ).matches() ) {
-                    n.getNodeData().getTaxonomy().setScientificName( "" );
-                    n.getNodeData().getTaxonomy().setTaxonomyCode( name );
+            if ( !ForesterUtil.isEmpty( n.getName() ) ) { // nodes without a name are left untouched
+                final Accession acc = SequenceAccessionTools.parseAccessorFromString( n.getName() ); // may be null; only a non-null result is used below
+                if ( acc != null ) {
+                    if ( !n.getNodeData().isHasSequence() ) {
+                        n.getNodeData().addSequence( new Sequence() ); // create an empty sequence so the accession has somewhere to go
+                    }
+                    final Sequence s = n.getNodeData().getSequence();
+                    if ( s.getAccession() == null ) { // an accession that is already present is never overwritten
+                        s.setAccession( acc );
+                    }
                 }
             }
         }
     }
 
-    static void transferInternalTaxonomyCodeToScientificName( final Phylogeny phy ) throws PhyloXmlDataFormatException {
+    static void processTreeFamTrees( final Phylogeny phy ) { // TreeFam clean-up: external nodes get accessions parsed from their names; other nodes get "bootstrap" confidences and their names moved into taxonomy
         final PhylogenyNodeIterator it = phy.iteratorPostorder();
         while ( it.hasNext() ) {
             final PhylogenyNode n = it.next();
-            if ( !n.isExternal() && n.getNodeData().isHasTaxonomy() ) {
-                final String name = n.getNodeData().getTaxonomy().getTaxonomyCode();
-                if ( !ForesterUtil.isEmpty( name ) ) {
-                    n.getNodeData().getTaxonomy().setScientificName( name );
-                    n.getNodeData().getTaxonomy().setTaxonomyCode( "" );
+            if ( n.isExternal() ) {
+                n.getNodeData().setEvent( null ); // clear whatever event is attached to an external node
+                if ( !ForesterUtil.isEmpty( n.getName() ) ) {
+                    final Accession acc = SequenceAccessionTools.parseAccessorFromString( n.getName() ); // may be null; only a non-null result is used below
+                    if ( acc != null ) {
+                        if ( !n.getNodeData().isHasSequence() ) {
+                            n.getNodeData().addSequence( new Sequence() ); // create an empty sequence to carry the accession
+                        }
+                        final Sequence s = n.getNodeData().getSequence();
+                        if ( s.getAccession() == null ) { // an accession that is already present is never overwritten
+                            s.setAccession( acc );
+                        }
+                    }
                 }
             }
-        }
-    }
-
-    static void transferSequenceNameToSequenceAccession( final Phylogeny phy, final String source ) {
-        final PhylogenyNodeIterator it = phy.iteratorPostorder();
-        while ( it.hasNext() ) {
-            final PhylogenyNode n = it.next();
-            if ( n.getNodeData().isHasSequence() ) {
-                final String name = n.getNodeData().getSequence().getName();
-                if ( !ForesterUtil.isEmpty( name ) ) {
-                    n.getNodeData().getSequence().setName( "" );
-                    n.getNodeData().getSequence().setAccession( new Accession( name, source ) );
+            else {
+                if ( ( n.getBranchData() != null ) && n.getBranchData().isHasConfidences()
+                        && ( n.getBranchData().getConfidence( 0 ) != null ) ) {
+                    n.getBranchData().getConfidence( 0 ).setType( "bootstrap" ); // only the confidence at index 0 is relabelled
                 }
-            }
-        }
-    }
-
-    static void transferTaxonomyCodeToScientificName( final Phylogeny phy ) throws PhyloXmlDataFormatException {
-        final PhylogenyNodeIterator it = phy.iteratorPostorder();
-        while ( it.hasNext() ) {
-            final PhylogenyNode n = it.next();
-            if ( n.getNodeData().isHasTaxonomy() ) {
-                final String name = n.getNodeData().getTaxonomy().getTaxonomyCode();
-                if ( !ForesterUtil.isEmpty( name ) ) {
-                    n.getNodeData().getTaxonomy().setScientificName( name );
-                    n.getNodeData().getTaxonomy().setTaxonomyCode( "" );
+                if ( !ForesterUtil.isEmpty( n.getName() ) ) {
+                    if ( !n.getNodeData().isHasTaxonomy() ) {
+                        n.getNodeData().addTaxonomy( new Taxonomy() ); // create an empty taxonomy to receive the name
+                    }
+                    final Taxonomy t = n.getNodeData().getTaxonomy();
+                    if ( ForesterUtil.isEmpty( t.getScientificName() ) ) { // an existing scientific name wins; the node name is then kept as it is
+                        t.setScientificName( n.getName() );
+                        n.setName( "" ); // the name now lives in the taxonomy, so blank it on the node
+                    }
                 }
             }
+            if ( n.getNodeData().isHasTaxonomy() && ( n.getNodeData().getTaxonomy().getIdentifier() != null ) ) { // runs for external and non-external nodes alike
+                n.getNodeData()
+                        .getTaxonomy()
+                        .setIdentifier( new Identifier( n.getNodeData().getTaxonomy().getIdentifier().getValue(), // same identifier value is kept
+                                                        "ncbi" ) ); // second constructor argument is fixed to "ncbi"
+            }
         }
     }
 }