inprogress
[jalview.git] / forester / java / src / org / forester / archaeopteryx / webservices / WebserviceUtil.java
1 // $Id:
2 // forester -- software libraries and applications
3 // for evolutionary biology research and applications.
4 //
5 // Copyright (C) 2008-2010 Christian M. Zmasek
6 // Copyright (C) 2008-2010 Burnham Institute for Medical Research
7 //
8 // This library is free software; you can redistribute it and/or
9 // modify it under the terms of the GNU Lesser General Public
10 // License as published by the Free Software Foundation; either
11 // version 2.1 of the License, or (at your option) any later version.
12 //
13 // This library is distributed in the hope that it will be useful,
14 // but WITHOUT ANY WARRANTY; without even the implied warranty of
15 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 // Lesser General Public License for more details.
17 //
18 // You should have received a copy of the GNU Lesser General Public
19 // License along with this library; if not, write to the Free Software
20 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
21 //
22 // Contact: phylosoft @ gmail . com
23 // WWW: https://sites.google.com/site/cmzmasek/home/software/forester
24
25 package org.forester.archaeopteryx.webservices;
26
27 import java.util.ArrayList;
28 import java.util.List;
29
30 import org.forester.archaeopteryx.webservices.WebservicesManager.WsPhylogenyFormat;
31 import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException;
32 import org.forester.phylogeny.Phylogeny;
33 import org.forester.phylogeny.PhylogenyMethods;
34 import org.forester.phylogeny.PhylogenyNode;
35 import org.forester.phylogeny.data.Accession;
36 import org.forester.phylogeny.data.Identifier;
37 import org.forester.phylogeny.data.Sequence;
38 import org.forester.phylogeny.data.Taxonomy;
39 import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
40 import org.forester.phylogeny.iterators.PreorderTreeIterator;
41 import org.forester.util.ForesterUtil;
42 import org.forester.util.SequenceAccessionTools;
43
44 public final class WebserviceUtil {
45
46     public static final String PFAM_INST      = "pfam";
47     public static final String PFAM_NAME      = "Pfam";
48     public static final String PFAM_SERVER    = "http://pfam.janelia.org";
49     public static final String TOL_NAME       = "Tree of Life";
50     public static final String TOL_WEBSERVER  = "http://tolweb.org/onlinecontributors/app?service=external&page=xml/TreeStructureService&node_id="
51                                                       + PhylogeniesWebserviceClient.QUERY_PLACEHOLDER;
52     public static final String TREE_BASE_DESC = "This data set was downloaded from TreeBASE, a relational database of phylogenetic knowledge. TreeBASE has been supported by the NSF, Harvard University, Yale University, SDSC and UC Davis. Please do not remove this acknowledgment.";
53     public static final String TREE_BASE_INST = "treebase";
54     public static final String TREE_BASE_NAME = "TreeBASE";
55     public static final String TREE_FAM_INST  = "tree_fam";
56     public static final String TREE_FAM_NAME  = "TreeFam";
57
58     public static List<PhylogeniesWebserviceClient> createDefaultClients() {
59         final List<PhylogeniesWebserviceClient> clients = new ArrayList<PhylogeniesWebserviceClient>();
60         clients.add( new BasicPhylogeniesWebserviceClient( TOL_NAME,
61                                                            "Read Tree from Tree of Life...",
62                                                            "Use ToL webservice to obtain a evolutionary tree",
63                                                            "Please enter a Tree of Life node identifier\n(Examples: "
64                                                                    + "19386 for Cephalopoda, 2461 for Cnidaria, 2466 for Deuterostomia)",
65                                                            WsPhylogenyFormat.TOL_XML_RESPONSE,
66                                                            PhylogenyMethods.PhylogenyNodeField.TAXONOMY_SCIENTIFIC_NAME,
67                                                            WebserviceUtil.TOL_WEBSERVER,
68                                                            true,
69                                                            "http://tolweb.org",
70                                                            null ) );
71         clients.add( new BasicPhylogeniesWebserviceClient( TREE_BASE_NAME,
72                                                            "Read Tree(s) from TreeBASE Study...",
73                                                            "Use TreeBASE to obtain evolutionary tree(s) from a study",
74                                                            "Please enter a TreeBASE study (\"S\") identifier (without the \"S\")\n(Examples: 15613, 15632, 14525, 14909)",
75                                                            WsPhylogenyFormat.TREEBASE_STUDY,
76                                                            null,
77                                                            "http://purl.org/phylo/treebase/phylows/study/TB2:S"
78                                                                    + PhylogeniesWebserviceClient.QUERY_PLACEHOLDER
79                                                                    + "?format=nexus",
80                                                            true,
81                                                            "http://www.treebase.org",
82                                                            TREE_BASE_INST ) );
83         clients.add( new BasicPhylogeniesWebserviceClient( TREE_BASE_NAME,
84                                                            "Read Tree from TreeBASE...",
85                                                            "Use TreeBASE to obtain a evolutionary tree",
86                                                            "Please enter a TreeBASE tree (\"Tr\") identifier (without the \"Tr\")\n(Examples: 422, 2654, 825, 4931, 2518, 2406, 4934)",
87                                                            WsPhylogenyFormat.TREEBASE_TREE,
88                                                            null,
89                                                            "http://purl.org/phylo/treebase/phylows/tree/TB2:Tr"
90                                                                    + PhylogeniesWebserviceClient.QUERY_PLACEHOLDER
91                                                                    + "?format=nexus",
92                                                            true,
93                                                            "http://www.treebase.org",
94                                                            TREE_BASE_INST ) );
95         clients.add( new BasicPhylogeniesWebserviceClient( PFAM_NAME,
96                                                            "Read Gene Tree from Pfam...",
97                                                            "Use  Pfam to obtain gene trees for seed alignments",
98                                                            "Please enter a Pfam (PF) accession number\n(Examples: 01849 for NAC, 00452 for Bcl-2, 00046 for Homeobox)",
99                                                            WsPhylogenyFormat.PFAM,
100                                                            null,
101                                                            PFAM_SERVER + "/family/PF"
102                                                                    + PhylogeniesWebserviceClient.QUERY_PLACEHOLDER
103                                                                    + "/tree/download",
104                                                            false,
105                                                            PFAM_SERVER,
106                                                            PFAM_INST ) );
107         clients.add( new BasicPhylogeniesWebserviceClient( TREE_FAM_NAME,
108                                                            "Read Gene Tree from TreeFam...",
109                                                            "Use TreeFam to obtain a gene tree",
110                                                            "Please enter a TreeFam (TF) accession number\n(Examples: 101004 for Cyclin D, 315938 for Hox, 105310 for Wnt)",
111                                                            WsPhylogenyFormat.NHX,
112                                                            null,
113                                                            "http://www.treefam.org/family/TF"
114                                                                    + PhylogeniesWebserviceClient.QUERY_PLACEHOLDER
115                                                                    + "/tree/newick",
116                                                            true,
117                                                            "http://www.treefam.org",
118                                                            TREE_FAM_INST ) );
119         return clients;
120     }
121
122     public static void processInstructions( final PhylogeniesWebserviceClient client, final Phylogeny phylogeny )
123             throws PhyloXmlDataFormatException {
124         if ( client.getProcessingInstructions().equals( WebserviceUtil.TREE_FAM_INST ) ) {
125             WebserviceUtil.processTreeFamTrees( phylogeny );
126         }
127         else if ( client.getProcessingInstructions().equals( WebserviceUtil.PFAM_INST ) ) {
128             WebserviceUtil.extractSpTremblAccFromNodeName( phylogeny, "sptrembl" );
129             PhylogenyMethods.transferInternalNodeNamesToConfidence( phylogeny, "bootstrap" );
130         }
131         else if ( client.getProcessingInstructions().equals( WebserviceUtil.TREE_BASE_INST ) ) {
132             if ( PhylogenyMethods.isInternalNamesLookLikeConfidences( phylogeny ) ) {
133                 PhylogenyMethods.transferInternalNodeNamesToConfidence( phylogeny, "" );
134             }
135             WebserviceUtil.processTreeBaseTrees( phylogeny );
136         }
137     }
138
139     static void extractSpTremblAccFromNodeName( final Phylogeny phy, final String source ) {
140         final PreorderTreeIterator it = new PreorderTreeIterator( phy );
141         while ( it.hasNext() ) {
142             final PhylogenyNode n = it.next();
143             if ( !ForesterUtil.isEmpty( n.getName() ) ) {
144                 final String name = n.getName();
145                 final int i = name.lastIndexOf( "/" );
146                 if ( i > 0 ) {
147                     final String acc_str = name.substring( 0, i );
148                     if ( !ForesterUtil.isEmpty( acc_str ) ) {
149                         final Sequence seq = new Sequence();
150                         final Accession acc = new Accession( acc_str, source );
151                         seq.setAccession( acc );
152                         n.getNodeData().setSequence( seq );
153                     }
154                 }
155             }
156         }
157     }
158
159     static void processTreeBaseTrees( final Phylogeny phy ) {
160         phy.setDescription( TREE_BASE_DESC );
161         final PhylogenyNodeIterator it = phy.iteratorExternalForward();
162         while ( it.hasNext() ) {
163             final PhylogenyNode n = it.next();
164             if ( !ForesterUtil.isEmpty( n.getName() ) ) {
165                 final Accession acc = SequenceAccessionTools.parseAccessorFromString( n.getName() );
166                 if ( acc != null ) {
167                     if ( !n.getNodeData().isHasSequence() ) {
168                         n.getNodeData().addSequence( new Sequence() );
169                     }
170                     final Sequence s = n.getNodeData().getSequence();
171                     if ( s.getAccession() == null ) {
172                         s.setAccession( acc );
173                     }
174                 }
175             }
176         }
177     }
178
179     static void processTreeFamTrees( final Phylogeny phy ) {
180         final PhylogenyNodeIterator it = phy.iteratorPostorder();
181         while ( it.hasNext() ) {
182             final PhylogenyNode n = it.next();
183             if ( n.isExternal() ) {
184                 n.getNodeData().setEvent( null );
185                 if ( !ForesterUtil.isEmpty( n.getName() ) ) {
186                     final Accession acc = SequenceAccessionTools.parseAccessorFromString( n.getName() );
187                     if ( acc != null ) {
188                         if ( !n.getNodeData().isHasSequence() ) {
189                             n.getNodeData().addSequence( new Sequence() );
190                         }
191                         final Sequence s = n.getNodeData().getSequence();
192                         if ( s.getAccession() == null ) {
193                             s.setAccession( acc );
194                         }
195                     }
196                 }
197             }
198             else {
199                 if ( ( n.getBranchData() != null ) && n.getBranchData().isHasConfidences()
200                         && ( n.getBranchData().getConfidence( 0 ) != null ) ) {
201                     n.getBranchData().getConfidence( 0 ).setType( "bootstrap" );
202                 }
203                 if ( !ForesterUtil.isEmpty( n.getName() ) ) {
204                     if ( !n.getNodeData().isHasTaxonomy() ) {
205                         n.getNodeData().addTaxonomy( new Taxonomy() );
206                     }
207                     final Taxonomy t = n.getNodeData().getTaxonomy();
208                     if ( ForesterUtil.isEmpty( t.getScientificName() ) ) {
209                         t.setScientificName( n.getName() );
210                         n.setName( "" );
211                     }
212                 }
213             }
214             if ( n.getNodeData().isHasTaxonomy() && ( n.getNodeData().getTaxonomy().getIdentifier() != null ) ) {
215                 n.getNodeData()
216                         .getTaxonomy()
217                         .setIdentifier( new Identifier( n.getNodeData().getTaxonomy().getIdentifier().getValue(),
218                                                         "ncbi" ) );
219             }
220         }
221     }
222 }