in progress
[jalview.git] / forester / java / src / org / forester / archaeopteryx / webservices / WebserviceUtil.java
1 // $Id:
2 // forester -- software libraries and applications
3 // for evolutionary biology research and applications.
4 //
5 // Copyright (C) 2008-2010 Christian M. Zmasek
6 // Copyright (C) 2008-2010 Burnham Institute for Medical Research
7 //
8 // This library is free software; you can redistribute it and/or
9 // modify it under the terms of the GNU Lesser General Public
10 // License as published by the Free Software Foundation; either
11 // version 2.1 of the License, or (at your option) any later version.
12 //
13 // This library is distributed in the hope that it will be useful,
14 // but WITHOUT ANY WARRANTY; without even the implied warranty of
15 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 // Lesser General Public License for more details.
17 //
18 // You should have received a copy of the GNU Lesser General Public
19 // License along with this library; if not, write to the Free Software
20 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
21 //
22 // Contact: phylosoft @ gmail . com
23 // WWW: www.phylosoft.org/forester
24
25 package org.forester.archaeopteryx.webservices;
26
27 import java.util.ArrayList;
28 import java.util.List;
29
30 import org.forester.archaeopteryx.webservices.WebservicesManager.WsPhylogenyFormat;
31 import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException;
32 import org.forester.io.parsers.phyloxml.PhyloXmlUtil;
33 import org.forester.phylogeny.Phylogeny;
34 import org.forester.phylogeny.PhylogenyMethods;
35 import org.forester.phylogeny.PhylogenyNode;
36 import org.forester.phylogeny.data.Accession;
37 import org.forester.phylogeny.data.Identifier;
38 import org.forester.phylogeny.data.Sequence;
39 import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
40 import org.forester.phylogeny.iterators.PreorderTreeIterator;
41 import org.forester.util.ForesterUtil;
42
43 public final class WebserviceUtil {
44
45     public static final String TAX_CODE_TO_SCI_NAME = "tax_code_to_sci_name";
46     public static final String TREE_FAM_INST        = "tree_fam";
47     public static final String PFAM_INST            = "pfam";
48     public static final String TOL_WEBSERVER        = "http://tolweb.org/onlinecontributors/app?service=external&page=xml/TreeStructureService&node_id="
49                                                             + PhylogeniesWebserviceClient.QUERY_PLACEHOLDER;
50     public static final String TOL_NAME             = "Tree of Life";
51     public static final String TREE_BASE_NAME       = "TreeBASE";
52     public static final String TREE_FAM_NAME        = "TreeFam";
53     public static final String PFAM_NAME            = "Pfam";
54     public static final String PFAM_SERVER          = "http://pfam.janelia.org";
55
56     public static List<PhylogeniesWebserviceClient> createDefaultClients() {
57         final List<PhylogeniesWebserviceClient> clients = new ArrayList<PhylogeniesWebserviceClient>();
58         clients.add( new BasicPhylogeniesWebserviceClient( TOL_NAME,
59                                                            "Read Tree from Tree of Life...",
60                                                            "Use ToL webservice to obtain a phylogeny",
61                                                            "Please enter a Tree of Life node identifier\n(Examples: "
62                                                                    + "19386 for Cephalopoda, 2461 for Cnidaria, 2466 for Deuterostomia)",
63                                                            WsPhylogenyFormat.TOL_XML_RESPONSE,
64                                                            PhylogenyMethods.PhylogenyNodeField.TAXONOMY_SCIENTIFIC_NAME,
65                                                            WebserviceUtil.TOL_WEBSERVER,
66                                                            true,
67                                                            "http://tolweb.org",
68                                                            null ) );
69         clients.add( new BasicPhylogeniesWebserviceClient( TREE_BASE_NAME,
70                                                            "Read Tree from TreeBASE...",
71                                                            "Use TreeBASE to obtain a phylogeny",
72                                                            "Please enter a TreeBASE tree identifier\n(Examples: 2654, 825, 4931, 2518, 2406, 4934)",
73                                                            WsPhylogenyFormat.NEXUS,
74                                                            PhylogenyMethods.PhylogenyNodeField.TAXONOMY_SCIENTIFIC_NAME,
75                                                            "http://purl.org/phylo/treebase/phylows/tree/TB2:Tr"
76                                                                    + PhylogeniesWebserviceClient.QUERY_PLACEHOLDER
77                                                                    + "?format=nexus",
78                                                            true,
79                                                            "http://treebase.nescent.org",
80                                                            null ) );
81         clients.add( new BasicPhylogeniesWebserviceClient( PFAM_NAME,
82                                                            "Read Gene Tree from Pfam...",
83                                                            "Use  Pfam to obtain a (full) gene tree",
84                                                            "Please enter a Pfam (PF) accession number\n(Examples: 01849 for NAC, 00452 for Bcl-2, 00046 for Homeobox)",
85                                                            WsPhylogenyFormat.PFAM,
86                                                            null,
87                                                            PFAM_SERVER + "/family/tree/download?alnType=full&acc=PF"
88                                                                    + PhylogeniesWebserviceClient.QUERY_PLACEHOLDER,
89                                                            false,
90                                                            PFAM_SERVER,
91                                                            PFAM_INST ) );
92         clients.add( new BasicPhylogeniesWebserviceClient( TREE_FAM_NAME,
93                                                            "Read Full Gene Tree from TreeFam...",
94                                                            "Use TreeFam to obtain a (full) gene tree",
95                                                            "Please enter a TreeFam (TF) accession number\n(Examples: 101004 for Cyclin D, 315938 for Hox, 105310 for Wnt)",
96                                                            WsPhylogenyFormat.NHX,
97                                                            null,
98                                                            "http://www.treefam.org/cgi-bin/getdata.pl?ac=TF"
99                                                                    + PhylogeniesWebserviceClient.QUERY_PLACEHOLDER
100                                                                    + "&f=full.nhx",
101                                                            true,
102                                                            "http://www.treefam.org",
103                                                            TREE_FAM_INST ) );
104         clients.add( new BasicPhylogeniesWebserviceClient( TREE_FAM_NAME,
105                                                            "Read Clean Gene Tree from TreeFam...",
106                                                            "Use TreeFam to obtain a (\"clean\") gene tree",
107                                                            "Please enter a TreeFam (TF) accession number\n(Examples: 101004 for Cyclin D, 315938 for Hox, 105310 for Wnt)",
108                                                            WsPhylogenyFormat.NHX,
109                                                            null,
110                                                            "http://www.treefam.org/cgi-bin/getdata.pl?ac=TF"
111                                                                    + PhylogeniesWebserviceClient.QUERY_PLACEHOLDER
112                                                                    + "&f=clean.nhx",
113                                                            true,
114                                                            "http://www.treefam.org",
115                                                            TREE_FAM_INST ) );
116         return clients;
117     }
118
119     static void extractSpTremblAccFromNodeName( final Phylogeny phy, final String source ) {
120         final PreorderTreeIterator it = new PreorderTreeIterator( phy );
121         while ( it.hasNext() ) {
122             final PhylogenyNode n = it.next();
123             if ( !ForesterUtil.isEmpty( n.getName() ) ) {
124                 final String name = n.getName();
125                 final int i = name.lastIndexOf( "/" );
126                 if ( i > 0 ) {
127                     final String acc_str = name.substring( 0, i );
128                     if ( !ForesterUtil.isEmpty( acc_str ) ) {
129                         final Sequence seq = new Sequence();
130                         final Accession acc = new Accession( acc_str, source );
131                         seq.setAccession( acc );
132                         n.getNodeData().setSequence( seq );
133                     }
134                 }
135             }
136         }
137     }
138
139     public static void processInstructions( final PhylogeniesWebserviceClient client, final Phylogeny phylogeny )
140             throws PhyloXmlDataFormatException {
141         if ( client.getProcessingInstructions().equals( WebserviceUtil.TAX_CODE_TO_SCI_NAME ) ) {
142             WebserviceUtil.transferTaxonomyCodeToScientificName( phylogeny );
143         }
144         else if ( client.getProcessingInstructions().equals( WebserviceUtil.TREE_FAM_INST ) ) {
145             WebserviceUtil.transferInternalTaxonomyCodeToScientificName( phylogeny );
146             WebserviceUtil.transferExternalScientificNameToTaxonomyCode( phylogeny );
147             WebserviceUtil.transferSequenceNameToSequenceAccession( phylogeny, "ensembl" );
148             WebserviceUtil.setTaxonomyIdentifierType( phylogeny, "ncbi" );
149         }
150         else if ( client.getProcessingInstructions().equals( WebserviceUtil.PFAM_INST ) ) {
151             WebserviceUtil.extractSpTremblAccFromNodeName( phylogeny, "sptrembl" );
152         }
153     }
154
155     static void setTaxonomyIdentifierType( final Phylogeny phy, final String type ) {
156         final PhylogenyNodeIterator it = phy.iteratorPostorder();
157         while ( it.hasNext() ) {
158             final PhylogenyNode n = it.next();
159             if ( n.getNodeData().isHasTaxonomy() && ( n.getNodeData().getTaxonomy().getIdentifier() != null ) ) {
160                 n.getNodeData()
161                         .getTaxonomy()
162                         .setIdentifier( new Identifier( n.getNodeData().getTaxonomy().getIdentifier().getValue(), type ) );
163             }
164         }
165     }
166
167     static void transferExternalScientificNameToTaxonomyCode( final Phylogeny phy ) throws PhyloXmlDataFormatException {
168         final PhylogenyNodeIterator it = phy.iteratorPostorder();
169         while ( it.hasNext() ) {
170             final PhylogenyNode n = it.next();
171             if ( n.isExternal() && n.getNodeData().isHasTaxonomy() ) {
172                 final String name = n.getNodeData().getTaxonomy().getScientificName();
173                 if ( !ForesterUtil.isEmpty( name ) && PhyloXmlUtil.TAXOMONY_CODE_PATTERN.matcher( name ).matches() ) {
174                     n.getNodeData().getTaxonomy().setScientificName( "" );
175                     n.getNodeData().getTaxonomy().setTaxonomyCode( name );
176                 }
177             }
178         }
179     }
180
181     static void transferInternalTaxonomyCodeToScientificName( final Phylogeny phy ) throws PhyloXmlDataFormatException {
182         final PhylogenyNodeIterator it = phy.iteratorPostorder();
183         while ( it.hasNext() ) {
184             final PhylogenyNode n = it.next();
185             if ( !n.isExternal() && n.getNodeData().isHasTaxonomy() ) {
186                 final String name = n.getNodeData().getTaxonomy().getTaxonomyCode();
187                 if ( !ForesterUtil.isEmpty( name ) ) {
188                     n.getNodeData().getTaxonomy().setScientificName( name );
189                     n.getNodeData().getTaxonomy().setTaxonomyCode( "" );
190                 }
191             }
192         }
193     }
194
195     static void transferSequenceNameToSequenceAccession( final Phylogeny phy, final String source ) {
196         final PhylogenyNodeIterator it = phy.iteratorPostorder();
197         while ( it.hasNext() ) {
198             final PhylogenyNode n = it.next();
199             if ( n.getNodeData().isHasSequence() ) {
200                 final String name = n.getNodeData().getSequence().getName();
201                 if ( !ForesterUtil.isEmpty( name ) ) {
202                     n.getNodeData().getSequence().setName( "" );
203                     n.getNodeData().getSequence().setAccession( new Accession( name, source ) );
204                 }
205             }
206         }
207     }
208
209     static void transferTaxonomyCodeToScientificName( final Phylogeny phy ) throws PhyloXmlDataFormatException {
210         final PhylogenyNodeIterator it = phy.iteratorPostorder();
211         while ( it.hasNext() ) {
212             final PhylogenyNode n = it.next();
213             if ( n.getNodeData().isHasTaxonomy() ) {
214                 final String name = n.getNodeData().getTaxonomy().getTaxonomyCode();
215                 if ( !ForesterUtil.isEmpty( name ) ) {
216                     n.getNodeData().getTaxonomy().setScientificName( name );
217                     n.getNodeData().getTaxonomy().setTaxonomyCode( "" );
218                 }
219             }
220         }
221     }
222 }