e835724c69f316239f40b194fa09aa34b09a1f63
[jalview.git] / test / jalview / ws / dbsources / UniprotTest.java
1 /*
2  * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3  * Copyright (C) $$Year-Rel$$ The Jalview Authors
4  * 
5  * This file is part of Jalview.
6  * 
7  * Jalview is free software: you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License 
9  * as published by the Free Software Foundation, either version 3
10  * of the License, or (at your option) any later version.
11  *  
12  * Jalview is distributed in the hope that it will be useful, but 
13  * WITHOUT ANY WARRANTY; without even the implied warranty 
14  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR 
15  * PURPOSE.  See the GNU General Public License for more details.
16  * 
17  * You should have received a copy of the GNU General Public License
18  * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
19  * The Jalview Authors are detailed in the 'AUTHORS' file.
20  */
21 package jalview.ws.dbsources;
22
import static org.testng.AssertJUnit.assertEquals;
import static org.testng.AssertJUnit.assertNotNull;
import static org.testng.AssertJUnit.assertNull;
import static org.testng.AssertJUnit.assertTrue;

import jalview.datamodel.SequenceI;
import jalview.gui.JvOptionPane;
import jalview.xml.binding.uniprot.DbReferenceType;
import jalview.xml.binding.uniprot.Entry;
import jalview.xml.binding.uniprot.FeatureType;
import jalview.xml.binding.uniprot.LocationType;
import jalview.xml.binding.uniprot.PositionType;

import java.io.ByteArrayInputStream;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.math.BigInteger;
import java.nio.charset.StandardCharsets;
import java.util.List;

import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
45
46 public class UniprotTest
47 {
48
49   @BeforeClass(alwaysRun = true)
50   public void setUpJvOptionPane()
51   {
52     JvOptionPane.setInteractiveMode(false);
53     JvOptionPane.setMockResponse(JvOptionPane.CANCEL_OPTION);
54   }
55
56   // adapted from http://www.uniprot.org/uniprot/A9CKP4.xml
57   private static final String UNIPROT_XML = "<?xml version='1.0' encoding='UTF-8'?>"
58           + "<uniprot xmlns=\"http://uniprot.org/uniprot\">"
59           + "<entry dataset=\"TrEMBL\" created=\"2008-01-15\" modified=\"2015-03-04\" version=\"38\">"
60           + "<accession>A9CKP4</accession>"
61           + "<accession>A9CKP5</accession>"
62           + "<name>A9CKP4_AGRT5</name>"
63           + "<name>A9CKP4_AGRT6</name>"
64           + "<protein><recommendedName><fullName>Mitogen-activated protein kinase 13</fullName></recommendedName></protein>"
65           + "<dbReference type=\"PDB\" id=\"2FSQ\"><property type=\"method\" value=\"X-ray\"/><property type=\"resolution\" value=\"1.40\"/></dbReference>"
66           + "<dbReference type=\"PDBsum\" id=\"2FSR\"/>"
67           + "<dbReference type=\"EMBL\" id=\"AE007869\"><property type=\"protein sequence ID\" value=\"AAK85932.1\"/><property type=\"molecule type\" value=\"Genomic_DNA\"/></dbReference>"
68           + "<feature type=\"signal peptide\" evidence=\"7\"><location><begin position=\"1\"/><end position=\"18\"/></location></feature>"
69           + "<feature type=\"propeptide\" description=\"Activation peptide\" id=\"PRO_0000027399\" evidence=\"9 16 17 18\"><location><begin position=\"19\"/><end position=\"20\"/></location></feature>"
70           + "<feature type=\"chain\" description=\"Granzyme B\" id=\"PRO_0000027400\"><location><begin position=\"21\"/><end position=\"247\"/></location></feature>"
71           + "<feature type=\"sequence variant\"><original>M</original><variation>L</variation><location><position position=\"41\"/></location></feature>"
72           + "<feature type=\"sequence variant\" description=\"Pathogenic\"><original>M</original><variation>L</variation><location><position position=\"41\"/></location></feature>"
73           + "<feature type=\"sequence variant\" description=\"Pathogenic\"><original>M</original><location><position position=\"41\"/></location></feature>"
74           + "<feature type=\"sequence variant\" description=\"Foo\"><variation>L</variation><variation>LMV</variation><original>M</original><location><position position=\"42\"/></location></feature>"
75           + "<feature type=\"sequence variant\" description=\"Foo\"><variation>LL</variation><variation>LMV</variation><original>ML</original><location><begin position=\"42\"/><end position=\"43\"/></location></feature>"
76           + "<feature type=\"sequence variant\" description=\"Foo Too\"><variation>LL</variation><variation>LMVK</variation><original>MLML</original><location><begin position=\"42\"/><end position=\"45\"/></location></feature>"
77           + "<sequence length=\"10\" mass=\"27410\" checksum=\"8CB760AACF88FE6C\" modified=\"2008-01-15\" version=\"1\">MHAPL VSKDL</sequence></entry>"
78           + "</uniprot>";
79
80   /**
81    * Test the method that unmarshals XML to a Uniprot model
82    * 
83    * @throws UnsupportedEncodingException
84    */
85   @Test(groups = { "Functional" })
86   public void testGetUniprotEntries() throws UnsupportedEncodingException
87   {
88     Uniprot u = new Uniprot();
89     InputStream is = new ByteArrayInputStream(UNIPROT_XML.getBytes());
90     List<Entry> entries = u.getUniprotEntries(is);
91     assertEquals(1, entries.size());
92     Entry entry = entries.get(0);
93     assertEquals(2, entry.getName().size());
94     assertEquals("A9CKP4_AGRT5", entry.getName().get(0));
95     assertEquals("A9CKP4_AGRT6", entry.getName().get(1));
96     assertEquals(2, entry.getAccession().size());
97     assertEquals("A9CKP4", entry.getAccession().get(0));
98     assertEquals("A9CKP5", entry.getAccession().get(1));
99
100     assertEquals("MHAPL VSKDL", entry.getSequence().getValue());
101
102     assertEquals("Mitogen-activated protein kinase 13", entry.getProtein()
103             .getRecommendedName().getFullName().getValue());
104
105     /*
106      * Check sequence features
107      */
108     List<FeatureType> features = entry.getFeature();
109     assertEquals(9, features.size());
110     FeatureType sf = features.get(0);
111     assertEquals("signal peptide", sf.getType());
112     assertNull(sf.getDescription());
113     assertNull(sf.getStatus());
114     assertNull(sf.getLocation().getPosition());
115     assertEquals(1, sf.getLocation().getBegin().getPosition().intValue());
116     assertEquals(18, sf.getLocation().getEnd().getPosition().intValue());
117     sf = features.get(1);
118     assertEquals("propeptide", sf.getType());
119     assertEquals("Activation peptide", sf.getDescription());
120     assertNull(sf.getLocation().getPosition());
121     assertEquals(19, sf.getLocation().getBegin().getPosition().intValue());
122     assertEquals(20, sf.getLocation().getEnd().getPosition().intValue());
123     sf = features.get(2);
124     assertEquals("chain", sf.getType());
125     assertEquals("Granzyme B", sf.getDescription());
126     assertNull(sf.getLocation().getPosition());
127     assertEquals(21, sf.getLocation().getBegin().getPosition().intValue());
128     assertEquals(247, sf.getLocation().getEnd().getPosition().intValue());
129
130     sf = features.get(3);
131     assertEquals("sequence variant", sf.getType());
132     assertNull(sf.getDescription());
133     assertEquals(41,
134             sf.getLocation().getPosition().getPosition().intValue());
135     assertNull(sf.getLocation().getBegin());
136     assertNull(sf.getLocation().getEnd());
137
138     sf = features.get(4);
139     assertEquals("sequence variant", sf.getType());
140     assertEquals("Pathogenic", sf.getDescription());
141     assertEquals(41,
142             sf.getLocation().getPosition().getPosition().intValue());
143     assertNull(sf.getLocation().getBegin());
144     assertNull(sf.getLocation().getEnd());
145
146     sf = features.get(5);
147     assertEquals("sequence variant", sf.getType());
148     assertEquals("Pathogenic", sf.getDescription());
149     assertEquals(41,
150             sf.getLocation().getPosition().getPosition().intValue());
151     assertNull(sf.getLocation().getBegin());
152     assertNull(sf.getLocation().getEnd());
153
154     sf = features.get(6);
155     assertEquals("sequence variant", sf.getType());
156     assertEquals("Foo",
157             sf.getDescription());
158     assertEquals(42,
159             sf.getLocation().getPosition().getPosition().intValue());
160     assertNull(sf.getLocation().getBegin());
161     assertNull(sf.getLocation().getEnd());
162     Assert.assertEquals(Uniprot.getDescription(sf),
163             "<html>p.Met42Leu" + "<br/>&nbsp;&nbsp;"
164                     + "p.Met42LeuMetVal Foo</html>");
165
166     sf = features.get(7);
167     assertNull(sf.getLocation().getPosition());
168     assertEquals(42, sf.getLocation().getBegin().getPosition().intValue());
169     assertEquals(43, sf.getLocation().getEnd().getPosition().intValue());
170     Assert.assertEquals(Uniprot.getDescription(sf),
171             "<html>p.MetLeu42LeuLeu" + "<br/>&nbsp;&nbsp;"
172                     + "p.MetLeu42LeuMetVal Foo</html>");
173
174     sf = features.get(8);
175     assertNull(sf.getLocation().getPosition());
176     assertEquals(42, sf.getLocation().getBegin().getPosition().intValue());
177     assertEquals(45, sf.getLocation().getEnd().getPosition().intValue());
178     Assert.assertEquals(Uniprot.getDescription(sf),
179             "<html>p.MLML42LeuLeu" + "<br/>&nbsp;&nbsp;"
180                     + "p.MLML42LMVK Foo Too</html>");
181
182     /*
183      * Check cross-references
184      */
185     List<DbReferenceType> xrefs = entry.getDbReference();
186     assertEquals(3, xrefs.size());
187
188     DbReferenceType xref = xrefs.get(0);
189     assertEquals("2FSQ", xref.getId());
190     assertEquals("PDB", xref.getType());
191     assertEquals("X-ray",
192             Uniprot.getProperty(xref.getProperty(), "method"));
193     assertEquals("1.40",
194             Uniprot.getProperty(xref.getProperty(), "resolution"));
195
196     xref = xrefs.get(1);
197     assertEquals("2FSR", xref.getId());
198     assertEquals("PDBsum", xref.getType());
199     assertTrue(xref.getProperty().isEmpty());
200
201     xref = xrefs.get(2);
202     assertEquals("AE007869", xref.getId());
203     assertEquals("EMBL", xref.getType());
204     assertEquals("AAK85932.1",
205             Uniprot.getProperty(xref.getProperty(), "protein sequence ID"));
206     assertEquals("Genomic_DNA",
207             Uniprot.getProperty(xref.getProperty(), "molecule type"));
208   }
209
210   @Test(groups = { "Functional" })
211   public void testGetUniprotSequence() throws UnsupportedEncodingException
212   {
213     InputStream is = new ByteArrayInputStream(UNIPROT_XML.getBytes());
214     Entry entry = new Uniprot().getUniprotEntries(
215             is).get(0);
216     SequenceI seq = new Uniprot().uniprotEntryToSequence(entry);
217     assertNotNull(seq);
218     assertEquals(6, seq.getDBRefs().length); // 2*Uniprot, PDB, PDBsum, 2*EMBL
219
220   }
221
222   /**
223    * Test the method that formats the sequence id
224    * 
225    * @throws UnsupportedEncodingException
226    */
227   @Test(groups = { "Functional" })
228   public void testGetUniprotEntryId() throws UnsupportedEncodingException
229   {
230     InputStream is = new ByteArrayInputStream(UNIPROT_XML.getBytes());
231     Entry entry = new Uniprot().getUniprotEntries(is).get(0);
232
233     /*
234      * name formatted with Uniprot Entry name
235      */
236     String expectedName = "A9CKP4_AGRT5|A9CKP4_AGRT6";
237     assertEquals(expectedName,
238             Uniprot.getUniprotEntryId(entry));
239   }
240
241   /**
242    * Test the method that formats the sequence description
243    * 
244    * @throws UnsupportedEncodingException
245    */
246   @Test(groups = { "Functional" })
247   public void testGetUniprotEntryDescription()
248           throws UnsupportedEncodingException
249   {
250     InputStream is = new ByteArrayInputStream(UNIPROT_XML.getBytes());
251     Entry entry = new Uniprot().getUniprotEntries(is).get(0);
252
253     assertEquals("Mitogen-activated protein kinase 13",
254             Uniprot.getUniprotEntryDescription(entry));
255   }
256
257   @Test(groups = { "Functional" })
258   public void testGetDescription()
259   {
260     FeatureType ft = new FeatureType();
261     assertEquals("", Uniprot.getDescription(ft));
262
263     ft.setDescription("Hello");
264     assertEquals("Hello", Uniprot.getDescription(ft));
265
266     ft.setLocation(new LocationType());
267     ft.getLocation().setPosition(new PositionType());
268     ft.getLocation().getPosition().setPosition(BigInteger.valueOf(23));
269     ft.setOriginal("K");
270     ft.getVariation().add("y");
271     assertEquals("p.Lys23Tyr Hello", Uniprot.getDescription(ft));
272
273     // multiple variants generate an html description over more than one line
274     ft.getVariation().add("W");
275     assertEquals("<html>p.Lys23Tyr<br/>&nbsp;&nbsp;p.Lys23Trp Hello</html>",
276             Uniprot.getDescription(ft));
277
278     /*
279      * indel cases
280      * up to 3 bases (original or variant) are shown using 3 letter code
281      */
282     ft.getVariation().clear();
283     ft.getVariation().add("KWE");
284     ft.setOriginal("KLS");
285     assertEquals("p.LysLeuSer23LysTrpGlu Hello",
286             Uniprot.getDescription(ft));
287
288     // adding a fourth original base switches to single letter code
289     ft.setOriginal("KLST");
290     assertEquals("p.KLST23LysTrpGlu Hello", Uniprot.getDescription(ft));
291
292     // adding a fourth variant switches to single letter code
293     ft.getVariation().clear();
294     ft.getVariation().add("KWES");
295     assertEquals("p.KLST23KWES Hello", Uniprot.getDescription(ft));
296
297     ft.getVariation().clear();
298     ft.getVariation().add("z"); // unknown variant - fails gracefully
299     ft.setOriginal("K");
300     assertEquals("p.Lys23z Hello", Uniprot.getDescription(ft));
301
302     ft.getVariation().clear(); // variant missing - is ignored
303     assertEquals("Hello", Uniprot.getDescription(ft));
304   }
305 }