48f1f43cce92a5fd400fa7ca5a4eb2fcbf25045e
[jalview.git] / test / jalview / ws / dbsources / UniprotTest.java
1 /*
2  * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3  * Copyright (C) $$Year-Rel$$ The Jalview Authors
4  * 
5  * This file is part of Jalview.
6  * 
7  * Jalview is free software: you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License 
9  * as published by the Free Software Foundation, either version 3
10  * of the License, or (at your option) any later version.
11  *  
12  * Jalview is distributed in the hope that it will be useful, but 
13  * WITHOUT ANY WARRANTY; without even the implied warranty 
14  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR 
15  * PURPOSE.  See the GNU General Public License for more details.
16  * 
17  * You should have received a copy of the GNU General Public License
18  * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
19  * The Jalview Authors are detailed in the 'AUTHORS' file.
20  */
21 package jalview.ws.dbsources;
22
import static org.testng.Assert.assertFalse;
import static org.testng.AssertJUnit.assertEquals;
import static org.testng.AssertJUnit.assertNotNull;
import static org.testng.AssertJUnit.assertNull;
import static org.testng.AssertJUnit.assertTrue;

import java.io.ByteArrayInputStream;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.math.BigInteger;
import java.nio.charset.StandardCharsets;
import java.util.List;

import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;

import jalview.datamodel.DBRefEntry;
import jalview.datamodel.DBRefSource;
import jalview.datamodel.SequenceI;
import jalview.gui.JvOptionPane;
import jalview.util.DBRefUtils;
import jalview.xml.binding.uniprot.DbReferenceType;
import jalview.xml.binding.uniprot.Entry;
import jalview.xml.binding.uniprot.FeatureType;
import jalview.xml.binding.uniprot.LocationType;
import jalview.xml.binding.uniprot.PositionType;
50
51 public class UniprotTest
52 {
53
54   @BeforeClass(alwaysRun = true)
55   public void setUpJvOptionPane()
56   {
57     JvOptionPane.setInteractiveMode(false);
58     JvOptionPane.setMockResponse(JvOptionPane.CANCEL_OPTION);
59   }
60
61   // adapted from http://www.uniprot.org/uniprot/A9CKP4.xml
62   private static final String UNIPROT_XML = "<?xml version='1.0' encoding='UTF-8'?>"
63           + "<uniprot xmlns=\"http://uniprot.org/uniprot\">"
64           + "<entry dataset=\"TrEMBL\" created=\"2008-01-15\" modified=\"2015-03-04\" version=\"38\">"
65           + "<accession>A9CKP4</accession>"
66           + "<accession>A9CKP5</accession>" + "<name>A9CKP4_AGRT5</name>"
67           + "<name>A9CKP4_AGRT6</name>"
68           + "<protein><recommendedName><fullName>Mitogen-activated protein kinase 13</fullName></recommendedName></protein>"
69           + "<dbReference type=\"PDB\" id=\"2FSQ\"><property type=\"method\" value=\"X-ray\"/><property type=\"resolution\" value=\"1.40\"/></dbReference>"
70           + "<dbReference type=\"PDBsum\" id=\"2FSR\"/>"
71           + "<dbReference type=\"EMBL\" id=\"AE007869\"><property type=\"protein sequence ID\" value=\"AAK85932.1\"/><property type=\"molecule type\" value=\"Genomic_DNA\"/></dbReference>"
72           + "<feature type=\"signal peptide\" evidence=\"7\"><location><begin position=\"1\"/><end position=\"18\"/></location></feature>"
73           + "<feature type=\"propeptide\" description=\"Activation peptide\" id=\"PRO_0000027399\" evidence=\"9 16 17 18\"><location><begin position=\"19\"/><end position=\"20\"/></location></feature>"
74           + "<feature type=\"chain\" description=\"Granzyme B\" id=\"PRO_0000027400\"><location><begin position=\"21\"/><end position=\"247\"/></location></feature>"
75           + "<feature type=\"sequence variant\"><original>M</original><variation>L</variation><location><position position=\"41\"/></location></feature>"
76           + "<feature type=\"sequence variant\" description=\"Pathogenic\"><original>M</original><variation>L</variation><location><position position=\"41\"/></location></feature>"
77           + "<feature type=\"sequence variant\" description=\"Pathogenic\"><original>M</original><location><position position=\"41\"/></location></feature>"
78           + "<feature type=\"sequence variant\" description=\"Foo\"><variation>L</variation><variation>LMV</variation><original>M</original><location><position position=\"42\"/></location></feature>"
79           + "<feature type=\"sequence variant\" description=\"Foo\"><variation>LL</variation><variation>LMV</variation><original>ML</original><location><begin position=\"42\"/><end position=\"43\"/></location></feature>"
80           + "<feature type=\"sequence variant\" description=\"Foo Too\"><variation>LL</variation><variation>LMVK</variation><original>MLML</original><location><begin position=\"42\"/><end position=\"45\"/></location></feature>"
81           + "<sequence length=\"10\" mass=\"27410\" checksum=\"8CB760AACF88FE6C\" modified=\"2008-01-15\" version=\"1\">MHAPL VSKDL</sequence></entry>"
82           + "</uniprot>";
83
84   /**
85    * Test the method that unmarshals XML to a Uniprot model
86    * 
87    * @throws UnsupportedEncodingException
88    */
89   @Test(groups = { "Functional" })
90   public void testGetUniprotEntries() throws UnsupportedEncodingException
91   {
92     Uniprot u = new Uniprot();
93     InputStream is = new ByteArrayInputStream(UNIPROT_XML.getBytes());
94     List<Entry> entries = u.getUniprotEntries(is);
95     assertEquals(1, entries.size());
96     Entry entry = entries.get(0);
97     assertEquals(2, entry.getName().size());
98     assertEquals("A9CKP4_AGRT5", entry.getName().get(0));
99     assertEquals("A9CKP4_AGRT6", entry.getName().get(1));
100     assertEquals(2, entry.getAccession().size());
101     assertEquals("A9CKP4", entry.getAccession().get(0));
102     assertEquals("A9CKP5", entry.getAccession().get(1));
103
104     assertEquals("MHAPL VSKDL", entry.getSequence().getValue());
105
106     assertEquals("Mitogen-activated protein kinase 13", entry.getProtein()
107             .getRecommendedName().getFullName().getValue());
108
109     /*
110      * Check sequence features
111      */
112     List<FeatureType> features = entry.getFeature();
113     assertEquals(9, features.size());
114     FeatureType sf = features.get(0);
115     assertEquals("signal peptide", sf.getType());
116     assertNull(sf.getDescription());
117     assertNull(sf.getStatus());
118     assertNull(sf.getLocation().getPosition());
119     assertEquals(1, sf.getLocation().getBegin().getPosition().intValue());
120     assertEquals(18, sf.getLocation().getEnd().getPosition().intValue());
121     sf = features.get(1);
122     assertEquals("propeptide", sf.getType());
123     assertEquals("Activation peptide", sf.getDescription());
124     assertNull(sf.getLocation().getPosition());
125     assertEquals(19, sf.getLocation().getBegin().getPosition().intValue());
126     assertEquals(20, sf.getLocation().getEnd().getPosition().intValue());
127     sf = features.get(2);
128     assertEquals("chain", sf.getType());
129     assertEquals("Granzyme B", sf.getDescription());
130     assertNull(sf.getLocation().getPosition());
131     assertEquals(21, sf.getLocation().getBegin().getPosition().intValue());
132     assertEquals(247, sf.getLocation().getEnd().getPosition().intValue());
133
134     sf = features.get(3);
135     assertEquals("sequence variant", sf.getType());
136     assertNull(sf.getDescription());
137     assertEquals(41,
138             sf.getLocation().getPosition().getPosition().intValue());
139     assertNull(sf.getLocation().getBegin());
140     assertNull(sf.getLocation().getEnd());
141
142     sf = features.get(4);
143     assertEquals("sequence variant", sf.getType());
144     assertEquals("Pathogenic", sf.getDescription());
145     assertEquals(41,
146             sf.getLocation().getPosition().getPosition().intValue());
147     assertNull(sf.getLocation().getBegin());
148     assertNull(sf.getLocation().getEnd());
149
150     sf = features.get(5);
151     assertEquals("sequence variant", sf.getType());
152     assertEquals("Pathogenic", sf.getDescription());
153     assertEquals(41,
154             sf.getLocation().getPosition().getPosition().intValue());
155     assertNull(sf.getLocation().getBegin());
156     assertNull(sf.getLocation().getEnd());
157
158     sf = features.get(6);
159     assertEquals("sequence variant", sf.getType());
160     assertEquals("Foo", sf.getDescription());
161     assertEquals(42,
162             sf.getLocation().getPosition().getPosition().intValue());
163     assertNull(sf.getLocation().getBegin());
164     assertNull(sf.getLocation().getEnd());
165     Assert.assertEquals(Uniprot.getDescription(sf), "<html>p.Met42Leu"
166             + "<br/>&nbsp;&nbsp;" + "p.Met42LeuMetVal Foo</html>");
167
168     sf = features.get(7);
169     assertNull(sf.getLocation().getPosition());
170     assertEquals(42, sf.getLocation().getBegin().getPosition().intValue());
171     assertEquals(43, sf.getLocation().getEnd().getPosition().intValue());
172     Assert.assertEquals(Uniprot.getDescription(sf), "<html>p.MetLeu42LeuLeu"
173             + "<br/>&nbsp;&nbsp;" + "p.MetLeu42LeuMetVal Foo</html>");
174
175     sf = features.get(8);
176     assertNull(sf.getLocation().getPosition());
177     assertEquals(42, sf.getLocation().getBegin().getPosition().intValue());
178     assertEquals(45, sf.getLocation().getEnd().getPosition().intValue());
179     Assert.assertEquals(Uniprot.getDescription(sf), "<html>p.MLML42LeuLeu"
180             + "<br/>&nbsp;&nbsp;" + "p.MLML42LMVK Foo Too</html>");
181
182     /*
183      * Check cross-references
184      */
185     List<DbReferenceType> xrefs = entry.getDbReference();
186     assertEquals(3, xrefs.size());
187
188     DbReferenceType xref = xrefs.get(0);
189     assertEquals("2FSQ", xref.getId());
190     assertEquals("PDB", xref.getType());
191     assertEquals("X-ray",
192             Uniprot.getProperty(xref.getProperty(), "method"));
193     assertEquals("1.40",
194             Uniprot.getProperty(xref.getProperty(), "resolution"));
195
196     xref = xrefs.get(1);
197     assertEquals("2FSR", xref.getId());
198     assertEquals("PDBsum", xref.getType());
199     assertTrue(xref.getProperty().isEmpty());
200
201     xref = xrefs.get(2);
202     assertEquals("AE007869", xref.getId());
203     assertEquals("EMBL", xref.getType());
204     assertEquals("AAK85932.1",
205             Uniprot.getProperty(xref.getProperty(), "protein sequence ID"));
206     assertEquals("Genomic_DNA",
207             Uniprot.getProperty(xref.getProperty(), "molecule type"));
208   }
209
210   @Test(groups = { "Functional" })
211   public void testGetUniprotSequence() throws UnsupportedEncodingException
212   {
213     InputStream is = new ByteArrayInputStream(UNIPROT_XML.getBytes());
214     Entry entry = new Uniprot().getUniprotEntries(is).get(0);
215     SequenceI seq = new Uniprot().uniprotEntryToSequence(entry);
216     assertNotNull(seq);
217     assertEquals(6, seq.getDBRefs().size()); // 2*Uniprot, PDB, PDBsum, 2*EMBL
218     assertEquals(seq.getSequenceAsString(),
219             seq.createDatasetSequence().getSequenceAsString());
220     assertEquals(2, seq.getPrimaryDBRefs().size());
221     List<DBRefEntry> res = DBRefUtils.searchRefs(seq.getPrimaryDBRefs(),
222             "A9CKP4");
223     assertEquals(1, res.size());
224     assertTrue(res.get(0).isCanonical());
225     res = DBRefUtils.searchRefsForSource(seq.getDBRefs(),
226             DBRefSource.UNIPROT);
227     assertEquals(2, res.size());
228     /*
229      * NB this test fragile - relies on ordering being preserved
230      */
231     assertTrue(res.get(0).isCanonical());
232     assertFalse(res.get(1).isCanonical());
233
234     // check version is preserved for EMBLCDS
235     res = DBRefUtils.searchRefs(seq.getDBRefs(), "AAK85932");
236     assertEquals(1, res.size());
237     // Ideally we would expect AAK85932.1 -> AAK85932
238     // assertTrue("1".equals(res.get(0).getVersion()));
239     // but it also passes through DBrefUtils.ensurePrimaries which adds
240     // (promoted) to the version string
241     // FIXME: Jim needs to specify what (promoted) means !! - or perhaps we just
242     // ignore it !
243     assertEquals("1 (promoted)", (res.get(0).getVersion()));
244   }
245
246   /**
247    * Test the method that formats the sequence id
248    * 
249    * @throws UnsupportedEncodingException
250    */
251   @Test(groups = { "Functional" })
252   public void testGetUniprotEntryId() throws UnsupportedEncodingException
253   {
254     InputStream is = new ByteArrayInputStream(UNIPROT_XML.getBytes());
255     Entry entry = new Uniprot().getUniprotEntries(is).get(0);
256
257     /*
258      * name formatted with Uniprot Entry name
259      */
260     String expectedName = "A9CKP4_AGRT5|A9CKP4_AGRT6";
261     assertEquals(expectedName, Uniprot.getUniprotEntryId(entry));
262   }
263
264   /**
265    * Test the method that formats the sequence description
266    * 
267    * @throws UnsupportedEncodingException
268    */
269   @Test(groups = { "Functional" })
270   public void testGetUniprotEntryDescription()
271           throws UnsupportedEncodingException
272   {
273     InputStream is = new ByteArrayInputStream(UNIPROT_XML.getBytes());
274     Entry entry = new Uniprot().getUniprotEntries(is).get(0);
275
276     assertEquals("Mitogen-activated protein kinase 13",
277             Uniprot.getUniprotEntryDescription(entry));
278   }
279
280   @Test(groups = { "Functional" })
281   public void testGetDescription()
282   {
283     FeatureType ft = new FeatureType();
284     assertEquals("", Uniprot.getDescription(ft));
285
286     ft.setDescription("Hello");
287     assertEquals("Hello", Uniprot.getDescription(ft));
288
289     ft.setLocation(new LocationType());
290     ft.getLocation().setPosition(new PositionType());
291     ft.getLocation().getPosition().setPosition(BigInteger.valueOf(23));
292     ft.setOriginal("K");
293     ft.getVariation().add("y");
294     assertEquals("p.Lys23Tyr Hello", Uniprot.getDescription(ft));
295
296     // multiple variants generate an html description over more than one line
297     ft.getVariation().add("W");
298     assertEquals("<html>p.Lys23Tyr<br/>&nbsp;&nbsp;p.Lys23Trp Hello</html>",
299             Uniprot.getDescription(ft));
300
301     /*
302      * indel cases
303      * up to 3 bases (original or variant) are shown using 3 letter code
304      */
305     ft.getVariation().clear();
306     ft.getVariation().add("KWE");
307     ft.setOriginal("KLS");
308     assertEquals("p.LysLeuSer23LysTrpGlu Hello",
309             Uniprot.getDescription(ft));
310
311     // adding a fourth original base switches to single letter code
312     ft.setOriginal("KLST");
313     assertEquals("p.KLST23LysTrpGlu Hello", Uniprot.getDescription(ft));
314
315     // adding a fourth variant switches to single letter code
316     ft.getVariation().clear();
317     ft.getVariation().add("KWES");
318     assertEquals("p.KLST23KWES Hello", Uniprot.getDescription(ft));
319
320     ft.getVariation().clear();
321     ft.getVariation().add("z"); // unknown variant - fails gracefully
322     ft.setOriginal("K");
323     assertEquals("p.Lys23z Hello", Uniprot.getDescription(ft));
324
325     ft.getVariation().clear(); // variant missing - is ignored
326     assertEquals("Hello", Uniprot.getDescription(ft));
327   }
328
329   @DataProvider
330   public Object[][] problemEntries()
331   {
332     return new Object[][] {
333         new Object[]
334         { new String(
335                 "<uniprot xmlns=\"http://uniprot.org/uniprot\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://uniprot.org/uniprot http://www.uniprot.org/docs/uniprot.xsd\">\n"
336                         + "<entry xmlns=\"http://uniprot.org/uniprot\" dataset=\"Swiss-Prot\" created=\"1997-11-01\" modified=\"2023-09-13\" version=\"103\">\n"
337                         + "<accession>Q29079</accession>\n"
338                         + "<accession>Q29017</accession>\n"
339                         + "<name>PAG2_PIG</name>\n" + "<protein>\n"
340                         + "<recommendedName>\n"
341                         + "<fullName>Pregnancy-associated glycoprotein 2</fullName>\n"
342                         + "<shortName>PAG 2</shortName>\n"
343                         + "<ecNumber>3.4.23.-</ecNumber>\n"
344                         + "</recommendedName>\n" + "</protein>\n"
345                         + "<gene>\n"
346                         + "<name type=\"primary\">PAG2</name>\n"
347                         + "</gene>\n" + "<organism>\n"
348                         + "<name type=\"scientific\">Sus scrofa</name>\n"
349                         + "<name type=\"common\">Pig</name>\n"
350                         + "<dbReference type=\"NCBI Taxonomy\" id=\"9823\"/>\n"
351                         + "<lineage>\n" + "<taxon>Eukaryota</taxon>\n"
352                         + "<taxon>Metazoa</taxon>\n"
353                         + "<taxon>Chordata</taxon>\n"
354                         + "<taxon>Craniata</taxon>\n"
355                         + "<taxon>Vertebrata</taxon>\n"
356                         + "<taxon>Euteleostomi</taxon>\n"
357                         + "<taxon>Mammalia</taxon>\n"
358                         + "<taxon>Eutheria</taxon>\n"
359                         + "<taxon>Laurasiatheria</taxon>\n"
360                         + "<taxon>Artiodactyla</taxon>\n"
361                         + "<taxon>Suina</taxon>\n"
362                         + "<taxon>Suidae</taxon>\n" + "<taxon>Sus</taxon>\n"
363                         + "</lineage>\n" + "</organism>\n"
364                         + "<reference key=\"1\">\n"
365                         + "<citation type=\"journal article\" date=\"1995\" name=\"Biol. Reprod.\" volume=\"53\" first=\"21\" last=\"28\">\n"
366                         + "<title>Porcine pregnancy-associated glycoproteins: new members of the aspartic proteinase gene family expressed in trophectoderm.</title>\n"
367                         + "<authorList>\n"
368                         + "<person name=\"Szafranska B.\"/>\n"
369                         + "<person name=\"Xie S.\"/>\n"
370                         + "<person name=\"Green J.\"/>\n"
371                         + "<person name=\"Roberts R.M.\"/>\n"
372                         + "</authorList>\n"
373                         + "<dbReference type=\"PubMed\" id=\"7669851\"/>\n"
374                         + "<dbReference type=\"DOI\" id=\"10.1095/biolreprod53.1.21\"/>\n"
375                         + "</citation>\n"
376                         + "<scope>NUCLEOTIDE SEQUENCE [GENOMIC DNA]</scope>\n"
377                         + "</reference>\n" + "<reference key=\"2\">\n"
378                         + "<citation type=\"journal article\" date=\"2001\" name=\"Mol. Reprod. Dev.\" volume=\"60\" first=\"137\" last=\"146\">\n"
379                         + "<title>Gene for porcine pregnancy-associated glycoprotein 2 (poPAG2): its structural organization and analysis of its promoter.</title>\n"
380                         + "<authorList>\n"
381                         + "<person name=\"Szafranska B.\"/>\n"
382                         + "<person name=\"Miura R.\"/>\n"
383                         + "<person name=\"Ghosh D.\"/>\n"
384                         + "<person name=\"Ezashi T.\"/>\n"
385                         + "<person name=\"Xie S.\"/>\n"
386                         + "<person name=\"Roberts R.M.\"/>\n"
387                         + "<person name=\"Green J.A.\"/>\n"
388                         + "</authorList>\n"
389                         + "<dbReference type=\"PubMed\" id=\"11553911\"/>\n"
390                         + "<dbReference type=\"DOI\" id=\"10.1002/mrd.1070\"/>\n"
391                         + "</citation>\n"
392                         + "<scope>NUCLEOTIDE SEQUENCE [GENOMIC DNA]</scope>\n"
393                         + "<source>\n" + "<tissue>Placenta</tissue>\n"
394                         + "</source>\n" + "</reference>\n"
395                         + "<comment type=\"subcellular location\">\n"
396                         + "<subcellularLocation>\n"
397                         + "<location>Secreted</location>\n"
398                         + "<location>Extracellular space</location>\n"
399                         + "</subcellularLocation>\n" + "</comment>\n"
400                         + "<comment type=\"tissue specificity\">\n"
401                         + "<text>Expressed throughout the chorion, with the signal localized exclusively over the trophectoderm.</text>\n"
402                         + "</comment>\n"
403                         + "<comment type=\"developmental stage\">\n"
404                         + "<text>Expression was detected at day 15, coinciding with the beginning of implantation, and continued throughout gestation.</text>\n"
405                         + "</comment>\n" + "<comment type=\"similarity\">\n"
406                         + "<text evidence=\"5\">Belongs to the peptidase A1 family.</text>\n"
407                         + "</comment>\n"
408                         + "<dbReference type=\"EC\" id=\"3.4.23.-\"/>\n"
409                         + "<dbReference type=\"EMBL\" id=\"U39763\">\n"
410                         + "<property type=\"protein sequence ID\" value=\"AAA92055.1\"/>\n"
411                         + "<property type=\"molecule type\" value=\"Genomic_DNA\"/>\n"
412                         + "</dbReference>\n"
413                         + "<dbReference type=\"EMBL\" id=\"U41421\">\n"
414                         + "<property type=\"protein sequence ID\" value=\"AAA92055.1\"/>\n"
415                         + "<property type=\"status\" value=\"JOINED\"/>\n"
416                         + "<property type=\"molecule type\" value=\"Genomic_DNA\"/>\n"
417                         + "</dbReference>\n"
418                         + "<dbReference type=\"EMBL\" id=\"U41422\">\n"
419                         + "<property type=\"protein sequence ID\" value=\"AAA92055.1\"/>\n"
420                         + "<property type=\"status\" value=\"JOINED\"/>\n"
421                         + "<property type=\"molecule type\" value=\"Genomic_DNA\"/>\n"
422                         + "</dbReference>\n"
423                         + "<dbReference type=\"EMBL\" id=\"U39199\">\n"
424                         + "<property type=\"protein sequence ID\" value=\"AAA92055.1\"/>\n"
425                         + "<property type=\"status\" value=\"JOINED\"/>\n"
426                         + "<property type=\"molecule type\" value=\"Genomic_DNA\"/>\n"
427                         + "</dbReference>\n"
428                         + "<dbReference type=\"EMBL\" id=\"U41423\">\n"
429                         + "<property type=\"protein sequence ID\" value=\"AAA92055.1\"/>\n"
430                         + "<property type=\"status\" value=\"JOINED\"/>\n"
431                         + "<property type=\"molecule type\" value=\"Genomic_DNA\"/>\n"
432                         + "</dbReference>\n"
433                         + "<dbReference type=\"EMBL\" id=\"U41424\">\n"
434                         + "<property type=\"protein sequence ID\" value=\"AAA92055.1\"/>\n"
435                         + "<property type=\"status\" value=\"JOINED\"/>\n"
436                         + "<property type=\"molecule type\" value=\"Genomic_DNA\"/>\n"
437                         + "</dbReference>\n"
438                         + "<dbReference type=\"EMBL\" id=\"U39762\">\n"
439                         + "<property type=\"protein sequence ID\" value=\"AAA92055.1\"/>\n"
440                         + "<property type=\"status\" value=\"JOINED\"/>\n"
441                         + "<property type=\"molecule type\" value=\"Genomic_DNA\"/>\n"
442                         + "</dbReference>\n"
443                         + "<dbReference type=\"EMBL\" id=\"L34361\">\n"
444                         + "<property type=\"protein sequence ID\" value=\"AAA81531.1\"/>\n"
445                         + "<property type=\"molecule type\" value=\"Genomic_DNA\"/>\n"
446                         + "</dbReference>\n"
447                         + "<dbReference type=\"PIR\" id=\"I46617\">\n"
448                         + "<property type=\"entry name\" value=\"I46617\"/>\n"
449                         + "</dbReference>\n"
450                         + "<dbReference type=\"AlphaFoldDB\" id=\"Q29079\"/>\n"
451                         + "<dbReference type=\"SMR\" id=\"Q29079\"/>\n"
452                         + "<dbReference type=\"MEROPS\" id=\"A01.051\"/>\n"
453                         + "<dbReference type=\"GlyCosmos\" id=\"Q29079\">\n"
454                         + "<property type=\"glycosylation\" value=\"2 sites, No reported glycans\"/>\n"
455                         + "</dbReference>\n"
456                         + "<dbReference type=\"InParanoid\" id=\"Q29079\"/>\n"
457                         + "<dbReference type=\"Proteomes\" id=\"UP000008227\">\n"
458                         + "<property type=\"component\" value=\"Unplaced\"/>\n"
459                         + "</dbReference>\n"
460                         + "<dbReference type=\"Proteomes\" id=\"UP000314985\">\n"
461                         + "<property type=\"component\" value=\"Unplaced\"/>\n"
462                         + "</dbReference>\n"
463                         + "<dbReference type=\"Proteomes\" id=\"UP000694570\">\n"
464                         + "<property type=\"component\" value=\"Unplaced\"/>\n"
465                         + "</dbReference>\n"
466                         + "<dbReference type=\"Proteomes\" id=\"UP000694571\">\n"
467                         + "<property type=\"component\" value=\"Unplaced\"/>\n"
468                         + "</dbReference>\n"
469                         + "<dbReference type=\"Proteomes\" id=\"UP000694720\">\n"
470                         + "<property type=\"component\" value=\"Unplaced\"/>\n"
471                         + "</dbReference>\n"
472                         + "<dbReference type=\"Proteomes\" id=\"UP000694722\">\n"
473                         + "<property type=\"component\" value=\"Unplaced\"/>\n"
474                         + "</dbReference>\n"
475                         + "<dbReference type=\"Proteomes\" id=\"UP000694723\">\n"
476                         + "<property type=\"component\" value=\"Unplaced\"/>\n"
477                         + "</dbReference>\n"
478                         + "<dbReference type=\"Proteomes\" id=\"UP000694724\">\n"
479                         + "<property type=\"component\" value=\"Unplaced\"/>\n"
480                         + "</dbReference>\n"
481                         + "<dbReference type=\"Proteomes\" id=\"UP000694725\">\n"
482                         + "<property type=\"component\" value=\"Unplaced\"/>\n"
483                         + "</dbReference>\n"
484                         + "<dbReference type=\"Proteomes\" id=\"UP000694726\">\n"
485                         + "<property type=\"component\" value=\"Unplaced\"/>\n"
486                         + "</dbReference>\n"
487                         + "<dbReference type=\"Proteomes\" id=\"UP000694727\">\n"
488                         + "<property type=\"component\" value=\"Unplaced\"/>\n"
489                         + "</dbReference>\n"
490                         + "<dbReference type=\"Proteomes\" id=\"UP000694728\">\n"
491                         + "<property type=\"component\" value=\"Unplaced\"/>\n"
492                         + "</dbReference>\n"
493                         + "<dbReference type=\"GO\" id=\"GO:0005615\">\n"
494                         + "<property type=\"term\" value=\"C:extracellular space\"/>\n"
495                         + "<property type=\"evidence\" value=\"ECO:0007669\"/>\n"
496                         + "<property type=\"project\" value=\"UniProtKB-SubCell\"/>\n"
497                         + "</dbReference>\n"
498                         + "<dbReference type=\"GO\" id=\"GO:0004190\">\n"
499                         + "<property type=\"term\" value=\"F:aspartic-type endopeptidase activity\"/>\n"
500                         + "<property type=\"evidence\" value=\"ECO:0000318\"/>\n"
501                         + "<property type=\"project\" value=\"GO_Central\"/>\n"
502                         + "</dbReference>\n"
503                         + "<dbReference type=\"GO\" id=\"GO:0006508\">\n"
504                         + "<property type=\"term\" value=\"P:proteolysis\"/>\n"
505                         + "<property type=\"evidence\" value=\"ECO:0000318\"/>\n"
506                         + "<property type=\"project\" value=\"GO_Central\"/>\n"
507                         + "</dbReference>\n"
508                         + "<dbReference type=\"Gene3D\" id=\"6.10.140.60\">\n"
509                         + "<property type=\"match status\" value=\"1\"/>\n"
510                         + "</dbReference>\n"
511                         + "<dbReference type=\"Gene3D\" id=\"2.40.70.10\">\n"
512                         + "<property type=\"entry name\" value=\"Acid Proteases\"/>\n"
513                         + "<property type=\"match status\" value=\"3\"/>\n"
514                         + "</dbReference>\n"
515                         + "<dbReference type=\"InterPro\" id=\"IPR001461\">\n"
516                         + "<property type=\"entry name\" value=\"Aspartic_peptidase_A1\"/>\n"
517                         + "</dbReference>\n"
518                         + "<dbReference type=\"InterPro\" id=\"IPR001969\">\n"
519                         + "<property type=\"entry name\" value=\"Aspartic_peptidase_AS\"/>\n"
520                         + "</dbReference>\n"
521                         + "<dbReference type=\"InterPro\" id=\"IPR012848\">\n"
522                         + "<property type=\"entry name\" value=\"Aspartic_peptidase_N\"/>\n"
523                         + "</dbReference>\n"
524                         + "<dbReference type=\"InterPro\" id=\"IPR033121\">\n"
525                         + "<property type=\"entry name\" value=\"PEPTIDASE_A1\"/>\n"
526                         + "</dbReference>\n"
527                         + "<dbReference type=\"InterPro\" id=\"IPR021109\">\n"
528                         + "<property type=\"entry name\" value=\"Peptidase_aspartic_dom_sf\"/>\n"
529                         + "</dbReference>\n"
530                         + "<dbReference type=\"PANTHER\" id=\"PTHR47966\">\n"
531                         + "<property type=\"entry name\" value=\"BETA-SITE APP-CLEAVING ENZYME, ISOFORM A-RELATED\"/>\n"
532                         + "<property type=\"match status\" value=\"1\"/>\n"
533                         + "</dbReference>\n"
534                         + "<dbReference type=\"PANTHER\" id=\"PTHR47966:SF49\">\n"
535                         + "<property type=\"entry name\" value=\"PEPSIN A-5\"/>\n"
536                         + "<property type=\"match status\" value=\"1\"/>\n"
537                         + "</dbReference>\n"
538                         + "<dbReference type=\"Pfam\" id=\"PF07966\">\n"
539                         + "<property type=\"entry name\" value=\"A1_Propeptide\"/>\n"
540                         + "<property type=\"match status\" value=\"1\"/>\n"
541                         + "</dbReference>\n"
542                         + "<dbReference type=\"Pfam\" id=\"PF00026\">\n"
543                         + "<property type=\"entry name\" value=\"Asp\"/>\n"
544                         + "<property type=\"match status\" value=\"2\"/>\n"
545                         + "</dbReference>\n"
546                         + "<dbReference type=\"PRINTS\" id=\"PR00792\">\n"
547                         + "<property type=\"entry name\" value=\"PEPSIN\"/>\n"
548                         + "</dbReference>\n"
549                         + "<dbReference type=\"SUPFAM\" id=\"SSF50630\">\n"
550                         + "<property type=\"entry name\" value=\"Acid proteases\"/>\n"
551                         + "<property type=\"match status\" value=\"2\"/>\n"
552                         + "</dbReference>\n"
553                         + "<dbReference type=\"PROSITE\" id=\"PS00141\">\n"
554                         + "<property type=\"entry name\" value=\"ASP_PROTEASE\"/>\n"
555                         + "<property type=\"match status\" value=\"2\"/>\n"
556                         + "</dbReference>\n"
557                         + "<dbReference type=\"PROSITE\" id=\"PS51767\">\n"
558                         + "<property type=\"entry name\" value=\"PEPTIDASE_A1\"/>\n"
559                         + "<property type=\"match status\" value=\"1\"/>\n"
560                         + "</dbReference>\n"
561                         + "<proteinExistence type=\"evidence at transcript level\"/>\n"
562                         + "<keyword id=\"KW-0064\">Aspartyl protease</keyword>\n"
563                         + "<keyword id=\"KW-1015\">Disulfide bond</keyword>\n"
564                         + "<keyword id=\"KW-0325\">Glycoprotein</keyword>\n"
565                         + "<keyword id=\"KW-0378\">Hydrolase</keyword>\n"
566                         + "<keyword id=\"KW-0645\">Protease</keyword>\n"
567                         + "<keyword id=\"KW-1185\">Reference proteome</keyword>\n"
568                         + "<keyword id=\"KW-0964\">Secreted</keyword>\n"
569                         + "<keyword id=\"KW-0732\">Signal</keyword>\n"
570                         + "<keyword id=\"KW-0865\">Zymogen</keyword>\n"
571                         + "<feature type=\"signal peptide\" evidence=\"2\">\n"
572                         + "<location>\n" + "<begin position=\"1\"/>\n"
573                         + "<end position=\"15\"/>\n" + "</location>\n"
574                         + "</feature>\n"
575                         + "<feature type=\"propeptide\" id=\"PRO_0000026107\" description=\"Activation peptide\" evidence=\"2\">\n"
576                         + "<location>\n" + "<begin position=\"16\"/>\n"
577                         + "<end status=\"unknown\"/>\n" + "</location>\n"
578                         + "</feature>\n"
579                         + "<feature type=\"chain\" id=\"PRO_0000026108\" description=\"Pregnancy-associated glycoprotein 2\">\n"
580                         + "<location>\n" + "<begin status=\"unknown\"/>\n"
581                         + "<end position=\"420\"/>\n" + "</location>\n"
582                         + "</feature>\n"
583                         + "<feature type=\"domain\" description=\"Peptidase A1\" evidence=\"3\">\n"
584                         + "<location>\n" + "<begin position=\"76\"/>\n"
585                         + "<end position=\"417\"/>\n" + "</location>\n"
586                         + "</feature>\n"
587                         + "<feature type=\"active site\" evidence=\"4\">\n"
588                         + "<location>\n" + "<position position=\"94\"/>\n"
589                         + "</location>\n" + "</feature>\n"
590                         + "<feature type=\"active site\" evidence=\"4\">\n"
591                         + "<location>\n" + "<position position=\"277\"/>\n"
592                         + "</location>\n" + "</feature>\n"
593                         + "<feature type=\"glycosylation site\" description=\"N-linked (GlcNAc...) asparagine\" evidence=\"2\">\n"
594                         + "<location>\n" + "<position position=\"56\"/>\n"
595                         + "</location>\n" + "</feature>\n"
596                         + "<feature type=\"glycosylation site\" description=\"N-linked (GlcNAc...) asparagine\" evidence=\"2\">\n"
597                         + "<location>\n" + "<position position=\"79\"/>\n"
598                         + "</location>\n" + "</feature>\n"
599                         + "<feature type=\"disulfide bond\" evidence=\"1\">\n"
600                         + "<location>\n" + "<begin position=\"107\"/>\n"
601                         + "<end position=\"112\"/>\n" + "</location>\n"
602                         + "</feature>\n"
603                         + "<feature type=\"disulfide bond\" evidence=\"1\">\n"
604                         + "<location>\n" + "<begin position=\"268\"/>\n"
605                         + "<end position=\"272\"/>\n" + "</location>\n"
606                         + "</feature>\n"
607                         + "<feature type=\"disulfide bond\" evidence=\"1\">\n"
608                         + "<location>\n" + "<begin position=\"341\"/>\n"
609                         + "<end position=\"376\"/>\n" + "</location>\n"
610                         + "</feature>\n"
611                         + "<feature type=\"sequence conflict\" description=\"In Ref. 1.\" evidence=\"5\" ref=\"1\">\n"
612                         + "<location>\n" + "<begin position=\"335\"/>\n"
613                         + "<end position=\"367\"/>\n" + "</location>\n"
614                         + "</feature>\n"
615                         + "<evidence type=\"ECO:0000250\" key=\"1\"/>\n"
616                         + "<evidence type=\"ECO:0000255\" key=\"2\"/>\n"
617                         + "<evidence type=\"ECO:0000255\" key=\"3\">\n"
618                         + "<source>\n"
619                         + "<dbReference type=\"PROSITE-ProRule\" id=\"PRU01103\"/>\n"
620                         + "</source>\n" + "</evidence>\n"
621                         + "<evidence type=\"ECO:0000255\" key=\"4\">\n"
622                         + "<source>\n"
623                         + "<dbReference type=\"PROSITE-ProRule\" id=\"PRU10094\"/>\n"
624                         + "</source>\n" + "</evidence>\n"
625                         + "<evidence type=\"ECO:0000305\" key=\"5\"/>\n"
626                         + "<sequence length=\"420\" mass=\"47132\" checksum=\"094153B6C1B1FCDB\" modified=\"1997-11-01\" version=\"1\" precursor=\"true\">MKWLVILGLVALSDCLVMIPLTKVKSVRESLREKGLLKNFLKEHPYNMIQNLLSKNSSHVQKFSYQPLRNYLDMVYVGNISIGTPPQQFSVVFDTGSSDLWVPSIYCKSKACVTHRSFNPSHSSTFHDRGKSIKLEYGSGKMSGFLGQDTVRIGQLTSTGQAFGLSKEETGKAFEHAIFDGILGLAYPSIAIKGTTTVIDNLKKQDQISEPVFAFYLSSDKEEGSVVMFGGVDKKYYKGDLKWVPLTQTSYWQIALDRITCRGRVIGCPRGCQAIVDTGTSMLHGPSKAVAKIHSLIKHFEKEYVVPCNARKALPDIVFTINNVDYPVPAQAYIRKYVVPCNARKALPDIVFTINNVDYPVPAQAYIRKNANNNRCYSTFEDIMDTLNQREIWILGDVFLRLYFTVYDEGQNRIGLAQAT</sequence>\n"
627                         + "</entry>\n"
628                         + "<copyright> Copyrighted by the UniProt Consortium, see https://www.uniprot.org/terms Distributed under the Creative Commons Attribution (CC BY 4.0) License </copyright>\n"
629                         + "</uniprot>") } };
630   }
631
632   @Test(groups = "Functional", dataProvider = "problemEntries")
633   public void testimportOfProblemEntries(String entry)
634   {
635     Uniprot u = new Uniprot();
636     InputStream is = new ByteArrayInputStream(entry.getBytes());
637     List<Entry> entries = u.getUniprotEntries(is);
638     assertEquals(1, entries.size());
639     SequenceI sq = u.uniprotEntryToSequence(entries.get(0));
640     assertNotNull(sq);
641   }
642 }