JAL-4090 JAL-4334 position status when start, end or pos is unknown is stored in...
[jalview.git] / test / jalview / ws / dbsources / UniprotTest.java
1 /*
2  * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3  * Copyright (C) $$Year-Rel$$ The Jalview Authors
4  * 
5  * This file is part of Jalview.
6  * 
7  * Jalview is free software: you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License 
9  * as published by the Free Software Foundation, either version 3
10  * of the License, or (at your option) any later version.
11  *  
12  * Jalview is distributed in the hope that it will be useful, but 
13  * WITHOUT ANY WARRANTY; without even the implied warranty 
14  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR 
15  * PURPOSE.  See the GNU General Public License for more details.
16  * 
17  * You should have received a copy of the GNU General Public License
18  * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
19  * The Jalview Authors are detailed in the 'AUTHORS' file.
20  */
21 package jalview.ws.dbsources;
22
23 import static org.testng.Assert.assertFalse;
24 import static org.testng.AssertJUnit.assertEquals;
25 import static org.testng.AssertJUnit.assertNotNull;
26 import static org.testng.AssertJUnit.assertNull;
27 import static org.testng.AssertJUnit.assertTrue;
28
29 import java.io.ByteArrayInputStream;
30 import java.io.InputStream;
31 import java.io.UnsupportedEncodingException;
32 import java.math.BigInteger;
33 import java.util.List;
34
35 import org.testng.Assert;
36 import org.testng.annotations.BeforeClass;
37 import org.testng.annotations.DataProvider;
38 import org.testng.annotations.Test;
39
40 import jalview.datamodel.DBRefEntry;
41 import jalview.datamodel.DBRefSource;
42 import jalview.datamodel.SequenceFeature;
43 import jalview.datamodel.SequenceI;
44 import jalview.gui.JvOptionPane;
45 import jalview.util.DBRefUtils;
46 import jalview.xml.binding.uniprot.DbReferenceType;
47 import jalview.xml.binding.uniprot.Entry;
48 import jalview.xml.binding.uniprot.FeatureType;
49 import jalview.xml.binding.uniprot.LocationType;
50 import jalview.xml.binding.uniprot.PositionType;
51
52 public class UniprotTest
53 {
54
55   @BeforeClass(alwaysRun = true)
56   public void setUpJvOptionPane()
57   {
58     JvOptionPane.setInteractiveMode(false);
59     JvOptionPane.setMockResponse(JvOptionPane.CANCEL_OPTION);
60   }
61
62   // adapted from http://www.uniprot.org/uniprot/A9CKP4.xml
63   private static final String UNIPROT_XML = "<?xml version='1.0' encoding='UTF-8'?>"
64           + "<uniprot xmlns=\"http://uniprot.org/uniprot\">"
65           + "<entry dataset=\"TrEMBL\" created=\"2008-01-15\" modified=\"2015-03-04\" version=\"38\">"
66           + "<accession>A9CKP4</accession>"
67           + "<accession>A9CKP5</accession>" + "<name>A9CKP4_AGRT5</name>"
68           + "<name>A9CKP4_AGRT6</name>"
69           + "<protein><recommendedName><fullName>Mitogen-activated protein kinase 13</fullName></recommendedName></protein>"
70           + "<dbReference type=\"PDB\" id=\"2FSQ\"><property type=\"method\" value=\"X-ray\"/><property type=\"resolution\" value=\"1.40\"/></dbReference>"
71           + "<dbReference type=\"PDBsum\" id=\"2FSR\"/>"
72           + "<dbReference type=\"EMBL\" id=\"AE007869\"><property type=\"protein sequence ID\" value=\"AAK85932.1\"/><property type=\"molecule type\" value=\"Genomic_DNA\"/></dbReference>"
73           + "<feature type=\"signal peptide\" evidence=\"7\"><location><begin position=\"1\"/><end position=\"18\"/></location></feature>"
74           + "<feature type=\"propeptide\" description=\"Activation peptide\" id=\"PRO_0000027399\" evidence=\"9 16 17 18\"><location><begin position=\"19\"/><end position=\"20\"/></location></feature>"
75           + "<feature type=\"chain\" description=\"Granzyme B\" id=\"PRO_0000027400\"><location><begin position=\"21\"/><end position=\"247\"/></location></feature>"
76           + "<feature type=\"sequence variant\"><original>M</original><variation>L</variation><location><position position=\"41\"/></location></feature>"
77           + "<feature type=\"sequence variant\" description=\"Pathogenic\"><original>M</original><variation>L</variation><location><position position=\"41\"/></location></feature>"
78           + "<feature type=\"sequence variant\" description=\"Pathogenic\"><original>M</original><location><position position=\"41\"/></location></feature>"
79           + "<feature type=\"sequence variant\" description=\"Foo\"><variation>L</variation><variation>LMV</variation><original>M</original><location><position position=\"42\"/></location></feature>"
80           + "<feature type=\"sequence variant\" description=\"Foo\"><variation>LL</variation><variation>LMV</variation><original>ML</original><location><begin position=\"42\"/><end position=\"43\"/></location></feature>"
81           + "<feature type=\"sequence variant\" description=\"Foo Too\"><variation>LL</variation><variation>LMVK</variation><original>MLML</original><location><begin position=\"42\"/><end position=\"45\"/></location></feature>"
82           + "<sequence length=\"10\" mass=\"27410\" checksum=\"8CB760AACF88FE6C\" modified=\"2008-01-15\" version=\"1\">MHAPL VSKDL</sequence></entry>"
83           + "</uniprot>";
84
85   /**
86    * Test the method that unmarshals XML to a Uniprot model
87    * 
88    * @throws UnsupportedEncodingException
89    */
90   @Test(groups = { "Functional" })
91   public void testGetUniprotEntries() throws UnsupportedEncodingException
92   {
93     Uniprot u = new Uniprot();
94     InputStream is = new ByteArrayInputStream(UNIPROT_XML.getBytes());
95     List<Entry> entries = u.getUniprotEntries(is);
96     assertEquals(1, entries.size());
97     Entry entry = entries.get(0);
98     assertEquals(2, entry.getName().size());
99     assertEquals("A9CKP4_AGRT5", entry.getName().get(0));
100     assertEquals("A9CKP4_AGRT6", entry.getName().get(1));
101     assertEquals(2, entry.getAccession().size());
102     assertEquals("A9CKP4", entry.getAccession().get(0));
103     assertEquals("A9CKP5", entry.getAccession().get(1));
104
105     assertEquals("MHAPL VSKDL", entry.getSequence().getValue());
106
107     assertEquals("Mitogen-activated protein kinase 13", entry.getProtein()
108             .getRecommendedName().getFullName().getValue());
109
110     /*
111      * Check sequence features
112      */
113     List<FeatureType> features = entry.getFeature();
114     assertEquals(9, features.size());
115     FeatureType sf = features.get(0);
116     assertEquals("signal peptide", sf.getType());
117     assertNull(sf.getDescription());
118     assertNull(sf.getStatus());
119     assertNull(sf.getLocation().getPosition());
120     assertEquals(1, sf.getLocation().getBegin().getPosition().intValue());
121     assertEquals(18, sf.getLocation().getEnd().getPosition().intValue());
122     sf = features.get(1);
123     assertEquals("propeptide", sf.getType());
124     assertEquals("Activation peptide", sf.getDescription());
125     assertNull(sf.getLocation().getPosition());
126     assertEquals(19, sf.getLocation().getBegin().getPosition().intValue());
127     assertEquals(20, sf.getLocation().getEnd().getPosition().intValue());
128     sf = features.get(2);
129     assertEquals("chain", sf.getType());
130     assertEquals("Granzyme B", sf.getDescription());
131     assertNull(sf.getLocation().getPosition());
132     assertEquals(21, sf.getLocation().getBegin().getPosition().intValue());
133     assertEquals(247, sf.getLocation().getEnd().getPosition().intValue());
134
135     sf = features.get(3);
136     assertEquals("sequence variant", sf.getType());
137     assertNull(sf.getDescription());
138     assertEquals(41,
139             sf.getLocation().getPosition().getPosition().intValue());
140     assertNull(sf.getLocation().getBegin());
141     assertNull(sf.getLocation().getEnd());
142
143     sf = features.get(4);
144     assertEquals("sequence variant", sf.getType());
145     assertEquals("Pathogenic", sf.getDescription());
146     assertEquals(41,
147             sf.getLocation().getPosition().getPosition().intValue());
148     assertNull(sf.getLocation().getBegin());
149     assertNull(sf.getLocation().getEnd());
150
151     sf = features.get(5);
152     assertEquals("sequence variant", sf.getType());
153     assertEquals("Pathogenic", sf.getDescription());
154     assertEquals(41,
155             sf.getLocation().getPosition().getPosition().intValue());
156     assertNull(sf.getLocation().getBegin());
157     assertNull(sf.getLocation().getEnd());
158
159     sf = features.get(6);
160     assertEquals("sequence variant", sf.getType());
161     assertEquals("Foo", sf.getDescription());
162     assertEquals(42,
163             sf.getLocation().getPosition().getPosition().intValue());
164     assertNull(sf.getLocation().getBegin());
165     assertNull(sf.getLocation().getEnd());
166     Assert.assertEquals(Uniprot.getDescription(sf), "<html>p.Met42Leu"
167             + "<br/>&nbsp;&nbsp;" + "p.Met42LeuMetVal Foo</html>");
168
169     sf = features.get(7);
170     assertNull(sf.getLocation().getPosition());
171     assertEquals(42, sf.getLocation().getBegin().getPosition().intValue());
172     assertEquals(43, sf.getLocation().getEnd().getPosition().intValue());
173     Assert.assertEquals(Uniprot.getDescription(sf), "<html>p.MetLeu42LeuLeu"
174             + "<br/>&nbsp;&nbsp;" + "p.MetLeu42LeuMetVal Foo</html>");
175
176     sf = features.get(8);
177     assertNull(sf.getLocation().getPosition());
178     assertEquals(42, sf.getLocation().getBegin().getPosition().intValue());
179     assertEquals(45, sf.getLocation().getEnd().getPosition().intValue());
180     Assert.assertEquals(Uniprot.getDescription(sf), "<html>p.MLML42LeuLeu"
181             + "<br/>&nbsp;&nbsp;" + "p.MLML42LMVK Foo Too</html>");
182
183     /*
184      * Check cross-references
185      */
186     List<DbReferenceType> xrefs = entry.getDbReference();
187     assertEquals(3, xrefs.size());
188
189     DbReferenceType xref = xrefs.get(0);
190     assertEquals("2FSQ", xref.getId());
191     assertEquals("PDB", xref.getType());
192     assertEquals("X-ray",
193             Uniprot.getProperty(xref.getProperty(), "method"));
194     assertEquals("1.40",
195             Uniprot.getProperty(xref.getProperty(), "resolution"));
196
197     xref = xrefs.get(1);
198     assertEquals("2FSR", xref.getId());
199     assertEquals("PDBsum", xref.getType());
200     assertTrue(xref.getProperty().isEmpty());
201
202     xref = xrefs.get(2);
203     assertEquals("AE007869", xref.getId());
204     assertEquals("EMBL", xref.getType());
205     assertEquals("AAK85932.1",
206             Uniprot.getProperty(xref.getProperty(), "protein sequence ID"));
207     assertEquals("Genomic_DNA",
208             Uniprot.getProperty(xref.getProperty(), "molecule type"));
209   }
210
211   @Test(groups = { "Functional" })
212   public void testGetUniprotSequence() throws UnsupportedEncodingException
213   {
214     InputStream is = new ByteArrayInputStream(UNIPROT_XML.getBytes());
215     Entry entry = new Uniprot().getUniprotEntries(is).get(0);
216     SequenceI seq = new Uniprot().uniprotEntryToSequence(entry);
217     assertNotNull(seq);
218     assertEquals(6, seq.getDBRefs().size()); // 2*Uniprot, PDB, PDBsum, 2*EMBL
219     assertEquals(seq.getSequenceAsString(),
220             seq.createDatasetSequence().getSequenceAsString());
221     assertEquals(2, seq.getPrimaryDBRefs().size());
222     List<DBRefEntry> res = DBRefUtils.searchRefs(seq.getPrimaryDBRefs(),
223             "A9CKP4");
224     assertEquals(1, res.size());
225     assertTrue(res.get(0).isCanonical());
226     res = DBRefUtils.searchRefsForSource(seq.getDBRefs(),
227             DBRefSource.UNIPROT);
228     assertEquals(2, res.size());
229     /*
230      * NB this test fragile - relies on ordering being preserved
231      */
232     assertTrue(res.get(0).isCanonical());
233     assertFalse(res.get(1).isCanonical());
234
235     // check version is preserved for EMBLCDS
236     res = DBRefUtils.searchRefs(seq.getDBRefs(), "AAK85932");
237     assertEquals(1, res.size());
238     // Ideally we would expect AAK85932.1 -> AAK85932
239     // assertTrue("1".equals(res.get(0).getVersion()));
240     // but it also passes through DBrefUtils.ensurePrimaries which adds
241     // (promoted) to the version string
242     // FIXME: Jim needs to specify what (promoted) means !! - or perhaps we just
243     // ignore it !
244     assertEquals("1 (promoted)", (res.get(0).getVersion()));
245   }
246
247   /**
248    * Test the method that formats the sequence id
249    * 
250    * @throws UnsupportedEncodingException
251    */
252   @Test(groups = { "Functional" })
253   public void testGetUniprotEntryId() throws UnsupportedEncodingException
254   {
255     InputStream is = new ByteArrayInputStream(UNIPROT_XML.getBytes());
256     Entry entry = new Uniprot().getUniprotEntries(is).get(0);
257
258     /*
259      * name formatted with Uniprot Entry name
260      */
261     String expectedName = "A9CKP4_AGRT5|A9CKP4_AGRT6";
262     assertEquals(expectedName, Uniprot.getUniprotEntryId(entry));
263   }
264
265   /**
266    * Test the method that formats the sequence description
267    * 
268    * @throws UnsupportedEncodingException
269    */
270   @Test(groups = { "Functional" })
271   public void testGetUniprotEntryDescription()
272           throws UnsupportedEncodingException
273   {
274     InputStream is = new ByteArrayInputStream(UNIPROT_XML.getBytes());
275     Entry entry = new Uniprot().getUniprotEntries(is).get(0);
276
277     assertEquals("Mitogen-activated protein kinase 13",
278             Uniprot.getUniprotEntryDescription(entry));
279   }
280
281   @Test(groups = { "Functional" })
282   public void testGetDescription()
283   {
284     FeatureType ft = new FeatureType();
285     assertEquals("", Uniprot.getDescription(ft));
286
287     ft.setDescription("Hello");
288     assertEquals("Hello", Uniprot.getDescription(ft));
289
290     ft.setLocation(new LocationType());
291     ft.getLocation().setPosition(new PositionType());
292     ft.getLocation().getPosition().setPosition(BigInteger.valueOf(23));
293     ft.setOriginal("K");
294     ft.getVariation().add("y");
295     assertEquals("p.Lys23Tyr Hello", Uniprot.getDescription(ft));
296
297     // multiple variants generate an html description over more than one line
298     ft.getVariation().add("W");
299     assertEquals("<html>p.Lys23Tyr<br/>&nbsp;&nbsp;p.Lys23Trp Hello</html>",
300             Uniprot.getDescription(ft));
301
302     /*
303      * indel cases
304      * up to 3 bases (original or variant) are shown using 3 letter code
305      */
306     ft.getVariation().clear();
307     ft.getVariation().add("KWE");
308     ft.setOriginal("KLS");
309     assertEquals("p.LysLeuSer23LysTrpGlu Hello",
310             Uniprot.getDescription(ft));
311
312     // adding a fourth original base switches to single letter code
313     ft.setOriginal("KLST");
314     assertEquals("p.KLST23LysTrpGlu Hello", Uniprot.getDescription(ft));
315
316     // adding a fourth variant switches to single letter code
317     ft.getVariation().clear();
318     ft.getVariation().add("KWES");
319     assertEquals("p.KLST23KWES Hello", Uniprot.getDescription(ft));
320
321     ft.getVariation().clear();
322     ft.getVariation().add("z"); // unknown variant - fails gracefully
323     ft.setOriginal("K");
324     assertEquals("p.Lys23z Hello", Uniprot.getDescription(ft));
325
326     ft.getVariation().clear(); // variant missing - is ignored
327     assertEquals("Hello", Uniprot.getDescription(ft));
328   }
329
330   public static String Q29079 = Q29079 = new String(
331           "<uniprot xmlns=\"http://uniprot.org/uniprot\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://uniprot.org/uniprot http://www.uniprot.org/docs/uniprot.xsd\">\n"
332                   + "<entry xmlns=\"http://uniprot.org/uniprot\" dataset=\"Swiss-Prot\" created=\"1997-11-01\" modified=\"2023-09-13\" version=\"103\">\n"
333                   + "<accession>Q29079</accession>\n"
334                   + "<accession>Q29017</accession>\n"
335                   + "<name>PAG2_PIG</name>\n" + "<protein>\n"
336                   + "<recommendedName>\n"
337                   + "<fullName>Pregnancy-associated glycoprotein 2</fullName>\n"
338                   + "<shortName>PAG 2</shortName>\n"
339                   + "<ecNumber>3.4.23.-</ecNumber>\n"
340                   + "</recommendedName>\n" + "</protein>\n" + "<gene>\n"
341                   + "<name type=\"primary\">PAG2</name>\n" + "</gene>\n"
342                   + "<organism>\n"
343                   + "<name type=\"scientific\">Sus scrofa</name>\n"
344                   + "<name type=\"common\">Pig</name>\n"
345                   + "<dbReference type=\"NCBI Taxonomy\" id=\"9823\"/>\n"
346                   + "<lineage>\n" + "<taxon>Eukaryota</taxon>\n"
347                   + "<taxon>Metazoa</taxon>\n" + "<taxon>Chordata</taxon>\n"
348                   + "<taxon>Craniata</taxon>\n"
349                   + "<taxon>Vertebrata</taxon>\n"
350                   + "<taxon>Euteleostomi</taxon>\n"
351                   + "<taxon>Mammalia</taxon>\n"
352                   + "<taxon>Eutheria</taxon>\n"
353                   + "<taxon>Laurasiatheria</taxon>\n"
354                   + "<taxon>Artiodactyla</taxon>\n"
355                   + "<taxon>Suina</taxon>\n" + "<taxon>Suidae</taxon>\n"
356                   + "<taxon>Sus</taxon>\n" + "</lineage>\n"
357                   + "</organism>\n" + "<reference key=\"1\">\n"
358                   + "<citation type=\"journal article\" date=\"1995\" name=\"Biol. Reprod.\" volume=\"53\" first=\"21\" last=\"28\">\n"
359                   + "<title>Porcine pregnancy-associated glycoproteins: new members of the aspartic proteinase gene family expressed in trophectoderm.</title>\n"
360                   + "<authorList>\n" + "<person name=\"Szafranska B.\"/>\n"
361                   + "<person name=\"Xie S.\"/>\n"
362                   + "<person name=\"Green J.\"/>\n"
363                   + "<person name=\"Roberts R.M.\"/>\n" + "</authorList>\n"
364                   + "<dbReference type=\"PubMed\" id=\"7669851\"/>\n"
365                   + "<dbReference type=\"DOI\" id=\"10.1095/biolreprod53.1.21\"/>\n"
366                   + "</citation>\n"
367                   + "<scope>NUCLEOTIDE SEQUENCE [GENOMIC DNA]</scope>\n"
368                   + "</reference>\n" + "<reference key=\"2\">\n"
369                   + "<citation type=\"journal article\" date=\"2001\" name=\"Mol. Reprod. Dev.\" volume=\"60\" first=\"137\" last=\"146\">\n"
370                   + "<title>Gene for porcine pregnancy-associated glycoprotein 2 (poPAG2): its structural organization and analysis of its promoter.</title>\n"
371                   + "<authorList>\n" + "<person name=\"Szafranska B.\"/>\n"
372                   + "<person name=\"Miura R.\"/>\n"
373                   + "<person name=\"Ghosh D.\"/>\n"
374                   + "<person name=\"Ezashi T.\"/>\n"
375                   + "<person name=\"Xie S.\"/>\n"
376                   + "<person name=\"Roberts R.M.\"/>\n"
377                   + "<person name=\"Green J.A.\"/>\n" + "</authorList>\n"
378                   + "<dbReference type=\"PubMed\" id=\"11553911\"/>\n"
379                   + "<dbReference type=\"DOI\" id=\"10.1002/mrd.1070\"/>\n"
380                   + "</citation>\n"
381                   + "<scope>NUCLEOTIDE SEQUENCE [GENOMIC DNA]</scope>\n"
382                   + "<source>\n" + "<tissue>Placenta</tissue>\n"
383                   + "</source>\n" + "</reference>\n"
384                   + "<comment type=\"subcellular location\">\n"
385                   + "<subcellularLocation>\n"
386                   + "<location>Secreted</location>\n"
387                   + "<location>Extracellular space</location>\n"
388                   + "</subcellularLocation>\n" + "</comment>\n"
389                   + "<comment type=\"tissue specificity\">\n"
390                   + "<text>Expressed throughout the chorion, with the signal localized exclusively over the trophectoderm.</text>\n"
391                   + "</comment>\n"
392                   + "<comment type=\"developmental stage\">\n"
393                   + "<text>Expression was detected at day 15, coinciding with the beginning of implantation, and continued throughout gestation.</text>\n"
394                   + "</comment>\n" + "<comment type=\"similarity\">\n"
395                   + "<text evidence=\"5\">Belongs to the peptidase A1 family.</text>\n"
396                   + "</comment>\n"
397                   + "<dbReference type=\"EC\" id=\"3.4.23.-\"/>\n"
398                   + "<dbReference type=\"EMBL\" id=\"U39763\">\n"
399                   + "<property type=\"protein sequence ID\" value=\"AAA92055.1\"/>\n"
400                   + "<property type=\"molecule type\" value=\"Genomic_DNA\"/>\n"
401                   + "</dbReference>\n"
402                   + "<dbReference type=\"EMBL\" id=\"U41421\">\n"
403                   + "<property type=\"protein sequence ID\" value=\"AAA92055.1\"/>\n"
404                   + "<property type=\"status\" value=\"JOINED\"/>\n"
405                   + "<property type=\"molecule type\" value=\"Genomic_DNA\"/>\n"
406                   + "</dbReference>\n"
407                   + "<dbReference type=\"EMBL\" id=\"U41422\">\n"
408                   + "<property type=\"protein sequence ID\" value=\"AAA92055.1\"/>\n"
409                   + "<property type=\"status\" value=\"JOINED\"/>\n"
410                   + "<property type=\"molecule type\" value=\"Genomic_DNA\"/>\n"
411                   + "</dbReference>\n"
412                   + "<dbReference type=\"EMBL\" id=\"U39199\">\n"
413                   + "<property type=\"protein sequence ID\" value=\"AAA92055.1\"/>\n"
414                   + "<property type=\"status\" value=\"JOINED\"/>\n"
415                   + "<property type=\"molecule type\" value=\"Genomic_DNA\"/>\n"
416                   + "</dbReference>\n"
417                   + "<dbReference type=\"EMBL\" id=\"U41423\">\n"
418                   + "<property type=\"protein sequence ID\" value=\"AAA92055.1\"/>\n"
419                   + "<property type=\"status\" value=\"JOINED\"/>\n"
420                   + "<property type=\"molecule type\" value=\"Genomic_DNA\"/>\n"
421                   + "</dbReference>\n"
422                   + "<dbReference type=\"EMBL\" id=\"U41424\">\n"
423                   + "<property type=\"protein sequence ID\" value=\"AAA92055.1\"/>\n"
424                   + "<property type=\"status\" value=\"JOINED\"/>\n"
425                   + "<property type=\"molecule type\" value=\"Genomic_DNA\"/>\n"
426                   + "</dbReference>\n"
427                   + "<dbReference type=\"EMBL\" id=\"U39762\">\n"
428                   + "<property type=\"protein sequence ID\" value=\"AAA92055.1\"/>\n"
429                   + "<property type=\"status\" value=\"JOINED\"/>\n"
430                   + "<property type=\"molecule type\" value=\"Genomic_DNA\"/>\n"
431                   + "</dbReference>\n"
432                   + "<dbReference type=\"EMBL\" id=\"L34361\">\n"
433                   + "<property type=\"protein sequence ID\" value=\"AAA81531.1\"/>\n"
434                   + "<property type=\"molecule type\" value=\"Genomic_DNA\"/>\n"
435                   + "</dbReference>\n"
436                   + "<dbReference type=\"PIR\" id=\"I46617\">\n"
437                   + "<property type=\"entry name\" value=\"I46617\"/>\n"
438                   + "</dbReference>\n"
439                   + "<dbReference type=\"AlphaFoldDB\" id=\"Q29079\"/>\n"
440                   + "<dbReference type=\"SMR\" id=\"Q29079\"/>\n"
441                   + "<dbReference type=\"MEROPS\" id=\"A01.051\"/>\n"
442                   + "<dbReference type=\"GlyCosmos\" id=\"Q29079\">\n"
443                   + "<property type=\"glycosylation\" value=\"2 sites, No reported glycans\"/>\n"
444                   + "</dbReference>\n"
445                   + "<dbReference type=\"InParanoid\" id=\"Q29079\"/>\n"
446                   + "<dbReference type=\"Proteomes\" id=\"UP000008227\">\n"
447                   + "<property type=\"component\" value=\"Unplaced\"/>\n"
448                   + "</dbReference>\n"
449                   + "<dbReference type=\"Proteomes\" id=\"UP000314985\">\n"
450                   + "<property type=\"component\" value=\"Unplaced\"/>\n"
451                   + "</dbReference>\n"
452                   + "<dbReference type=\"Proteomes\" id=\"UP000694570\">\n"
453                   + "<property type=\"component\" value=\"Unplaced\"/>\n"
454                   + "</dbReference>\n"
455                   + "<dbReference type=\"Proteomes\" id=\"UP000694571\">\n"
456                   + "<property type=\"component\" value=\"Unplaced\"/>\n"
457                   + "</dbReference>\n"
458                   + "<dbReference type=\"Proteomes\" id=\"UP000694720\">\n"
459                   + "<property type=\"component\" value=\"Unplaced\"/>\n"
460                   + "</dbReference>\n"
461                   + "<dbReference type=\"Proteomes\" id=\"UP000694722\">\n"
462                   + "<property type=\"component\" value=\"Unplaced\"/>\n"
463                   + "</dbReference>\n"
464                   + "<dbReference type=\"Proteomes\" id=\"UP000694723\">\n"
465                   + "<property type=\"component\" value=\"Unplaced\"/>\n"
466                   + "</dbReference>\n"
467                   + "<dbReference type=\"Proteomes\" id=\"UP000694724\">\n"
468                   + "<property type=\"component\" value=\"Unplaced\"/>\n"
469                   + "</dbReference>\n"
470                   + "<dbReference type=\"Proteomes\" id=\"UP000694725\">\n"
471                   + "<property type=\"component\" value=\"Unplaced\"/>\n"
472                   + "</dbReference>\n"
473                   + "<dbReference type=\"Proteomes\" id=\"UP000694726\">\n"
474                   + "<property type=\"component\" value=\"Unplaced\"/>\n"
475                   + "</dbReference>\n"
476                   + "<dbReference type=\"Proteomes\" id=\"UP000694727\">\n"
477                   + "<property type=\"component\" value=\"Unplaced\"/>\n"
478                   + "</dbReference>\n"
479                   + "<dbReference type=\"Proteomes\" id=\"UP000694728\">\n"
480                   + "<property type=\"component\" value=\"Unplaced\"/>\n"
481                   + "</dbReference>\n"
482                   + "<dbReference type=\"GO\" id=\"GO:0005615\">\n"
483                   + "<property type=\"term\" value=\"C:extracellular space\"/>\n"
484                   + "<property type=\"evidence\" value=\"ECO:0007669\"/>\n"
485                   + "<property type=\"project\" value=\"UniProtKB-SubCell\"/>\n"
486                   + "</dbReference>\n"
487                   + "<dbReference type=\"GO\" id=\"GO:0004190\">\n"
488                   + "<property type=\"term\" value=\"F:aspartic-type endopeptidase activity\"/>\n"
489                   + "<property type=\"evidence\" value=\"ECO:0000318\"/>\n"
490                   + "<property type=\"project\" value=\"GO_Central\"/>\n"
491                   + "</dbReference>\n"
492                   + "<dbReference type=\"GO\" id=\"GO:0006508\">\n"
493                   + "<property type=\"term\" value=\"P:proteolysis\"/>\n"
494                   + "<property type=\"evidence\" value=\"ECO:0000318\"/>\n"
495                   + "<property type=\"project\" value=\"GO_Central\"/>\n"
496                   + "</dbReference>\n"
497                   + "<dbReference type=\"Gene3D\" id=\"6.10.140.60\">\n"
498                   + "<property type=\"match status\" value=\"1\"/>\n"
499                   + "</dbReference>\n"
500                   + "<dbReference type=\"Gene3D\" id=\"2.40.70.10\">\n"
501                   + "<property type=\"entry name\" value=\"Acid Proteases\"/>\n"
502                   + "<property type=\"match status\" value=\"3\"/>\n"
503                   + "</dbReference>\n"
504                   + "<dbReference type=\"InterPro\" id=\"IPR001461\">\n"
505                   + "<property type=\"entry name\" value=\"Aspartic_peptidase_A1\"/>\n"
506                   + "</dbReference>\n"
507                   + "<dbReference type=\"InterPro\" id=\"IPR001969\">\n"
508                   + "<property type=\"entry name\" value=\"Aspartic_peptidase_AS\"/>\n"
509                   + "</dbReference>\n"
510                   + "<dbReference type=\"InterPro\" id=\"IPR012848\">\n"
511                   + "<property type=\"entry name\" value=\"Aspartic_peptidase_N\"/>\n"
512                   + "</dbReference>\n"
513                   + "<dbReference type=\"InterPro\" id=\"IPR033121\">\n"
514                   + "<property type=\"entry name\" value=\"PEPTIDASE_A1\"/>\n"
515                   + "</dbReference>\n"
516                   + "<dbReference type=\"InterPro\" id=\"IPR021109\">\n"
517                   + "<property type=\"entry name\" value=\"Peptidase_aspartic_dom_sf\"/>\n"
518                   + "</dbReference>\n"
519                   + "<dbReference type=\"PANTHER\" id=\"PTHR47966\">\n"
520                   + "<property type=\"entry name\" value=\"BETA-SITE APP-CLEAVING ENZYME, ISOFORM A-RELATED\"/>\n"
521                   + "<property type=\"match status\" value=\"1\"/>\n"
522                   + "</dbReference>\n"
523                   + "<dbReference type=\"PANTHER\" id=\"PTHR47966:SF49\">\n"
524                   + "<property type=\"entry name\" value=\"PEPSIN A-5\"/>\n"
525                   + "<property type=\"match status\" value=\"1\"/>\n"
526                   + "</dbReference>\n"
527                   + "<dbReference type=\"Pfam\" id=\"PF07966\">\n"
528                   + "<property type=\"entry name\" value=\"A1_Propeptide\"/>\n"
529                   + "<property type=\"match status\" value=\"1\"/>\n"
530                   + "</dbReference>\n"
531                   + "<dbReference type=\"Pfam\" id=\"PF00026\">\n"
532                   + "<property type=\"entry name\" value=\"Asp\"/>\n"
533                   + "<property type=\"match status\" value=\"2\"/>\n"
534                   + "</dbReference>\n"
535                   + "<dbReference type=\"PRINTS\" id=\"PR00792\">\n"
536                   + "<property type=\"entry name\" value=\"PEPSIN\"/>\n"
537                   + "</dbReference>\n"
538                   + "<dbReference type=\"SUPFAM\" id=\"SSF50630\">\n"
539                   + "<property type=\"entry name\" value=\"Acid proteases\"/>\n"
540                   + "<property type=\"match status\" value=\"2\"/>\n"
541                   + "</dbReference>\n"
542                   + "<dbReference type=\"PROSITE\" id=\"PS00141\">\n"
543                   + "<property type=\"entry name\" value=\"ASP_PROTEASE\"/>\n"
544                   + "<property type=\"match status\" value=\"2\"/>\n"
545                   + "</dbReference>\n"
546                   + "<dbReference type=\"PROSITE\" id=\"PS51767\">\n"
547                   + "<property type=\"entry name\" value=\"PEPTIDASE_A1\"/>\n"
548                   + "<property type=\"match status\" value=\"1\"/>\n"
549                   + "</dbReference>\n"
550                   + "<proteinExistence type=\"evidence at transcript level\"/>\n"
551                   + "<keyword id=\"KW-0064\">Aspartyl protease</keyword>\n"
552                   + "<keyword id=\"KW-1015\">Disulfide bond</keyword>\n"
553                   + "<keyword id=\"KW-0325\">Glycoprotein</keyword>\n"
554                   + "<keyword id=\"KW-0378\">Hydrolase</keyword>\n"
555                   + "<keyword id=\"KW-0645\">Protease</keyword>\n"
556                   + "<keyword id=\"KW-1185\">Reference proteome</keyword>\n"
557                   + "<keyword id=\"KW-0964\">Secreted</keyword>\n"
558                   + "<keyword id=\"KW-0732\">Signal</keyword>\n"
559                   + "<keyword id=\"KW-0865\">Zymogen</keyword>\n"
560                   + "<feature type=\"signal peptide\" evidence=\"2\">\n"
561                   + "<location>\n" + "<begin position=\"1\"/>\n"
562                   + "<end position=\"15\"/>\n" + "</location>\n"
563                   + "</feature>\n"
564                   + "<feature type=\"propeptide\" id=\"PRO_0000026107\" description=\"Activation peptide\" evidence=\"2\">\n"
565                   + "<location>\n" + "<begin position=\"16\"/>\n"
566                   + "<end status=\"unknown\"/>\n" + "</location>\n"
567                   + "</feature>\n"
568                   + "<feature type=\"chain\" id=\"PRO_0000026108\" description=\"Pregnancy-associated glycoprotein 2\">\n"
569                   + "<location>\n" + "<begin status=\"unknown\"/>\n"
570                   + "<end position=\"420\"/>\n" + "</location>\n"
571                   + "</feature>\n"
572                   + "<feature type=\"domain\" description=\"Peptidase A1\" evidence=\"3\">\n"
573                   + "<location>\n" + "<begin position=\"76\"/>\n"
574                   + "<end position=\"417\"/>\n" + "</location>\n"
575                   + "</feature>\n"
576                   + "<feature type=\"active site\" evidence=\"4\">\n"
577                   + "<location>\n" + "<position position=\"94\"/>\n"
578                   + "</location>\n" + "</feature>\n"
579                   + "<feature type=\"active site\" evidence=\"4\">\n"
580                   + "<location>\n" + "<position position=\"277\"/>\n"
581                   + "</location>\n" + "</feature>\n"
582                   + "<feature type=\"glycosylation site\" description=\"N-linked (GlcNAc...) asparagine\" evidence=\"2\">\n"
583                   + "<location>\n" + "<position position=\"56\"/>\n"
584                   + "</location>\n" + "</feature>\n"
585                   + "<feature type=\"glycosylation site\" description=\"N-linked (GlcNAc...) asparagine\" evidence=\"2\">\n"
586                   + "<location>\n" + "<position position=\"79\"/>\n"
587                   + "</location>\n" + "</feature>\n"
588                   + "<feature type=\"disulfide bond\" evidence=\"1\">\n"
589                   + "<location>\n" + "<begin position=\"107\"/>\n"
590                   + "<end position=\"112\"/>\n" + "</location>\n"
591                   + "</feature>\n"
592                   + "<feature type=\"disulfide bond\" evidence=\"1\">\n"
593                   + "<location>\n" + "<begin position=\"268\"/>\n"
594                   + "<end position=\"272\"/>\n" + "</location>\n"
595                   + "</feature>\n"
596                   + "<feature type=\"disulfide bond\" evidence=\"1\">\n"
597                   + "<location>\n" + "<begin position=\"341\"/>\n"
598                   + "<end position=\"376\"/>\n" + "</location>\n"
599                   + "</feature>\n"
600                   + "<feature type=\"sequence conflict\" description=\"In Ref. 1.\" evidence=\"5\" ref=\"1\">\n"
601                   + "<location>\n" + "<begin position=\"335\"/>\n"
602                   + "<end position=\"367\"/>\n" + "</location>\n"
603                   + "</feature>\n"
604                   + "<evidence type=\"ECO:0000250\" key=\"1\"/>\n"
605                   + "<evidence type=\"ECO:0000255\" key=\"2\"/>\n"
606                   + "<evidence type=\"ECO:0000255\" key=\"3\">\n"
607                   + "<source>\n"
608                   + "<dbReference type=\"PROSITE-ProRule\" id=\"PRU01103\"/>\n"
609                   + "</source>\n" + "</evidence>\n"
610                   + "<evidence type=\"ECO:0000255\" key=\"4\">\n"
611                   + "<source>\n"
612                   + "<dbReference type=\"PROSITE-ProRule\" id=\"PRU10094\"/>\n"
613                   + "</source>\n" + "</evidence>\n"
614                   + "<evidence type=\"ECO:0000305\" key=\"5\"/>\n"
615                   + "<sequence length=\"420\" mass=\"47132\" checksum=\"094153B6C1B1FCDB\" modified=\"1997-11-01\" version=\"1\" precursor=\"true\">MKWLVILGLVALSDCLVMIPLTKVKSVRESLREKGLLKNFLKEHPYNMIQNLLSKNSSHVQKFSYQPLRNYLDMVYVGNISIGTPPQQFSVVFDTGSSDLWVPSIYCKSKACVTHRSFNPSHSSTFHDRGKSIKLEYGSGKMSGFLGQDTVRIGQLTSTGQAFGLSKEETGKAFEHAIFDGILGLAYPSIAIKGTTTVIDNLKKQDQISEPVFAFYLSSDKEEGSVVMFGGVDKKYYKGDLKWVPLTQTSYWQIALDRITCRGRVIGCPRGCQAIVDTGTSMLHGPSKAVAKIHSLIKHFEKEYVVPCNARKALPDIVFTINNVDYPVPAQAYIRKYVVPCNARKALPDIVFTINNVDYPVPAQAYIRKNANNNRCYSTFEDIMDTLNQREIWILGDVFLRLYFTVYDEGQNRIGLAQAT</sequence>\n"
616                   + "</entry>\n"
617                   + "<copyright> Copyrighted by the UniProt Consortium, see https://www.uniprot.org/terms Distributed under the Creative Commons Attribution (CC BY 4.0) License </copyright>\n"
618                   + "</uniprot>");
619
620   @DataProvider
621   public Object[][] problemEntries()
622   {
623     return new Object[][] { new Object[] { Q29079 } };
624   }
625
626   @Test(groups = "Functional", dataProvider = "problemEntries")
627   public SequenceI testimportOfProblemEntries(String entry)
628   {
629     Uniprot u = new Uniprot();
630     InputStream is = new ByteArrayInputStream(entry.getBytes());
631     List<Entry> entries = u.getUniprotEntries(is);
632     assertEquals(1, entries.size());
633     SequenceI sq = u.uniprotEntryToSequence(entries.get(0));
634     assertNotNull(sq);
635     return sq;
636   }
637
638   @Test(groups = "Functional")
639   public void checkIndefiniteSequenceFeatures()
640   {
641     SequenceI upseq = testimportOfProblemEntries(Q29079);
642     List<SequenceFeature> sf = upseq.getFeatures()
643             .getPositionalFeatures("chain");
644     assertNotNull(sf);
645     assertTrue(sf.size() == 1);
646     SequenceFeature chainFeaure = sf.get(0);
647     assertTrue(chainFeaure.getBegin() == 1);
648     assertTrue(chainFeaure.getEnd() == upseq.getEnd());
649     assertNotNull(chainFeaure.getValueAsString("start_status"));
650     assertNull(chainFeaure.getValueAsString("end_status"));
651     assertTrue(
652             "unknown".equals(chainFeaure.getValueAsString("start_status")));
653   }
654 }