2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.ws.dbsources;
import static org.testng.Assert.assertFalse;
import static org.testng.AssertJUnit.assertEquals;
import static org.testng.AssertJUnit.assertNotNull;
import static org.testng.AssertJUnit.assertNull;
import static org.testng.AssertJUnit.assertTrue;

import java.io.ByteArrayInputStream;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.math.BigInteger;
import java.nio.charset.StandardCharsets;
import java.util.List;

import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;

import jalview.datamodel.DBRefEntry;
import jalview.datamodel.DBRefSource;
import jalview.datamodel.SequenceFeature;
import jalview.datamodel.SequenceI;
import jalview.gui.JvOptionPane;
import jalview.util.DBRefUtils;
import jalview.xml.binding.uniprot.DbReferenceType;
import jalview.xml.binding.uniprot.Entry;
import jalview.xml.binding.uniprot.FeatureType;
import jalview.xml.binding.uniprot.LocationType;
import jalview.xml.binding.uniprot.PositionType;
52 public class UniprotTest
55 @BeforeClass(alwaysRun = true)
56 public void setUpJvOptionPane()
58 JvOptionPane.setInteractiveMode(false);
59 JvOptionPane.setMockResponse(JvOptionPane.CANCEL_OPTION);
62 // adapted from http://www.uniprot.org/uniprot/A9CKP4.xml
63 private static final String UNIPROT_XML = "<?xml version='1.0' encoding='UTF-8'?>"
64 + "<uniprot xmlns=\"http://uniprot.org/uniprot\">"
65 + "<entry dataset=\"TrEMBL\" created=\"2008-01-15\" modified=\"2015-03-04\" version=\"38\">"
66 + "<accession>A9CKP4</accession>"
67 + "<accession>A9CKP5</accession>" + "<name>A9CKP4_AGRT5</name>"
68 + "<name>A9CKP4_AGRT6</name>"
69 + "<protein><recommendedName><fullName>Mitogen-activated protein kinase 13</fullName></recommendedName></protein>"
70 + "<dbReference type=\"PDB\" id=\"2FSQ\"><property type=\"method\" value=\"X-ray\"/><property type=\"resolution\" value=\"1.40\"/></dbReference>"
71 + "<dbReference type=\"PDBsum\" id=\"2FSR\"/>"
72 + "<dbReference type=\"EMBL\" id=\"AE007869\"><property type=\"protein sequence ID\" value=\"AAK85932.1\"/><property type=\"molecule type\" value=\"Genomic_DNA\"/></dbReference>"
73 + "<feature type=\"signal peptide\" evidence=\"7\"><location><begin position=\"1\"/><end position=\"18\"/></location></feature>"
74 + "<feature type=\"propeptide\" description=\"Activation peptide\" id=\"PRO_0000027399\" evidence=\"9 16 17 18\"><location><begin position=\"19\"/><end position=\"20\"/></location></feature>"
75 + "<feature type=\"chain\" description=\"Granzyme B\" id=\"PRO_0000027400\"><location><begin position=\"21\"/><end position=\"247\"/></location></feature>"
76 + "<feature type=\"sequence variant\"><original>M</original><variation>L</variation><location><position position=\"41\"/></location></feature>"
77 + "<feature type=\"sequence variant\" description=\"Pathogenic\"><original>M</original><variation>L</variation><location><position position=\"41\"/></location></feature>"
78 + "<feature type=\"sequence variant\" description=\"Pathogenic\"><original>M</original><location><position position=\"41\"/></location></feature>"
79 + "<feature type=\"sequence variant\" description=\"Foo\"><variation>L</variation><variation>LMV</variation><original>M</original><location><position position=\"42\"/></location></feature>"
80 + "<feature type=\"sequence variant\" description=\"Foo\"><variation>LL</variation><variation>LMV</variation><original>ML</original><location><begin position=\"42\"/><end position=\"43\"/></location></feature>"
81 + "<feature type=\"sequence variant\" description=\"Foo Too\"><variation>LL</variation><variation>LMVK</variation><original>MLML</original><location><begin position=\"42\"/><end position=\"45\"/></location></feature>"
82 + "<sequence length=\"10\" mass=\"27410\" checksum=\"8CB760AACF88FE6C\" modified=\"2008-01-15\" version=\"1\">MHAPL VSKDL</sequence></entry>"
86 * Test the method that unmarshals XML to a Uniprot model
88 * @throws UnsupportedEncodingException
90 @Test(groups = { "Functional" })
91 public void testGetUniprotEntries() throws UnsupportedEncodingException
93 Uniprot u = new Uniprot();
94 InputStream is = new ByteArrayInputStream(UNIPROT_XML.getBytes());
95 List<Entry> entries = u.getUniprotEntries(is);
96 assertEquals(1, entries.size());
97 Entry entry = entries.get(0);
98 assertEquals(2, entry.getName().size());
99 assertEquals("A9CKP4_AGRT5", entry.getName().get(0));
100 assertEquals("A9CKP4_AGRT6", entry.getName().get(1));
101 assertEquals(2, entry.getAccession().size());
102 assertEquals("A9CKP4", entry.getAccession().get(0));
103 assertEquals("A9CKP5", entry.getAccession().get(1));
105 assertEquals("MHAPL VSKDL", entry.getSequence().getValue());
107 assertEquals("Mitogen-activated protein kinase 13", entry.getProtein()
108 .getRecommendedName().getFullName().getValue());
111 * Check sequence features
113 List<FeatureType> features = entry.getFeature();
114 assertEquals(9, features.size());
115 FeatureType sf = features.get(0);
116 assertEquals("signal peptide", sf.getType());
117 assertNull(sf.getDescription());
118 assertNull(sf.getStatus());
119 assertNull(sf.getLocation().getPosition());
120 assertEquals(1, sf.getLocation().getBegin().getPosition().intValue());
121 assertEquals(18, sf.getLocation().getEnd().getPosition().intValue());
122 sf = features.get(1);
123 assertEquals("propeptide", sf.getType());
124 assertEquals("Activation peptide", sf.getDescription());
125 assertNull(sf.getLocation().getPosition());
126 assertEquals(19, sf.getLocation().getBegin().getPosition().intValue());
127 assertEquals(20, sf.getLocation().getEnd().getPosition().intValue());
128 sf = features.get(2);
129 assertEquals("chain", sf.getType());
130 assertEquals("Granzyme B", sf.getDescription());
131 assertNull(sf.getLocation().getPosition());
132 assertEquals(21, sf.getLocation().getBegin().getPosition().intValue());
133 assertEquals(247, sf.getLocation().getEnd().getPosition().intValue());
135 sf = features.get(3);
136 assertEquals("sequence variant", sf.getType());
137 assertNull(sf.getDescription());
139 sf.getLocation().getPosition().getPosition().intValue());
140 assertNull(sf.getLocation().getBegin());
141 assertNull(sf.getLocation().getEnd());
143 sf = features.get(4);
144 assertEquals("sequence variant", sf.getType());
145 assertEquals("Pathogenic", sf.getDescription());
147 sf.getLocation().getPosition().getPosition().intValue());
148 assertNull(sf.getLocation().getBegin());
149 assertNull(sf.getLocation().getEnd());
151 sf = features.get(5);
152 assertEquals("sequence variant", sf.getType());
153 assertEquals("Pathogenic", sf.getDescription());
155 sf.getLocation().getPosition().getPosition().intValue());
156 assertNull(sf.getLocation().getBegin());
157 assertNull(sf.getLocation().getEnd());
159 sf = features.get(6);
160 assertEquals("sequence variant", sf.getType());
161 assertEquals("Foo", sf.getDescription());
163 sf.getLocation().getPosition().getPosition().intValue());
164 assertNull(sf.getLocation().getBegin());
165 assertNull(sf.getLocation().getEnd());
166 Assert.assertEquals(Uniprot.getDescription(sf), "<html>p.Met42Leu"
167 + "<br/> " + "p.Met42LeuMetVal Foo</html>");
169 sf = features.get(7);
170 assertNull(sf.getLocation().getPosition());
171 assertEquals(42, sf.getLocation().getBegin().getPosition().intValue());
172 assertEquals(43, sf.getLocation().getEnd().getPosition().intValue());
173 Assert.assertEquals(Uniprot.getDescription(sf), "<html>p.MetLeu42LeuLeu"
174 + "<br/> " + "p.MetLeu42LeuMetVal Foo</html>");
176 sf = features.get(8);
177 assertNull(sf.getLocation().getPosition());
178 assertEquals(42, sf.getLocation().getBegin().getPosition().intValue());
179 assertEquals(45, sf.getLocation().getEnd().getPosition().intValue());
180 Assert.assertEquals(Uniprot.getDescription(sf), "<html>p.MLML42LeuLeu"
181 + "<br/> " + "p.MLML42LMVK Foo Too</html>");
184 * Check cross-references
186 List<DbReferenceType> xrefs = entry.getDbReference();
187 assertEquals(3, xrefs.size());
189 DbReferenceType xref = xrefs.get(0);
190 assertEquals("2FSQ", xref.getId());
191 assertEquals("PDB", xref.getType());
192 assertEquals("X-ray",
193 Uniprot.getProperty(xref.getProperty(), "method"));
195 Uniprot.getProperty(xref.getProperty(), "resolution"));
198 assertEquals("2FSR", xref.getId());
199 assertEquals("PDBsum", xref.getType());
200 assertTrue(xref.getProperty().isEmpty());
203 assertEquals("AE007869", xref.getId());
204 assertEquals("EMBL", xref.getType());
205 assertEquals("AAK85932.1",
206 Uniprot.getProperty(xref.getProperty(), "protein sequence ID"));
207 assertEquals("Genomic_DNA",
208 Uniprot.getProperty(xref.getProperty(), "molecule type"));
211 @Test(groups = { "Functional" })
212 public void testGetUniprotSequence() throws UnsupportedEncodingException
214 InputStream is = new ByteArrayInputStream(UNIPROT_XML.getBytes());
215 Entry entry = new Uniprot().getUniprotEntries(is).get(0);
216 SequenceI seq = new Uniprot().uniprotEntryToSequence(entry);
218 assertEquals(6, seq.getDBRefs().size()); // 2*Uniprot, PDB, PDBsum, 2*EMBL
219 assertEquals(seq.getSequenceAsString(),
220 seq.createDatasetSequence().getSequenceAsString());
221 assertEquals(2, seq.getPrimaryDBRefs().size());
222 List<DBRefEntry> res = DBRefUtils.searchRefs(seq.getPrimaryDBRefs(),
224 assertEquals(1, res.size());
225 assertTrue(res.get(0).isCanonical());
226 res = DBRefUtils.searchRefsForSource(seq.getDBRefs(),
227 DBRefSource.UNIPROT);
228 assertEquals(2, res.size());
230 * NB this test fragile - relies on ordering being preserved
232 assertTrue(res.get(0).isCanonical());
233 assertFalse(res.get(1).isCanonical());
235 // check version is preserved for EMBLCDS
236 res = DBRefUtils.searchRefs(seq.getDBRefs(), "AAK85932");
237 assertEquals(1, res.size());
238 // Ideally we would expect AAK85932.1 -> AAK85932
239 // assertTrue("1".equals(res.get(0).getVersion()));
240 // but it also passes through DBrefUtils.ensurePrimaries which adds
241 // (promoted) to the version string
242 // FIXME: Jim needs to specify what (promoted) means !! - or perhaps we just
244 assertEquals("1 (promoted)", (res.get(0).getVersion()));
248 * Test the method that formats the sequence id
250 * @throws UnsupportedEncodingException
252 @Test(groups = { "Functional" })
253 public void testGetUniprotEntryId() throws UnsupportedEncodingException
255 InputStream is = new ByteArrayInputStream(UNIPROT_XML.getBytes());
256 Entry entry = new Uniprot().getUniprotEntries(is).get(0);
259 * name formatted with Uniprot Entry name
261 String expectedName = "A9CKP4_AGRT5|A9CKP4_AGRT6";
262 assertEquals(expectedName, Uniprot.getUniprotEntryId(entry));
266 * Test the method that formats the sequence description
268 * @throws UnsupportedEncodingException
270 @Test(groups = { "Functional" })
271 public void testGetUniprotEntryDescription()
272 throws UnsupportedEncodingException
274 InputStream is = new ByteArrayInputStream(UNIPROT_XML.getBytes());
275 Entry entry = new Uniprot().getUniprotEntries(is).get(0);
277 assertEquals("Mitogen-activated protein kinase 13",
278 Uniprot.getUniprotEntryDescription(entry));
281 @Test(groups = { "Functional" })
282 public void testGetDescription()
284 FeatureType ft = new FeatureType();
285 assertEquals("", Uniprot.getDescription(ft));
287 ft.setDescription("Hello");
288 assertEquals("Hello", Uniprot.getDescription(ft));
290 ft.setLocation(new LocationType());
291 ft.getLocation().setPosition(new PositionType());
292 ft.getLocation().getPosition().setPosition(BigInteger.valueOf(23));
294 ft.getVariation().add("y");
295 assertEquals("p.Lys23Tyr Hello", Uniprot.getDescription(ft));
297 // multiple variants generate an html description over more than one line
298 ft.getVariation().add("W");
299 assertEquals("<html>p.Lys23Tyr<br/> p.Lys23Trp Hello</html>",
300 Uniprot.getDescription(ft));
304 * up to 3 bases (original or variant) are shown using 3 letter code
306 ft.getVariation().clear();
307 ft.getVariation().add("KWE");
308 ft.setOriginal("KLS");
309 assertEquals("p.LysLeuSer23LysTrpGlu Hello",
310 Uniprot.getDescription(ft));
312 // adding a fourth original base switches to single letter code
313 ft.setOriginal("KLST");
314 assertEquals("p.KLST23LysTrpGlu Hello", Uniprot.getDescription(ft));
316 // adding a fourth variant switches to single letter code
317 ft.getVariation().clear();
318 ft.getVariation().add("KWES");
319 assertEquals("p.KLST23KWES Hello", Uniprot.getDescription(ft));
321 ft.getVariation().clear();
322 ft.getVariation().add("z"); // unknown variant - fails gracefully
324 assertEquals("p.Lys23z Hello", Uniprot.getDescription(ft));
326 ft.getVariation().clear(); // variant missing - is ignored
327 assertEquals("Hello", Uniprot.getDescription(ft));
330 public static String Q29079 = Q29079 = new String(
331 "<uniprot xmlns=\"http://uniprot.org/uniprot\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://uniprot.org/uniprot http://www.uniprot.org/docs/uniprot.xsd\">\n"
332 + "<entry xmlns=\"http://uniprot.org/uniprot\" dataset=\"Swiss-Prot\" created=\"1997-11-01\" modified=\"2023-09-13\" version=\"103\">\n"
333 + "<accession>Q29079</accession>\n"
334 + "<accession>Q29017</accession>\n"
335 + "<name>PAG2_PIG</name>\n" + "<protein>\n"
336 + "<recommendedName>\n"
337 + "<fullName>Pregnancy-associated glycoprotein 2</fullName>\n"
338 + "<shortName>PAG 2</shortName>\n"
339 + "<ecNumber>3.4.23.-</ecNumber>\n"
340 + "</recommendedName>\n" + "</protein>\n" + "<gene>\n"
341 + "<name type=\"primary\">PAG2</name>\n" + "</gene>\n"
343 + "<name type=\"scientific\">Sus scrofa</name>\n"
344 + "<name type=\"common\">Pig</name>\n"
345 + "<dbReference type=\"NCBI Taxonomy\" id=\"9823\"/>\n"
346 + "<lineage>\n" + "<taxon>Eukaryota</taxon>\n"
347 + "<taxon>Metazoa</taxon>\n" + "<taxon>Chordata</taxon>\n"
348 + "<taxon>Craniata</taxon>\n"
349 + "<taxon>Vertebrata</taxon>\n"
350 + "<taxon>Euteleostomi</taxon>\n"
351 + "<taxon>Mammalia</taxon>\n"
352 + "<taxon>Eutheria</taxon>\n"
353 + "<taxon>Laurasiatheria</taxon>\n"
354 + "<taxon>Artiodactyla</taxon>\n"
355 + "<taxon>Suina</taxon>\n" + "<taxon>Suidae</taxon>\n"
356 + "<taxon>Sus</taxon>\n" + "</lineage>\n"
357 + "</organism>\n" + "<reference key=\"1\">\n"
358 + "<citation type=\"journal article\" date=\"1995\" name=\"Biol. Reprod.\" volume=\"53\" first=\"21\" last=\"28\">\n"
359 + "<title>Porcine pregnancy-associated glycoproteins: new members of the aspartic proteinase gene family expressed in trophectoderm.</title>\n"
360 + "<authorList>\n" + "<person name=\"Szafranska B.\"/>\n"
361 + "<person name=\"Xie S.\"/>\n"
362 + "<person name=\"Green J.\"/>\n"
363 + "<person name=\"Roberts R.M.\"/>\n" + "</authorList>\n"
364 + "<dbReference type=\"PubMed\" id=\"7669851\"/>\n"
365 + "<dbReference type=\"DOI\" id=\"10.1095/biolreprod53.1.21\"/>\n"
367 + "<scope>NUCLEOTIDE SEQUENCE [GENOMIC DNA]</scope>\n"
368 + "</reference>\n" + "<reference key=\"2\">\n"
369 + "<citation type=\"journal article\" date=\"2001\" name=\"Mol. Reprod. Dev.\" volume=\"60\" first=\"137\" last=\"146\">\n"
370 + "<title>Gene for porcine pregnancy-associated glycoprotein 2 (poPAG2): its structural organization and analysis of its promoter.</title>\n"
371 + "<authorList>\n" + "<person name=\"Szafranska B.\"/>\n"
372 + "<person name=\"Miura R.\"/>\n"
373 + "<person name=\"Ghosh D.\"/>\n"
374 + "<person name=\"Ezashi T.\"/>\n"
375 + "<person name=\"Xie S.\"/>\n"
376 + "<person name=\"Roberts R.M.\"/>\n"
377 + "<person name=\"Green J.A.\"/>\n" + "</authorList>\n"
378 + "<dbReference type=\"PubMed\" id=\"11553911\"/>\n"
379 + "<dbReference type=\"DOI\" id=\"10.1002/mrd.1070\"/>\n"
381 + "<scope>NUCLEOTIDE SEQUENCE [GENOMIC DNA]</scope>\n"
382 + "<source>\n" + "<tissue>Placenta</tissue>\n"
383 + "</source>\n" + "</reference>\n"
384 + "<comment type=\"subcellular location\">\n"
385 + "<subcellularLocation>\n"
386 + "<location>Secreted</location>\n"
387 + "<location>Extracellular space</location>\n"
388 + "</subcellularLocation>\n" + "</comment>\n"
389 + "<comment type=\"tissue specificity\">\n"
390 + "<text>Expressed throughout the chorion, with the signal localized exclusively over the trophectoderm.</text>\n"
392 + "<comment type=\"developmental stage\">\n"
393 + "<text>Expression was detected at day 15, coinciding with the beginning of implantation, and continued throughout gestation.</text>\n"
394 + "</comment>\n" + "<comment type=\"similarity\">\n"
395 + "<text evidence=\"5\">Belongs to the peptidase A1 family.</text>\n"
397 + "<dbReference type=\"EC\" id=\"3.4.23.-\"/>\n"
398 + "<dbReference type=\"EMBL\" id=\"U39763\">\n"
399 + "<property type=\"protein sequence ID\" value=\"AAA92055.1\"/>\n"
400 + "<property type=\"molecule type\" value=\"Genomic_DNA\"/>\n"
402 + "<dbReference type=\"EMBL\" id=\"U41421\">\n"
403 + "<property type=\"protein sequence ID\" value=\"AAA92055.1\"/>\n"
404 + "<property type=\"status\" value=\"JOINED\"/>\n"
405 + "<property type=\"molecule type\" value=\"Genomic_DNA\"/>\n"
407 + "<dbReference type=\"EMBL\" id=\"U41422\">\n"
408 + "<property type=\"protein sequence ID\" value=\"AAA92055.1\"/>\n"
409 + "<property type=\"status\" value=\"JOINED\"/>\n"
410 + "<property type=\"molecule type\" value=\"Genomic_DNA\"/>\n"
412 + "<dbReference type=\"EMBL\" id=\"U39199\">\n"
413 + "<property type=\"protein sequence ID\" value=\"AAA92055.1\"/>\n"
414 + "<property type=\"status\" value=\"JOINED\"/>\n"
415 + "<property type=\"molecule type\" value=\"Genomic_DNA\"/>\n"
417 + "<dbReference type=\"EMBL\" id=\"U41423\">\n"
418 + "<property type=\"protein sequence ID\" value=\"AAA92055.1\"/>\n"
419 + "<property type=\"status\" value=\"JOINED\"/>\n"
420 + "<property type=\"molecule type\" value=\"Genomic_DNA\"/>\n"
422 + "<dbReference type=\"EMBL\" id=\"U41424\">\n"
423 + "<property type=\"protein sequence ID\" value=\"AAA92055.1\"/>\n"
424 + "<property type=\"status\" value=\"JOINED\"/>\n"
425 + "<property type=\"molecule type\" value=\"Genomic_DNA\"/>\n"
427 + "<dbReference type=\"EMBL\" id=\"U39762\">\n"
428 + "<property type=\"protein sequence ID\" value=\"AAA92055.1\"/>\n"
429 + "<property type=\"status\" value=\"JOINED\"/>\n"
430 + "<property type=\"molecule type\" value=\"Genomic_DNA\"/>\n"
432 + "<dbReference type=\"EMBL\" id=\"L34361\">\n"
433 + "<property type=\"protein sequence ID\" value=\"AAA81531.1\"/>\n"
434 + "<property type=\"molecule type\" value=\"Genomic_DNA\"/>\n"
436 + "<dbReference type=\"PIR\" id=\"I46617\">\n"
437 + "<property type=\"entry name\" value=\"I46617\"/>\n"
439 + "<dbReference type=\"AlphaFoldDB\" id=\"Q29079\"/>\n"
440 + "<dbReference type=\"SMR\" id=\"Q29079\"/>\n"
441 + "<dbReference type=\"MEROPS\" id=\"A01.051\"/>\n"
442 + "<dbReference type=\"GlyCosmos\" id=\"Q29079\">\n"
443 + "<property type=\"glycosylation\" value=\"2 sites, No reported glycans\"/>\n"
445 + "<dbReference type=\"InParanoid\" id=\"Q29079\"/>\n"
446 + "<dbReference type=\"Proteomes\" id=\"UP000008227\">\n"
447 + "<property type=\"component\" value=\"Unplaced\"/>\n"
449 + "<dbReference type=\"Proteomes\" id=\"UP000314985\">\n"
450 + "<property type=\"component\" value=\"Unplaced\"/>\n"
452 + "<dbReference type=\"Proteomes\" id=\"UP000694570\">\n"
453 + "<property type=\"component\" value=\"Unplaced\"/>\n"
455 + "<dbReference type=\"Proteomes\" id=\"UP000694571\">\n"
456 + "<property type=\"component\" value=\"Unplaced\"/>\n"
458 + "<dbReference type=\"Proteomes\" id=\"UP000694720\">\n"
459 + "<property type=\"component\" value=\"Unplaced\"/>\n"
461 + "<dbReference type=\"Proteomes\" id=\"UP000694722\">\n"
462 + "<property type=\"component\" value=\"Unplaced\"/>\n"
464 + "<dbReference type=\"Proteomes\" id=\"UP000694723\">\n"
465 + "<property type=\"component\" value=\"Unplaced\"/>\n"
467 + "<dbReference type=\"Proteomes\" id=\"UP000694724\">\n"
468 + "<property type=\"component\" value=\"Unplaced\"/>\n"
470 + "<dbReference type=\"Proteomes\" id=\"UP000694725\">\n"
471 + "<property type=\"component\" value=\"Unplaced\"/>\n"
473 + "<dbReference type=\"Proteomes\" id=\"UP000694726\">\n"
474 + "<property type=\"component\" value=\"Unplaced\"/>\n"
476 + "<dbReference type=\"Proteomes\" id=\"UP000694727\">\n"
477 + "<property type=\"component\" value=\"Unplaced\"/>\n"
479 + "<dbReference type=\"Proteomes\" id=\"UP000694728\">\n"
480 + "<property type=\"component\" value=\"Unplaced\"/>\n"
482 + "<dbReference type=\"GO\" id=\"GO:0005615\">\n"
483 + "<property type=\"term\" value=\"C:extracellular space\"/>\n"
484 + "<property type=\"evidence\" value=\"ECO:0007669\"/>\n"
485 + "<property type=\"project\" value=\"UniProtKB-SubCell\"/>\n"
487 + "<dbReference type=\"GO\" id=\"GO:0004190\">\n"
488 + "<property type=\"term\" value=\"F:aspartic-type endopeptidase activity\"/>\n"
489 + "<property type=\"evidence\" value=\"ECO:0000318\"/>\n"
490 + "<property type=\"project\" value=\"GO_Central\"/>\n"
492 + "<dbReference type=\"GO\" id=\"GO:0006508\">\n"
493 + "<property type=\"term\" value=\"P:proteolysis\"/>\n"
494 + "<property type=\"evidence\" value=\"ECO:0000318\"/>\n"
495 + "<property type=\"project\" value=\"GO_Central\"/>\n"
497 + "<dbReference type=\"Gene3D\" id=\"6.10.140.60\">\n"
498 + "<property type=\"match status\" value=\"1\"/>\n"
500 + "<dbReference type=\"Gene3D\" id=\"2.40.70.10\">\n"
501 + "<property type=\"entry name\" value=\"Acid Proteases\"/>\n"
502 + "<property type=\"match status\" value=\"3\"/>\n"
504 + "<dbReference type=\"InterPro\" id=\"IPR001461\">\n"
505 + "<property type=\"entry name\" value=\"Aspartic_peptidase_A1\"/>\n"
507 + "<dbReference type=\"InterPro\" id=\"IPR001969\">\n"
508 + "<property type=\"entry name\" value=\"Aspartic_peptidase_AS\"/>\n"
510 + "<dbReference type=\"InterPro\" id=\"IPR012848\">\n"
511 + "<property type=\"entry name\" value=\"Aspartic_peptidase_N\"/>\n"
513 + "<dbReference type=\"InterPro\" id=\"IPR033121\">\n"
514 + "<property type=\"entry name\" value=\"PEPTIDASE_A1\"/>\n"
516 + "<dbReference type=\"InterPro\" id=\"IPR021109\">\n"
517 + "<property type=\"entry name\" value=\"Peptidase_aspartic_dom_sf\"/>\n"
519 + "<dbReference type=\"PANTHER\" id=\"PTHR47966\">\n"
520 + "<property type=\"entry name\" value=\"BETA-SITE APP-CLEAVING ENZYME, ISOFORM A-RELATED\"/>\n"
521 + "<property type=\"match status\" value=\"1\"/>\n"
523 + "<dbReference type=\"PANTHER\" id=\"PTHR47966:SF49\">\n"
524 + "<property type=\"entry name\" value=\"PEPSIN A-5\"/>\n"
525 + "<property type=\"match status\" value=\"1\"/>\n"
527 + "<dbReference type=\"Pfam\" id=\"PF07966\">\n"
528 + "<property type=\"entry name\" value=\"A1_Propeptide\"/>\n"
529 + "<property type=\"match status\" value=\"1\"/>\n"
531 + "<dbReference type=\"Pfam\" id=\"PF00026\">\n"
532 + "<property type=\"entry name\" value=\"Asp\"/>\n"
533 + "<property type=\"match status\" value=\"2\"/>\n"
535 + "<dbReference type=\"PRINTS\" id=\"PR00792\">\n"
536 + "<property type=\"entry name\" value=\"PEPSIN\"/>\n"
538 + "<dbReference type=\"SUPFAM\" id=\"SSF50630\">\n"
539 + "<property type=\"entry name\" value=\"Acid proteases\"/>\n"
540 + "<property type=\"match status\" value=\"2\"/>\n"
542 + "<dbReference type=\"PROSITE\" id=\"PS00141\">\n"
543 + "<property type=\"entry name\" value=\"ASP_PROTEASE\"/>\n"
544 + "<property type=\"match status\" value=\"2\"/>\n"
546 + "<dbReference type=\"PROSITE\" id=\"PS51767\">\n"
547 + "<property type=\"entry name\" value=\"PEPTIDASE_A1\"/>\n"
548 + "<property type=\"match status\" value=\"1\"/>\n"
550 + "<proteinExistence type=\"evidence at transcript level\"/>\n"
551 + "<keyword id=\"KW-0064\">Aspartyl protease</keyword>\n"
552 + "<keyword id=\"KW-1015\">Disulfide bond</keyword>\n"
553 + "<keyword id=\"KW-0325\">Glycoprotein</keyword>\n"
554 + "<keyword id=\"KW-0378\">Hydrolase</keyword>\n"
555 + "<keyword id=\"KW-0645\">Protease</keyword>\n"
556 + "<keyword id=\"KW-1185\">Reference proteome</keyword>\n"
557 + "<keyword id=\"KW-0964\">Secreted</keyword>\n"
558 + "<keyword id=\"KW-0732\">Signal</keyword>\n"
559 + "<keyword id=\"KW-0865\">Zymogen</keyword>\n"
560 + "<feature type=\"signal peptide\" evidence=\"2\">\n"
561 + "<location>\n" + "<begin position=\"1\"/>\n"
562 + "<end position=\"15\"/>\n" + "</location>\n"
564 + "<feature type=\"propeptide\" id=\"PRO_0000026107\" description=\"Activation peptide\" evidence=\"2\">\n"
565 + "<location>\n" + "<begin position=\"16\"/>\n"
566 + "<end status=\"unknown\"/>\n" + "</location>\n"
568 + "<feature type=\"chain\" id=\"PRO_0000026108\" description=\"Pregnancy-associated glycoprotein 2\">\n"
569 + "<location>\n" + "<begin status=\"unknown\"/>\n"
570 + "<end position=\"420\"/>\n" + "</location>\n"
572 + "<feature type=\"domain\" description=\"Peptidase A1\" evidence=\"3\">\n"
573 + "<location>\n" + "<begin position=\"76\"/>\n"
574 + "<end position=\"417\"/>\n" + "</location>\n"
576 + "<feature type=\"active site\" evidence=\"4\">\n"
577 + "<location>\n" + "<position position=\"94\"/>\n"
578 + "</location>\n" + "</feature>\n"
579 + "<feature type=\"active site\" evidence=\"4\">\n"
580 + "<location>\n" + "<position position=\"277\"/>\n"
581 + "</location>\n" + "</feature>\n"
582 + "<feature type=\"glycosylation site\" description=\"N-linked (GlcNAc...) asparagine\" evidence=\"2\">\n"
583 + "<location>\n" + "<position position=\"56\"/>\n"
584 + "</location>\n" + "</feature>\n"
585 + "<feature type=\"glycosylation site\" description=\"N-linked (GlcNAc...) asparagine\" evidence=\"2\">\n"
586 + "<location>\n" + "<position position=\"79\"/>\n"
587 + "</location>\n" + "</feature>\n"
588 + "<feature type=\"disulfide bond\" evidence=\"1\">\n"
589 + "<location>\n" + "<begin position=\"107\"/>\n"
590 + "<end position=\"112\"/>\n" + "</location>\n"
592 + "<feature type=\"disulfide bond\" evidence=\"1\">\n"
593 + "<location>\n" + "<begin position=\"268\"/>\n"
594 + "<end position=\"272\"/>\n" + "</location>\n"
596 + "<feature type=\"disulfide bond\" evidence=\"1\">\n"
597 + "<location>\n" + "<begin position=\"341\"/>\n"
598 + "<end position=\"376\"/>\n" + "</location>\n"
600 + "<feature type=\"sequence conflict\" description=\"In Ref. 1.\" evidence=\"5\" ref=\"1\">\n"
601 + "<location>\n" + "<begin position=\"335\"/>\n"
602 + "<end position=\"367\"/>\n" + "</location>\n"
604 + "<evidence type=\"ECO:0000250\" key=\"1\"/>\n"
605 + "<evidence type=\"ECO:0000255\" key=\"2\"/>\n"
606 + "<evidence type=\"ECO:0000255\" key=\"3\">\n"
608 + "<dbReference type=\"PROSITE-ProRule\" id=\"PRU01103\"/>\n"
609 + "</source>\n" + "</evidence>\n"
610 + "<evidence type=\"ECO:0000255\" key=\"4\">\n"
612 + "<dbReference type=\"PROSITE-ProRule\" id=\"PRU10094\"/>\n"
613 + "</source>\n" + "</evidence>\n"
614 + "<evidence type=\"ECO:0000305\" key=\"5\"/>\n"
615 + "<sequence length=\"420\" mass=\"47132\" checksum=\"094153B6C1B1FCDB\" modified=\"1997-11-01\" version=\"1\" precursor=\"true\">MKWLVILGLVALSDCLVMIPLTKVKSVRESLREKGLLKNFLKEHPYNMIQNLLSKNSSHVQKFSYQPLRNYLDMVYVGNISIGTPPQQFSVVFDTGSSDLWVPSIYCKSKACVTHRSFNPSHSSTFHDRGKSIKLEYGSGKMSGFLGQDTVRIGQLTSTGQAFGLSKEETGKAFEHAIFDGILGLAYPSIAIKGTTTVIDNLKKQDQISEPVFAFYLSSDKEEGSVVMFGGVDKKYYKGDLKWVPLTQTSYWQIALDRITCRGRVIGCPRGCQAIVDTGTSMLHGPSKAVAKIHSLIKHFEKEYVVPCNARKALPDIVFTINNVDYPVPAQAYIRKYVVPCNARKALPDIVFTINNVDYPVPAQAYIRKNANNNRCYSTFEDIMDTLNQREIWILGDVFLRLYFTVYDEGQNRIGLAQAT</sequence>\n"
617 + "<copyright> Copyrighted by the UniProt Consortium, see https://www.uniprot.org/terms Distributed under the Creative Commons Attribution (CC BY 4.0) License </copyright>\n"
621 public Object[][] problemEntries()
623 return new Object[][] { new Object[] { Q29079 } };
626 @Test(groups = "Functional", dataProvider = "problemEntries")
627 public SequenceI testimportOfProblemEntries(String entry)
629 Uniprot u = new Uniprot();
630 InputStream is = new ByteArrayInputStream(entry.getBytes());
631 List<Entry> entries = u.getUniprotEntries(is);
632 assertEquals(1, entries.size());
633 SequenceI sq = u.uniprotEntryToSequence(entries.get(0));
638 @Test(groups = "Functional")
639 public void checkIndefiniteSequenceFeatures()
641 SequenceI upseq = testimportOfProblemEntries(Q29079);
642 List<SequenceFeature> sf = upseq.getFeatures()
643 .getPositionalFeatures("chain");
645 assertTrue(sf.size() == 1);
646 SequenceFeature chainFeaure = sf.get(0);
647 assertTrue(chainFeaure.getBegin() == 1);
648 assertTrue(chainFeaure.getEnd() == upseq.getEnd());
649 assertNotNull(chainFeaure.getValueAsString("start_status"));
650 assertNull(chainFeaure.getValueAsString("end_status"));
652 "unknown".equals(chainFeaure.getValueAsString("start_status")));