2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.ws.dbsources;
23 import static org.testng.Assert.assertFalse;
24 import static org.testng.AssertJUnit.assertEquals;
25 import static org.testng.AssertJUnit.assertNotNull;
26 import static org.testng.AssertJUnit.assertNull;
27 import static org.testng.AssertJUnit.assertTrue;
29 import java.io.ByteArrayInputStream;
30 import java.io.InputStream;
31 import java.io.UnsupportedEncodingException;
32 import java.math.BigInteger;
33 import java.util.List;
35 import org.testng.Assert;
36 import org.testng.annotations.BeforeClass;
37 import org.testng.annotations.DataProvider;
38 import org.testng.annotations.Test;
40 import jalview.datamodel.DBRefEntry;
41 import jalview.datamodel.DBRefSource;
42 import jalview.datamodel.SequenceI;
43 import jalview.gui.JvOptionPane;
44 import jalview.util.DBRefUtils;
45 import jalview.xml.binding.uniprot.DbReferenceType;
46 import jalview.xml.binding.uniprot.Entry;
47 import jalview.xml.binding.uniprot.FeatureType;
48 import jalview.xml.binding.uniprot.LocationType;
49 import jalview.xml.binding.uniprot.PositionType;
51 public class UniprotTest
54 @BeforeClass(alwaysRun = true)
55 public void setUpJvOptionPane()
57 JvOptionPane.setInteractiveMode(false);
58 JvOptionPane.setMockResponse(JvOptionPane.CANCEL_OPTION);
// Inline XML fixture shared by the tests in this class: a single <entry>
// carrying two accessions, two names, one recommended full name, three
// dbReference elements (PDB, PDBsum, EMBL), nine features and a sequence
// with declared length 10.
// NOTE(review): the closing </uniprot> fragment and the statement terminator
// of this constant are not visible in this excerpt - confirm they follow.
61 // adapted from http://www.uniprot.org/uniprot/A9CKP4.xml
62 private static final String UNIPROT_XML = "<?xml version='1.0' encoding='UTF-8'?>"
63 + "<uniprot xmlns=\"http://uniprot.org/uniprot\">"
64 + "<entry dataset=\"TrEMBL\" created=\"2008-01-15\" modified=\"2015-03-04\" version=\"38\">"
65 + "<accession>A9CKP4</accession>"
66 + "<accession>A9CKP5</accession>" + "<name>A9CKP4_AGRT5</name>"
67 + "<name>A9CKP4_AGRT6</name>"
68 + "<protein><recommendedName><fullName>Mitogen-activated protein kinase 13</fullName></recommendedName></protein>"
69 + "<dbReference type=\"PDB\" id=\"2FSQ\"><property type=\"method\" value=\"X-ray\"/><property type=\"resolution\" value=\"1.40\"/></dbReference>"
70 + "<dbReference type=\"PDBsum\" id=\"2FSR\"/>"
71 + "<dbReference type=\"EMBL\" id=\"AE007869\"><property type=\"protein sequence ID\" value=\"AAK85932.1\"/><property type=\"molecule type\" value=\"Genomic_DNA\"/></dbReference>"
72 + "<feature type=\"signal peptide\" evidence=\"7\"><location><begin position=\"1\"/><end position=\"18\"/></location></feature>"
73 + "<feature type=\"propeptide\" description=\"Activation peptide\" id=\"PRO_0000027399\" evidence=\"9 16 17 18\"><location><begin position=\"19\"/><end position=\"20\"/></location></feature>"
74 + "<feature type=\"chain\" description=\"Granzyme B\" id=\"PRO_0000027400\"><location><begin position=\"21\"/><end position=\"247\"/></location></feature>"
75 + "<feature type=\"sequence variant\"><original>M</original><variation>L</variation><location><position position=\"41\"/></location></feature>"
76 + "<feature type=\"sequence variant\" description=\"Pathogenic\"><original>M</original><variation>L</variation><location><position position=\"41\"/></location></feature>"
77 + "<feature type=\"sequence variant\" description=\"Pathogenic\"><original>M</original><location><position position=\"41\"/></location></feature>"
78 + "<feature type=\"sequence variant\" description=\"Foo\"><variation>L</variation><variation>LMV</variation><original>M</original><location><position position=\"42\"/></location></feature>"
79 + "<feature type=\"sequence variant\" description=\"Foo\"><variation>LL</variation><variation>LMV</variation><original>ML</original><location><begin position=\"42\"/><end position=\"43\"/></location></feature>"
80 + "<feature type=\"sequence variant\" description=\"Foo Too\"><variation>LL</variation><variation>LMVK</variation><original>MLML</original><location><begin position=\"42\"/><end position=\"45\"/></location></feature>"
81 + "<sequence length=\"10\" mass=\"27410\" checksum=\"8CB760AACF88FE6C\" modified=\"2008-01-15\" version=\"1\">MHAPL VSKDL</sequence></entry>"
85 * Test the method that unmarshals XML to a Uniprot model
87 * @throws UnsupportedEncodingException
// Parses UNIPROT_XML with Uniprot.getUniprotEntries and checks the single
// resulting Entry: names, accessions, sequence text, recommended full name,
// the nine features and the three cross-references.
89 @Test(groups = { "Functional" })
90 public void testGetUniprotEntries() throws UnsupportedEncodingException
92 Uniprot u = new Uniprot();
// NOTE(review): getBytes() with no argument encodes with the platform
// default charset, while the fixture's prolog declares UTF-8; consider
// passing StandardCharsets.UTF_8 explicitly.
93 InputStream is = new ByteArrayInputStream(UNIPROT_XML.getBytes());
94 List<Entry> entries = u.getUniprotEntries(is);
95 assertEquals(1, entries.size());
96 Entry entry = entries.get(0);
// both <name> and both <accession> elements are expected, in document order
97 assertEquals(2, entry.getName().size());
98 assertEquals("A9CKP4_AGRT5", entry.getName().get(0));
99 assertEquals("A9CKP4_AGRT6", entry.getName().get(1));
100 assertEquals(2, entry.getAccession().size());
101 assertEquals("A9CKP4", entry.getAccession().get(0));
102 assertEquals("A9CKP5", entry.getAccession().get(1));
// the sequence text is expected verbatim, including the embedded space
104 assertEquals("MHAPL VSKDL", entry.getSequence().getValue());
106 assertEquals("Mitogen-activated protein kinase 13", entry.getProtein()
107 .getRecommendedName().getFullName().getValue());
110 * Check sequence features
112 List<FeatureType> features = entry.getFeature();
113 assertEquals(9, features.size());
// features 0-2 are ranges: getPosition() is null, begin/end are populated
114 FeatureType sf = features.get(0);
115 assertEquals("signal peptide", sf.getType());
116 assertNull(sf.getDescription());
117 assertNull(sf.getStatus());
118 assertNull(sf.getLocation().getPosition());
119 assertEquals(1, sf.getLocation().getBegin().getPosition().intValue());
120 assertEquals(18, sf.getLocation().getEnd().getPosition().intValue());
121 sf = features.get(1);
122 assertEquals("propeptide", sf.getType());
123 assertEquals("Activation peptide", sf.getDescription());
124 assertNull(sf.getLocation().getPosition());
125 assertEquals(19, sf.getLocation().getBegin().getPosition().intValue());
126 assertEquals(20, sf.getLocation().getEnd().getPosition().intValue());
127 sf = features.get(2);
128 assertEquals("chain", sf.getType());
129 assertEquals("Granzyme B", sf.getDescription());
130 assertNull(sf.getLocation().getPosition());
131 assertEquals(21, sf.getLocation().getBegin().getPosition().intValue());
132 assertEquals(247, sf.getLocation().getEnd().getPosition().intValue());
// features 3-6 are single-position variants: begin/end are null
134 sf = features.get(3);
135 assertEquals("sequence variant", sf.getType());
136 assertNull(sf.getDescription());
// NOTE(review): the next line reads as the tail of an assertion whose
// opening line is not visible in this excerpt; presumably it checks the
// variant position (41 for this feature in the fixture) - TODO confirm.
// The same pattern recurs for features 4, 5 and 6 below.
138 sf.getLocation().getPosition().getPosition().intValue());
139 assertNull(sf.getLocation().getBegin());
140 assertNull(sf.getLocation().getEnd());
142 sf = features.get(4);
143 assertEquals("sequence variant", sf.getType());
144 assertEquals("Pathogenic", sf.getDescription());
146 sf.getLocation().getPosition().getPosition().intValue());
147 assertNull(sf.getLocation().getBegin());
148 assertNull(sf.getLocation().getEnd());
150 sf = features.get(5);
151 assertEquals("sequence variant", sf.getType());
152 assertEquals("Pathogenic", sf.getDescription());
154 sf.getLocation().getPosition().getPosition().intValue());
155 assertNull(sf.getLocation().getBegin());
156 assertNull(sf.getLocation().getEnd());
158 sf = features.get(6);
159 assertEquals("sequence variant", sf.getType());
160 assertEquals("Foo", sf.getDescription());
162 sf.getLocation().getPosition().getPosition().intValue());
163 assertNull(sf.getLocation().getBegin());
164 assertNull(sf.getLocation().getEnd());
// two <variation> values on one feature: the expected description is html
// with one variant per line and the feature description after the last one
165 Assert.assertEquals(Uniprot.getDescription(sf), "<html>p.Met42Leu"
166 + "<br/> " + "p.Met42LeuMetVal Foo</html>");
// features 7-8 are ranged variants: getPosition() is null again
168 sf = features.get(7);
169 assertNull(sf.getLocation().getPosition());
170 assertEquals(42, sf.getLocation().getBegin().getPosition().intValue());
171 assertEquals(43, sf.getLocation().getEnd().getPosition().intValue());
172 Assert.assertEquals(Uniprot.getDescription(sf), "<html>p.MetLeu42LeuLeu"
173 + "<br/> " + "p.MetLeu42LeuMetVal Foo</html>");
175 sf = features.get(8);
176 assertNull(sf.getLocation().getPosition());
177 assertEquals(42, sf.getLocation().getBegin().getPosition().intValue());
178 assertEquals(45, sf.getLocation().getEnd().getPosition().intValue());
// the four-residue original (MLML) and variant (LMVK) are expected in
// single-letter form, while the two-residue variant LL stays three-letter
179 Assert.assertEquals(Uniprot.getDescription(sf), "<html>p.MLML42LeuLeu"
180 + "<br/> " + "p.MLML42LMVK Foo Too</html>");
183 * Check cross-references
185 List<DbReferenceType> xrefs = entry.getDbReference();
186 assertEquals(3, xrefs.size());
188 DbReferenceType xref = xrefs.get(0);
189 assertEquals("2FSQ", xref.getId());
190 assertEquals("PDB", xref.getType());
191 assertEquals("X-ray",
192 Uniprot.getProperty(xref.getProperty(), "method"));
// NOTE(review): the opening line of this assertion is not visible here;
// the fixture's resolution property is "1.40" - TODO confirm expected value.
194 Uniprot.getProperty(xref.getProperty(), "resolution"));
// NOTE(review): no reassignment of xref is visible before these checks, yet
// they match the second dbReference (PDBsum / 2FSR) in the fixture -
// presumably a line selecting xrefs.get(1) is missing from this excerpt.
197 assertEquals("2FSR", xref.getId());
198 assertEquals("PDBsum", xref.getType());
199 assertTrue(xref.getProperty().isEmpty());
// NOTE(review): likewise these match the third dbReference (EMBL) -
// presumably a line selecting xrefs.get(2) is missing; verify.
202 assertEquals("AE007869", xref.getId());
203 assertEquals("EMBL", xref.getType());
204 assertEquals("AAK85932.1",
205 Uniprot.getProperty(xref.getProperty(), "protein sequence ID"));
206 assertEquals("Genomic_DNA",
207 Uniprot.getProperty(xref.getProperty(), "molecule type"));
// Converts the fixture's Entry to a SequenceI with uniprotEntryToSequence and
// checks the database references attached to it: total count, primary refs,
// canonical flags on the UNIPROT refs and the version string of the
// AAK85932 ref.
210 @Test(groups = { "Functional" })
211 public void testGetUniprotSequence() throws UnsupportedEncodingException
// NOTE(review): getBytes() with no argument uses the platform default
// charset although the fixture declares UTF-8; consider an explicit charset.
213 InputStream is = new ByteArrayInputStream(UNIPROT_XML.getBytes());
214 Entry entry = new Uniprot().getUniprotEntries(is).get(0);
215 SequenceI seq = new Uniprot().uniprotEntryToSequence(entry);
217 assertEquals(6, seq.getDBRefs().size()); // 2*Uniprot, PDB, PDBsum, 2*EMBL
// the dataset sequence created from seq must carry the same residue string
218 assertEquals(seq.getSequenceAsString(),
219 seq.createDatasetSequence().getSequenceAsString());
220 assertEquals(2, seq.getPrimaryDBRefs().size());
// NOTE(review): the remaining argument(s) and the close of this searchRefs
// call are not visible in this excerpt; presumably the search key is one of
// the fixture's accessions - TODO confirm.
221 List<DBRefEntry> res = DBRefUtils.searchRefs(seq.getPrimaryDBRefs(),
223 assertEquals(1, res.size());
224 assertTrue(res.get(0).isCanonical());
225 res = DBRefUtils.searchRefsForSource(seq.getDBRefs(),
226 DBRefSource.UNIPROT);
227 assertEquals(2, res.size());
229 * NB this test fragile - relies on ordering being preserved
// first UNIPROT ref is expected to be canonical, the second not
231 assertTrue(res.get(0).isCanonical());
232 assertFalse(res.get(1).isCanonical());
234 // check version is preserved for EMBLCDS
// the search key omits the ".1" suffix found in the fixture's
// "protein sequence ID" value (AAK85932.1)
235 res = DBRefUtils.searchRefs(seq.getDBRefs(), "AAK85932");
236 assertEquals(1, res.size());
237 // Ideally we would expect AAK85932.1 -> AAK85932
238 // assertTrue("1".equals(res.get(0).getVersion()));
239 // but it also passes through DBrefUtils.ensurePrimaries which adds
240 // (promoted) to the version string
241 // FIXME: Jim needs to specify what (promoted) means !! - or perhaps we just
243 assertEquals("1 (promoted)", (res.get(0).getVersion()));
247 * Test the method that formats the sequence id
249 * @throws UnsupportedEncodingException
251 @Test(groups = { "Functional" })
252 public void testGetUniprotEntryId() throws UnsupportedEncodingException
254 InputStream is = new ByteArrayInputStream(UNIPROT_XML.getBytes());
255 Entry entry = new Uniprot().getUniprotEntries(is).get(0);
258 * name formatted with Uniprot Entry name
260 String expectedName = "A9CKP4_AGRT5|A9CKP4_AGRT6";
261 assertEquals(expectedName, Uniprot.getUniprotEntryId(entry));
265 * Test the method that formats the sequence description
267 * @throws UnsupportedEncodingException
269 @Test(groups = { "Functional" })
270 public void testGetUniprotEntryDescription()
271 throws UnsupportedEncodingException
273 InputStream is = new ByteArrayInputStream(UNIPROT_XML.getBytes());
274 Entry entry = new Uniprot().getUniprotEntries(is).get(0);
276 assertEquals("Mitogen-activated protein kinase 13",
277 Uniprot.getUniprotEntryDescription(entry));
// Exercises Uniprot.getDescription on a hand-built FeatureType, stepping
// through: no description, description only, one variant, several variants,
// the switch between three-letter and single-letter residue codes, an
// unrecognised variant code, and no variant at all.
280 @Test(groups = { "Functional" })
281 public void testGetDescription()
283 FeatureType ft = new FeatureType();
// a feature with nothing set yields an empty string
284 assertEquals("", Uniprot.getDescription(ft));
286 ft.setDescription("Hello");
287 assertEquals("Hello", Uniprot.getDescription(ft));
289 ft.setLocation(new LocationType());
290 ft.getLocation().setPosition(new PositionType());
291 ft.getLocation().getPosition().setPosition(BigInteger.valueOf(23));
// NOTE(review): the expected strings below start with "p.Lys23", but no
// statement setting the original residue is visible before this point -
// presumably a setOriginal call is missing from this excerpt; verify.
// The lower-case variant "y" is expected to be reported as Tyr.
293 ft.getVariation().add("y");
294 assertEquals("p.Lys23Tyr Hello", Uniprot.getDescription(ft));
296 // multiple variants generate an html description over more than one line
297 ft.getVariation().add("W");
298 assertEquals("<html>p.Lys23Tyr<br/> p.Lys23Trp Hello</html>",
299 Uniprot.getDescription(ft));
303 * up to 3 bases (original or variant) are shown using 3 letter code
305 ft.getVariation().clear();
306 ft.getVariation().add("KWE");
307 ft.setOriginal("KLS");
308 assertEquals("p.LysLeuSer23LysTrpGlu Hello",
309 Uniprot.getDescription(ft));
311 // adding a fourth original base switches to single letter code
// only the original switches here; the three-residue variant stays
// three-letter
312 ft.setOriginal("KLST");
313 assertEquals("p.KLST23LysTrpGlu Hello", Uniprot.getDescription(ft));
315 // adding a fourth variant switches to single letter code
316 ft.getVariation().clear();
317 ft.getVariation().add("KWES");
318 assertEquals("p.KLST23KWES Hello", Uniprot.getDescription(ft));
320 ft.getVariation().clear();
321 ft.getVariation().add("z"); // unknown variant - fails gracefully
// NOTE(review): the expected text reverts to "Lys" although the last visible
// setOriginal call passed "KLST" - presumably the original is reset on a
// line not visible in this excerpt; TODO confirm.
323 assertEquals("p.Lys23z Hello", Uniprot.getDescription(ft));
325 ft.getVariation().clear(); // variant missing - is ignored
326 assertEquals("Hello", Uniprot.getDescription(ft));
// Returns test rows, each holding a UniProt XML document as a String; the
// visible row is the record for accession Q29079 (PAG2_PIG). Referenced as
// dataProvider = "problemEntries" by testimportOfProblemEntries.
// NOTE(review): the provider annotation, a number of short closing-tag
// fragments of the literal and the end of the array initialiser are not
// visible in this excerpt - confirm against the full file before editing.
330 public Object[][] problemEntries()
332 return new Object[][] {
335 "<uniprot xmlns=\"http://uniprot.org/uniprot\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://uniprot.org/uniprot http://www.uniprot.org/docs/uniprot.xsd\">\n"
336 + "<entry xmlns=\"http://uniprot.org/uniprot\" dataset=\"Swiss-Prot\" created=\"1997-11-01\" modified=\"2023-09-13\" version=\"103\">\n"
337 + "<accession>Q29079</accession>\n"
338 + "<accession>Q29017</accession>\n"
339 + "<name>PAG2_PIG</name>\n" + "<protein>\n"
340 + "<recommendedName>\n"
341 + "<fullName>Pregnancy-associated glycoprotein 2</fullName>\n"
342 + "<shortName>PAG 2</shortName>\n"
343 + "<ecNumber>3.4.23.-</ecNumber>\n"
344 + "</recommendedName>\n" + "</protein>\n"
346 + "<name type=\"primary\">PAG2</name>\n"
347 + "</gene>\n" + "<organism>\n"
348 + "<name type=\"scientific\">Sus scrofa</name>\n"
349 + "<name type=\"common\">Pig</name>\n"
350 + "<dbReference type=\"NCBI Taxonomy\" id=\"9823\"/>\n"
351 + "<lineage>\n" + "<taxon>Eukaryota</taxon>\n"
352 + "<taxon>Metazoa</taxon>\n"
353 + "<taxon>Chordata</taxon>\n"
354 + "<taxon>Craniata</taxon>\n"
355 + "<taxon>Vertebrata</taxon>\n"
356 + "<taxon>Euteleostomi</taxon>\n"
357 + "<taxon>Mammalia</taxon>\n"
358 + "<taxon>Eutheria</taxon>\n"
359 + "<taxon>Laurasiatheria</taxon>\n"
360 + "<taxon>Artiodactyla</taxon>\n"
361 + "<taxon>Suina</taxon>\n"
362 + "<taxon>Suidae</taxon>\n" + "<taxon>Sus</taxon>\n"
363 + "</lineage>\n" + "</organism>\n"
364 + "<reference key=\"1\">\n"
365 + "<citation type=\"journal article\" date=\"1995\" name=\"Biol. Reprod.\" volume=\"53\" first=\"21\" last=\"28\">\n"
366 + "<title>Porcine pregnancy-associated glycoproteins: new members of the aspartic proteinase gene family expressed in trophectoderm.</title>\n"
368 + "<person name=\"Szafranska B.\"/>\n"
369 + "<person name=\"Xie S.\"/>\n"
370 + "<person name=\"Green J.\"/>\n"
371 + "<person name=\"Roberts R.M.\"/>\n"
373 + "<dbReference type=\"PubMed\" id=\"7669851\"/>\n"
374 + "<dbReference type=\"DOI\" id=\"10.1095/biolreprod53.1.21\"/>\n"
376 + "<scope>NUCLEOTIDE SEQUENCE [GENOMIC DNA]</scope>\n"
377 + "</reference>\n" + "<reference key=\"2\">\n"
378 + "<citation type=\"journal article\" date=\"2001\" name=\"Mol. Reprod. Dev.\" volume=\"60\" first=\"137\" last=\"146\">\n"
379 + "<title>Gene for porcine pregnancy-associated glycoprotein 2 (poPAG2): its structural organization and analysis of its promoter.</title>\n"
381 + "<person name=\"Szafranska B.\"/>\n"
382 + "<person name=\"Miura R.\"/>\n"
383 + "<person name=\"Ghosh D.\"/>\n"
384 + "<person name=\"Ezashi T.\"/>\n"
385 + "<person name=\"Xie S.\"/>\n"
386 + "<person name=\"Roberts R.M.\"/>\n"
387 + "<person name=\"Green J.A.\"/>\n"
389 + "<dbReference type=\"PubMed\" id=\"11553911\"/>\n"
390 + "<dbReference type=\"DOI\" id=\"10.1002/mrd.1070\"/>\n"
392 + "<scope>NUCLEOTIDE SEQUENCE [GENOMIC DNA]</scope>\n"
393 + "<source>\n" + "<tissue>Placenta</tissue>\n"
394 + "</source>\n" + "</reference>\n"
395 + "<comment type=\"subcellular location\">\n"
396 + "<subcellularLocation>\n"
397 + "<location>Secreted</location>\n"
398 + "<location>Extracellular space</location>\n"
399 + "</subcellularLocation>\n" + "</comment>\n"
400 + "<comment type=\"tissue specificity\">\n"
401 + "<text>Expressed throughout the chorion, with the signal localized exclusively over the trophectoderm.</text>\n"
403 + "<comment type=\"developmental stage\">\n"
404 + "<text>Expression was detected at day 15, coinciding with the beginning of implantation, and continued throughout gestation.</text>\n"
405 + "</comment>\n" + "<comment type=\"similarity\">\n"
406 + "<text evidence=\"5\">Belongs to the peptidase A1 family.</text>\n"
408 + "<dbReference type=\"EC\" id=\"3.4.23.-\"/>\n"
409 + "<dbReference type=\"EMBL\" id=\"U39763\">\n"
410 + "<property type=\"protein sequence ID\" value=\"AAA92055.1\"/>\n"
411 + "<property type=\"molecule type\" value=\"Genomic_DNA\"/>\n"
413 + "<dbReference type=\"EMBL\" id=\"U41421\">\n"
414 + "<property type=\"protein sequence ID\" value=\"AAA92055.1\"/>\n"
415 + "<property type=\"status\" value=\"JOINED\"/>\n"
416 + "<property type=\"molecule type\" value=\"Genomic_DNA\"/>\n"
418 + "<dbReference type=\"EMBL\" id=\"U41422\">\n"
419 + "<property type=\"protein sequence ID\" value=\"AAA92055.1\"/>\n"
420 + "<property type=\"status\" value=\"JOINED\"/>\n"
421 + "<property type=\"molecule type\" value=\"Genomic_DNA\"/>\n"
423 + "<dbReference type=\"EMBL\" id=\"U39199\">\n"
424 + "<property type=\"protein sequence ID\" value=\"AAA92055.1\"/>\n"
425 + "<property type=\"status\" value=\"JOINED\"/>\n"
426 + "<property type=\"molecule type\" value=\"Genomic_DNA\"/>\n"
428 + "<dbReference type=\"EMBL\" id=\"U41423\">\n"
429 + "<property type=\"protein sequence ID\" value=\"AAA92055.1\"/>\n"
430 + "<property type=\"status\" value=\"JOINED\"/>\n"
431 + "<property type=\"molecule type\" value=\"Genomic_DNA\"/>\n"
433 + "<dbReference type=\"EMBL\" id=\"U41424\">\n"
434 + "<property type=\"protein sequence ID\" value=\"AAA92055.1\"/>\n"
435 + "<property type=\"status\" value=\"JOINED\"/>\n"
436 + "<property type=\"molecule type\" value=\"Genomic_DNA\"/>\n"
438 + "<dbReference type=\"EMBL\" id=\"U39762\">\n"
439 + "<property type=\"protein sequence ID\" value=\"AAA92055.1\"/>\n"
440 + "<property type=\"status\" value=\"JOINED\"/>\n"
441 + "<property type=\"molecule type\" value=\"Genomic_DNA\"/>\n"
443 + "<dbReference type=\"EMBL\" id=\"L34361\">\n"
444 + "<property type=\"protein sequence ID\" value=\"AAA81531.1\"/>\n"
445 + "<property type=\"molecule type\" value=\"Genomic_DNA\"/>\n"
447 + "<dbReference type=\"PIR\" id=\"I46617\">\n"
448 + "<property type=\"entry name\" value=\"I46617\"/>\n"
450 + "<dbReference type=\"AlphaFoldDB\" id=\"Q29079\"/>\n"
451 + "<dbReference type=\"SMR\" id=\"Q29079\"/>\n"
452 + "<dbReference type=\"MEROPS\" id=\"A01.051\"/>\n"
453 + "<dbReference type=\"GlyCosmos\" id=\"Q29079\">\n"
454 + "<property type=\"glycosylation\" value=\"2 sites, No reported glycans\"/>\n"
456 + "<dbReference type=\"InParanoid\" id=\"Q29079\"/>\n"
457 + "<dbReference type=\"Proteomes\" id=\"UP000008227\">\n"
458 + "<property type=\"component\" value=\"Unplaced\"/>\n"
460 + "<dbReference type=\"Proteomes\" id=\"UP000314985\">\n"
461 + "<property type=\"component\" value=\"Unplaced\"/>\n"
463 + "<dbReference type=\"Proteomes\" id=\"UP000694570\">\n"
464 + "<property type=\"component\" value=\"Unplaced\"/>\n"
466 + "<dbReference type=\"Proteomes\" id=\"UP000694571\">\n"
467 + "<property type=\"component\" value=\"Unplaced\"/>\n"
469 + "<dbReference type=\"Proteomes\" id=\"UP000694720\">\n"
470 + "<property type=\"component\" value=\"Unplaced\"/>\n"
472 + "<dbReference type=\"Proteomes\" id=\"UP000694722\">\n"
473 + "<property type=\"component\" value=\"Unplaced\"/>\n"
475 + "<dbReference type=\"Proteomes\" id=\"UP000694723\">\n"
476 + "<property type=\"component\" value=\"Unplaced\"/>\n"
478 + "<dbReference type=\"Proteomes\" id=\"UP000694724\">\n"
479 + "<property type=\"component\" value=\"Unplaced\"/>\n"
481 + "<dbReference type=\"Proteomes\" id=\"UP000694725\">\n"
482 + "<property type=\"component\" value=\"Unplaced\"/>\n"
484 + "<dbReference type=\"Proteomes\" id=\"UP000694726\">\n"
485 + "<property type=\"component\" value=\"Unplaced\"/>\n"
487 + "<dbReference type=\"Proteomes\" id=\"UP000694727\">\n"
488 + "<property type=\"component\" value=\"Unplaced\"/>\n"
490 + "<dbReference type=\"Proteomes\" id=\"UP000694728\">\n"
491 + "<property type=\"component\" value=\"Unplaced\"/>\n"
493 + "<dbReference type=\"GO\" id=\"GO:0005615\">\n"
494 + "<property type=\"term\" value=\"C:extracellular space\"/>\n"
495 + "<property type=\"evidence\" value=\"ECO:0007669\"/>\n"
496 + "<property type=\"project\" value=\"UniProtKB-SubCell\"/>\n"
498 + "<dbReference type=\"GO\" id=\"GO:0004190\">\n"
499 + "<property type=\"term\" value=\"F:aspartic-type endopeptidase activity\"/>\n"
500 + "<property type=\"evidence\" value=\"ECO:0000318\"/>\n"
501 + "<property type=\"project\" value=\"GO_Central\"/>\n"
503 + "<dbReference type=\"GO\" id=\"GO:0006508\">\n"
504 + "<property type=\"term\" value=\"P:proteolysis\"/>\n"
505 + "<property type=\"evidence\" value=\"ECO:0000318\"/>\n"
506 + "<property type=\"project\" value=\"GO_Central\"/>\n"
508 + "<dbReference type=\"Gene3D\" id=\"6.10.140.60\">\n"
509 + "<property type=\"match status\" value=\"1\"/>\n"
511 + "<dbReference type=\"Gene3D\" id=\"2.40.70.10\">\n"
512 + "<property type=\"entry name\" value=\"Acid Proteases\"/>\n"
513 + "<property type=\"match status\" value=\"3\"/>\n"
515 + "<dbReference type=\"InterPro\" id=\"IPR001461\">\n"
516 + "<property type=\"entry name\" value=\"Aspartic_peptidase_A1\"/>\n"
518 + "<dbReference type=\"InterPro\" id=\"IPR001969\">\n"
519 + "<property type=\"entry name\" value=\"Aspartic_peptidase_AS\"/>\n"
521 + "<dbReference type=\"InterPro\" id=\"IPR012848\">\n"
522 + "<property type=\"entry name\" value=\"Aspartic_peptidase_N\"/>\n"
524 + "<dbReference type=\"InterPro\" id=\"IPR033121\">\n"
525 + "<property type=\"entry name\" value=\"PEPTIDASE_A1\"/>\n"
527 + "<dbReference type=\"InterPro\" id=\"IPR021109\">\n"
528 + "<property type=\"entry name\" value=\"Peptidase_aspartic_dom_sf\"/>\n"
530 + "<dbReference type=\"PANTHER\" id=\"PTHR47966\">\n"
531 + "<property type=\"entry name\" value=\"BETA-SITE APP-CLEAVING ENZYME, ISOFORM A-RELATED\"/>\n"
532 + "<property type=\"match status\" value=\"1\"/>\n"
534 + "<dbReference type=\"PANTHER\" id=\"PTHR47966:SF49\">\n"
535 + "<property type=\"entry name\" value=\"PEPSIN A-5\"/>\n"
536 + "<property type=\"match status\" value=\"1\"/>\n"
538 + "<dbReference type=\"Pfam\" id=\"PF07966\">\n"
539 + "<property type=\"entry name\" value=\"A1_Propeptide\"/>\n"
540 + "<property type=\"match status\" value=\"1\"/>\n"
542 + "<dbReference type=\"Pfam\" id=\"PF00026\">\n"
543 + "<property type=\"entry name\" value=\"Asp\"/>\n"
544 + "<property type=\"match status\" value=\"2\"/>\n"
546 + "<dbReference type=\"PRINTS\" id=\"PR00792\">\n"
547 + "<property type=\"entry name\" value=\"PEPSIN\"/>\n"
549 + "<dbReference type=\"SUPFAM\" id=\"SSF50630\">\n"
550 + "<property type=\"entry name\" value=\"Acid proteases\"/>\n"
551 + "<property type=\"match status\" value=\"2\"/>\n"
553 + "<dbReference type=\"PROSITE\" id=\"PS00141\">\n"
554 + "<property type=\"entry name\" value=\"ASP_PROTEASE\"/>\n"
555 + "<property type=\"match status\" value=\"2\"/>\n"
557 + "<dbReference type=\"PROSITE\" id=\"PS51767\">\n"
558 + "<property type=\"entry name\" value=\"PEPTIDASE_A1\"/>\n"
559 + "<property type=\"match status\" value=\"1\"/>\n"
561 + "<proteinExistence type=\"evidence at transcript level\"/>\n"
562 + "<keyword id=\"KW-0064\">Aspartyl protease</keyword>\n"
563 + "<keyword id=\"KW-1015\">Disulfide bond</keyword>\n"
564 + "<keyword id=\"KW-0325\">Glycoprotein</keyword>\n"
565 + "<keyword id=\"KW-0378\">Hydrolase</keyword>\n"
566 + "<keyword id=\"KW-0645\">Protease</keyword>\n"
567 + "<keyword id=\"KW-1185\">Reference proteome</keyword>\n"
568 + "<keyword id=\"KW-0964\">Secreted</keyword>\n"
569 + "<keyword id=\"KW-0732\">Signal</keyword>\n"
570 + "<keyword id=\"KW-0865\">Zymogen</keyword>\n"
571 + "<feature type=\"signal peptide\" evidence=\"2\">\n"
572 + "<location>\n" + "<begin position=\"1\"/>\n"
573 + "<end position=\"15\"/>\n" + "</location>\n"
// The propeptide end and the chain begin below carry status="unknown"
// instead of a numeric position - presumably the reason this record is
// listed as a problem entry; verify against the issue that added it.
575 + "<feature type=\"propeptide\" id=\"PRO_0000026107\" description=\"Activation peptide\" evidence=\"2\">\n"
576 + "<location>\n" + "<begin position=\"16\"/>\n"
577 + "<end status=\"unknown\"/>\n" + "</location>\n"
579 + "<feature type=\"chain\" id=\"PRO_0000026108\" description=\"Pregnancy-associated glycoprotein 2\">\n"
580 + "<location>\n" + "<begin status=\"unknown\"/>\n"
581 + "<end position=\"420\"/>\n" + "</location>\n"
583 + "<feature type=\"domain\" description=\"Peptidase A1\" evidence=\"3\">\n"
584 + "<location>\n" + "<begin position=\"76\"/>\n"
585 + "<end position=\"417\"/>\n" + "</location>\n"
587 + "<feature type=\"active site\" evidence=\"4\">\n"
588 + "<location>\n" + "<position position=\"94\"/>\n"
589 + "</location>\n" + "</feature>\n"
590 + "<feature type=\"active site\" evidence=\"4\">\n"
591 + "<location>\n" + "<position position=\"277\"/>\n"
592 + "</location>\n" + "</feature>\n"
593 + "<feature type=\"glycosylation site\" description=\"N-linked (GlcNAc...) asparagine\" evidence=\"2\">\n"
594 + "<location>\n" + "<position position=\"56\"/>\n"
595 + "</location>\n" + "</feature>\n"
596 + "<feature type=\"glycosylation site\" description=\"N-linked (GlcNAc...) asparagine\" evidence=\"2\">\n"
597 + "<location>\n" + "<position position=\"79\"/>\n"
598 + "</location>\n" + "</feature>\n"
599 + "<feature type=\"disulfide bond\" evidence=\"1\">\n"
600 + "<location>\n" + "<begin position=\"107\"/>\n"
601 + "<end position=\"112\"/>\n" + "</location>\n"
603 + "<feature type=\"disulfide bond\" evidence=\"1\">\n"
604 + "<location>\n" + "<begin position=\"268\"/>\n"
605 + "<end position=\"272\"/>\n" + "</location>\n"
607 + "<feature type=\"disulfide bond\" evidence=\"1\">\n"
608 + "<location>\n" + "<begin position=\"341\"/>\n"
609 + "<end position=\"376\"/>\n" + "</location>\n"
611 + "<feature type=\"sequence conflict\" description=\"In Ref. 1.\" evidence=\"5\" ref=\"1\">\n"
612 + "<location>\n" + "<begin position=\"335\"/>\n"
613 + "<end position=\"367\"/>\n" + "</location>\n"
615 + "<evidence type=\"ECO:0000250\" key=\"1\"/>\n"
616 + "<evidence type=\"ECO:0000255\" key=\"2\"/>\n"
617 + "<evidence type=\"ECO:0000255\" key=\"3\">\n"
619 + "<dbReference type=\"PROSITE-ProRule\" id=\"PRU01103\"/>\n"
620 + "</source>\n" + "</evidence>\n"
621 + "<evidence type=\"ECO:0000255\" key=\"4\">\n"
623 + "<dbReference type=\"PROSITE-ProRule\" id=\"PRU10094\"/>\n"
624 + "</source>\n" + "</evidence>\n"
625 + "<evidence type=\"ECO:0000305\" key=\"5\"/>\n"
626 + "<sequence length=\"420\" mass=\"47132\" checksum=\"094153B6C1B1FCDB\" modified=\"1997-11-01\" version=\"1\" precursor=\"true\">MKWLVILGLVALSDCLVMIPLTKVKSVRESLREKGLLKNFLKEHPYNMIQNLLSKNSSHVQKFSYQPLRNYLDMVYVGNISIGTPPQQFSVVFDTGSSDLWVPSIYCKSKACVTHRSFNPSHSSTFHDRGKSIKLEYGSGKMSGFLGQDTVRIGQLTSTGQAFGLSKEETGKAFEHAIFDGILGLAYPSIAIKGTTTVIDNLKKQDQISEPVFAFYLSSDKEEGSVVMFGGVDKKYYKGDLKWVPLTQTSYWQIALDRITCRGRVIGCPRGCQAIVDTGTSMLHGPSKAVAKIHSLIKHFEKEYVVPCNARKALPDIVFTINNVDYPVPAQAYIRKYVVPCNARKALPDIVFTINNVDYPVPAQAYIRKNANNNRCYSTFEDIMDTLNQREIWILGDVFLRLYFTVYDEGQNRIGLAQAT</sequence>\n"
628 + "<copyright> Copyrighted by the UniProt Consortium, see https://www.uniprot.org/terms Distributed under the Creative Commons Attribution (CC BY 4.0) License </copyright>\n"
632 @Test(groups = "Functional", dataProvider = "problemEntries")
633 public void testimportOfProblemEntries(String entry)
635 Uniprot u = new Uniprot();
636 InputStream is = new ByteArrayInputStream(entry.getBytes());
637 List<Entry> entries = u.getUniprotEntries(is);
638 assertEquals(1, entries.size());
639 SequenceI sq = u.uniprotEntryToSequence(entries.get(0));