+/*
+ * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
+ * Copyright (C) $$Year-Rel$$ The Jalview Authors
+ *
+ * This file is part of Jalview.
+ *
+ * Jalview is free software: you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, either version 3
+ * of the License, or (at your option) any later version.
+ *
+ * Jalview is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ * PURPOSE. See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
+ * The Jalview Authors are detailed in the 'AUTHORS' file.
+ */
package jalview.datamodel.xdb.embl;
import static org.testng.AssertJUnit.assertEquals;
-import static org.testng.AssertJUnit.assertFalse;
import static org.testng.AssertJUnit.assertNull;
-import static org.testng.AssertJUnit.assertTrue;
import jalview.datamodel.DBRefEntry;
-import java.io.StringReader;
import java.util.Vector;
import org.testng.annotations.Test;
+/**
+ * Tests that the EMBL test record supplied by EmblTestHelper is exposed
+ * correctly through the EmblFile / EmblEntry object model.
+ */
public class EmblFileTest
{
- // adapted from http://www.ebi.ac.uk/Tools/dbfetch/dbfetch/embl/x53828/emblxml
- private static final String TESTDATA = "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>"
- + "<EMBL_Services>"
- + "<entry accession=\"X53828\" version=\"3\" lastUpdated=\"2005-04-18\" releaseCreated=\"25\" releaseLastUpdated=\"83\">"
- + "<description>Chicken LDH-A mRNA for lactate dehydrogenase A chain (EC 1.1.1.27)</description>"
- + "<keyword>L-lactate dehydrogenase</keyword><keyword>chutney</keyword>"
- + "<dbreference db=\"EuropePMC\" primary=\"PMC1460223\" secondary=\"9649548\" />"
- + "<feature name=\"CDS\"><dbreference db=\"GOA\" primary=\"P00340\" secondary=\"2.1\" /><dbreference db=\"InterPro\" primary=\"IPR001236\" />"
- + "<qualifier name=\"note\"><value>L-lactate dehydrogenase A-chain</value><value>pickle</value></qualifier>"
- + "<qualifier name=\"translation\"><value>MSLKDHLIHN</value><evidence>Keith</evidence></qualifier>"
- + "<location type=\"single\" complement=\"true\">"
- + "<locationElement type=\"range\" accession=\"X53828\" version=\"1\" complement=\"false\">"
- + "<basePosition type=\"simple\">60</basePosition><basePosition type=\"join\">1058</basePosition>"
- + "</locationElement></location></feature>"
- + "<sequence type=\"mRNA\" version=\"2\">GTGACG</sequence></entry></EMBL_Services>";
+ /**
+ * Verifies the single entry returned by EmblTestHelper.getEmblFile(): its
+ * header attributes, keywords, database references, the three CDS features
+ * (location, dbrefs and qualifiers) and the sequence string.
+ */
@Test(groups = { "Functional" })
public void testGetEmblFile()
{
- Vector<EmblEntry> entries = EmblFile.getEmblFile(
- new StringReader(TESTDATA)).getEntries();
+ Vector<EmblEntry> entries = EmblTestHelper.getEmblFile().getEntries();
assertEquals(1, entries.size());
EmblEntry entry = entries.get(0);
- assertEquals("X53828", entry.getAccession());
- assertEquals(
- "Chicken LDH-A mRNA for lactate dehydrogenase A chain (EC 1.1.1.27)",
- entry.getDesc());
- assertEquals("2005-04-18", entry.getLastUpdated());
+ assertEquals("X07547", entry.getAccession());
+ assertEquals("C. trachomatis plasmid", entry.getDescription());
+ assertEquals("STD", entry.getDataClass());
+ assertEquals("PRO", entry.getTaxonomicDivision());
+ assertEquals("1999-02-10", entry.getLastUpdatedDate());
+ assertEquals("58", entry.getLastUpdatedRelease());
+ assertEquals("1988-11-10", entry.getFirstPublicDate());
+ assertEquals("18", entry.getFirstPublicRelease());
+ assertEquals("genomic DNA", entry.getMoleculeType());
+ assertEquals("1", entry.getSequenceVersion());
+ assertEquals("8", entry.getEntryVersion());
+ assertEquals("linear", entry.getTopology());
+ assertEquals("7499", entry.getSequenceLength());
- /*
- * FIXME these assertions fail - values are null - why?? Adding or removing
- * attributes in the test XML modifies behaviour. eg. inserting an attribute
- * _before_ lastUpdated results in a null value in this field.
- */
- // assertEquals("25", entry.getRCreated());
- // assertEquals("83", entry.getRLastUpdated());
assertEquals(2, entry.getKeywords().size());
- assertEquals("L-lactate dehydrogenase", entry.getKeywords().get(0));
- assertEquals("chutney", entry.getKeywords().get(1));
+ assertEquals("plasmid", entry.getKeywords().get(0));
+ assertEquals("unidentified reading frame", entry.getKeywords().get(1));
/*
* dbrefs
*/
- assertEquals(1, entry.getDbRefs().size());
+ assertEquals(2, entry.getDbRefs().size());
DBRefEntry dbref = entry.getDbRefs().get(0);
assertEquals("EuropePMC", dbref.getSource());
- assertEquals("PMC1460223", dbref.getAccessionId());
- assertEquals("9649548", dbref.getVersion());
+ assertEquals("PMC107176", dbref.getAccessionId());
+ assertEquals("9573186", dbref.getVersion());
+ dbref = entry.getDbRefs().get(1);
+ assertEquals("MD5", dbref.getSource());
+ assertEquals("ac73317", dbref.getAccessionId());
+ // blank version has been converted to "0"
+ assertEquals("0", dbref.getVersion());
/*
- * sequence features
+ * three sequence features for CDS
+ */
+ assertEquals(3, entry.getFeatures().size());
+ /*
+ * first CDS
*/
- assertEquals(1, entry.getFeatures().size());
EmblFeature ef = entry.getFeatures().get(0);
assertEquals("CDS", ef.getName());
+ assertEquals("complement(46..57)", ef.getLocation());
assertEquals(2, ef.getDbRefs().size());
dbref = ef.getDbRefs().get(0);
- assertEquals("GOA", dbref.getSource());
- assertEquals("P00340", dbref.getAccessionId());
+ assertEquals("UniProtKB/Swiss-Prot", dbref.getSource());
+ assertEquals("B0BCM4", dbref.getAccessionId());
assertEquals("2.1", dbref.getVersion());
dbref = ef.getDbRefs().get(1);
- assertEquals("InterPro", dbref.getSource());
- assertEquals("IPR001236", dbref.getAccessionId());
- assertEquals("", dbref.getVersion());
- assertEquals(2, ef.getQualifiers().size());
-
- // feature qualifiers
+ assertEquals("UniProtKB/Swiss-Prot", dbref.getSource());
+ assertEquals("P0CE20", dbref.getAccessionId());
+ // blank version gets converted to "0":
+ assertEquals("0", dbref.getVersion());
+ // CDS feature qualifiers
+ assertEquals(3, ef.getQualifiers().size());
Qualifier q = ef.getQualifiers().get(0);
assertEquals("note", q.getName());
assertEquals(2, q.getValues().length);
- assertEquals("L-lactate dehydrogenase A-chain", q.getValues()[0]);
+ assertEquals("ORF 8 (AA 1-330)", q.getValues()[0]);
assertEquals("pickle", q.getValues()[1]);
assertNull(q.getEvidence());
q = ef.getQualifiers().get(1);
+ assertEquals("protein_id", q.getName());
+ assertEquals(1, q.getValues().length);
+ assertEquals("CAA30420.1", q.getValues()[0]);
+ q = ef.getQualifiers().get(2);
assertEquals("translation", q.getName());
assertEquals(1, q.getValues().length);
- assertEquals("MSLKDHLIHN", q.getValues()[0]);
+ assertEquals("MLCF", q.getValues()[0]);
assertEquals(1, q.getEvidence().length);
assertEquals("Keith", q.getEvidence()[0]);
- // feature locations
- assertEquals(1, ef.getLocations().size());
- EmblFeatureLocations fl = ef.getLocations().get(0);
- assertEquals("single", fl.getLocationType());
- assertTrue(fl.isLocationComplement());
- assertEquals(1, fl.getLocElements().size());
- EmblFeatureLocElement le = fl.getLocElements().get(0);
- assertEquals("range", le.getType());
- assertEquals("X53828", le.getAccession());
- assertEquals("1", le.getVersion());
- assertFalse(le.isComplement());
- assertEquals(2, le.getBasePositions().length);
- BasePosition bp = le.getBasePositions()[0];
- assertEquals("simple", bp.getType());
- assertEquals("60", bp.getPos());
- bp = le.getBasePositions()[1];
- assertEquals("join", bp.getType());
- assertEquals("1058", bp.getPos());
+ /*
+ * second CDS
+ */
+ ef = entry.getFeatures().get(1);
+ assertEquals("CDS", ef.getName());
+ assertEquals("4..15", ef.getLocation());
+ assertEquals(1, ef.getDbRefs().size());
+ dbref = ef.getDbRefs().get(0);
+ assertEquals("UniProtKB/Swiss-Prot", dbref.getSource());
+ assertEquals("B0BCM3", dbref.getAccessionId());
+ assertEquals("0", dbref.getVersion());
+ assertEquals(2, ef.getQualifiers().size());
+ q = ef.getQualifiers().get(0);
+ assertEquals("protein_id", q.getName());
+ assertEquals(1, q.getValues().length);
+ assertEquals("CAA30421.1", q.getValues()[0]);
+ q = ef.getQualifiers().get(1);
+ assertEquals("translation", q.getName());
+ assertEquals(1, q.getValues().length);
+ assertEquals("MSSS", q.getValues()[0]);
+
+ /*
+ * third CDS
+ */
+ ef = entry.getFeatures().get(2);
+ assertEquals("CDS", ef.getName());
+ assertEquals("join(4..6,10..15)", ef.getLocation());
+ assertNull(ef.getDbRefs());
+ assertEquals(2, ef.getQualifiers().size());
+ q = ef.getQualifiers().get(0);
+ assertEquals("protein_id", q.getName());
+ assertEquals(1, q.getValues().length);
+ assertEquals("CAA12345.6", q.getValues()[0]);
+ q = ef.getQualifiers().get(1);
+ assertEquals("translation", q.getName());
+ assertEquals(1, q.getValues().length);
+ assertEquals("MSS", q.getValues()[0]);
/*
- * Sequence
+ * Sequence - verify newline not converted to space (JAL-2029)
*/
EmblSequence seq = entry.getSequence();
- assertEquals("mRNA", seq.getType());
- assertEquals("2", seq.getVersion());
- assertEquals("GTGACG", seq.getSequence());
+ assertEquals(
+ "GGTATGTCCTCTAGTACAAACACCCCCAATATTGTGATATAATTAAAAACATAGCAT",
+ seq.getSequence());
+
+ /*
+ * getSequence() converts empty DBRefEntry.version to "0"
+ */
+ assertEquals("0", entry.getDbRefs().get(1).getVersion());
+ assertEquals("0", entry.getFeatures().get(0).getDbRefs().get(1)
+ .getVersion());
}
}