X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;ds=sidebyside;f=test%2Fjalview%2Fdatamodel%2Fxdb%2Fembl%2FEmblFileTest.java;h=906436fd755265bce8e67144ac0420ad54980408;hb=02d6aa2077a261d41db77a0158f2b4b779a36398;hp=fda25e3d6709b344e6b28e642ba757c217299dc3;hpb=5071850e8470077a8ac2bdc6a53908c70ad5526d;p=jalview.git diff --git a/test/jalview/datamodel/xdb/embl/EmblFileTest.java b/test/jalview/datamodel/xdb/embl/EmblFileTest.java index fda25e3..906436f 100644 --- a/test/jalview/datamodel/xdb/embl/EmblFileTest.java +++ b/test/jalview/datamodel/xdb/embl/EmblFileTest.java @@ -25,59 +25,45 @@ import static org.testng.AssertJUnit.assertNull; import jalview.datamodel.DBRefEntry; -import java.io.StringReader; import java.util.Vector; import org.testng.annotations.Test; public class EmblFileTest { - // adapted from http://www.ebi.ac.uk/ena/data/view/x53828&display=xml - private static final String TESTDATA = "" - + "" - + "" - + "Chicken LDH-A mRNA for lactate dehydrogenase A chain (EC 1.1.1.27)" - + "L-lactate dehydrogenasechutney" - + "" - + "" - + "" - + "" - + "L-lactate dehydrogenase A-chainpickle" - + "MSLKDHLIHNKeith" - + "" - + "GTGACG"; @Test(groups = { "Functional" }) public void testGetEmblFile() { - Vector entries = EmblFile.getEmblFile( - new StringReader(TESTDATA)).getEntries(); + Vector entries = EmblTestHelper.getEmblFile().getEntries(); assertEquals(1, entries.size()); EmblEntry entry = entries.get(0); - assertEquals("X53828", entry.getAccession()); - assertEquals( - "Chicken LDH-A mRNA for lactate dehydrogenase A chain (EC 1.1.1.27)", - entry.getDesc()); - assertEquals("2005-04-18", entry.getLastUpdated()); - assertEquals("mRNA", entry.getMoleculeType()); + assertEquals("X07547", entry.getAccession()); + assertEquals("C. trachomatis plasmid", entry.getDescription()); + assertEquals("STD", entry.getDataClass()); + assertEquals("PRO", entry.getTaxonomicDivision()); + assertEquals("1999-02-10", entry.getLastUpdatedDate()); + assertEquals("58", entry.getLastUpdatedRelease()); + assertEquals("1988-11-10", entry.getFirstPublicDate()); + assertEquals("18", entry.getFirstPublicRelease()); + assertEquals("genomic DNA", entry.getMoleculeType()); assertEquals("1", entry.getSequenceVersion()); - assertEquals("3", entry.getEntryVersion()); + assertEquals("8", entry.getEntryVersion()); assertEquals("linear", entry.getTopology()); - assertEquals("1575", entry.getSequenceLength()); + assertEquals("7499", entry.getSequenceLength()); /* * FIXME these assertions fail - values are null - why?? Adding or removing * attributes in the test XML modifies behaviour. eg. inserting an attribute * _before_ lastUpdated results in a null value in this field. */ - // assertEquals("25", entry.getRCreated()); - // assertEquals("83", entry.getRLastUpdated()); + assertEquals("1988-11-10", entry.getFirstPublicDate()); + assertEquals("18", entry.getFirstPublicRelease()); assertEquals(2, entry.getKeywords().size()); - assertEquals("L-lactate dehydrogenase", entry.getKeywords().get(0)); - assertEquals("chutney", entry.getKeywords().get(1)); + assertEquals("plasmid", entry.getKeywords().get(0)); + assertEquals("unidentified reading frame", entry.getKeywords().get(1)); /* * dbrefs @@ -85,52 +71,98 @@ public class EmblFileTest assertEquals(2, entry.getDbRefs().size()); DBRefEntry dbref = entry.getDbRefs().get(0); assertEquals("EuropePMC", dbref.getSource()); - assertEquals("PMC1460223", dbref.getAccessionId()); - assertEquals("9649548", dbref.getVersion()); + assertEquals("PMC107176", dbref.getAccessionId()); + assertEquals("9573186", dbref.getVersion()); dbref = entry.getDbRefs().get(1); assertEquals("MD5", dbref.getSource()); - assertEquals("d3b68", dbref.getAccessionId()); + assertEquals("ac73317", dbref.getAccessionId()); // blank version has been converted to "0" assertEquals("0", dbref.getVersion()); /* - * sequence feature for CDS + * three sequence features for CDS + */ + assertEquals(3, entry.getFeatures().size()); + /* + * first CDS */ - assertEquals(1, entry.getFeatures().size()); EmblFeature ef = entry.getFeatures().get(0); assertEquals("CDS", ef.getName()); - assertEquals("60..1058", ef.getLocation()); + assertEquals("complement(46..57)", ef.getLocation()); assertEquals(2, ef.getDbRefs().size()); dbref = ef.getDbRefs().get(0); - assertEquals("GOA", dbref.getSource()); - assertEquals("P00340", dbref.getAccessionId()); + assertEquals("UniProtKB/Swiss-Prot", dbref.getSource()); + assertEquals("B0BCM4", dbref.getAccessionId()); assertEquals("2.1", dbref.getVersion()); dbref = ef.getDbRefs().get(1); - assertEquals("InterPro", dbref.getSource()); - assertEquals("IPR001236", dbref.getAccessionId()); - // blank version converted to "0": + assertEquals("UniProtKB/Swiss-Prot", dbref.getSource()); + assertEquals("P0CE20", dbref.getAccessionId()); + // blank version gets converted to "0": assertEquals("0", dbref.getVersion()); - assertEquals(2, ef.getQualifiers().size()); - - // feature qualifiers + // CDS feature qualifiers + assertEquals(3, ef.getQualifiers().size()); Qualifier q = ef.getQualifiers().get(0); assertEquals("note", q.getName()); assertEquals(2, q.getValues().length); - assertEquals("L-lactate dehydrogenase A-chain", q.getValues()[0]); + assertEquals("ORF 8 (AA 1-330)", q.getValues()[0]); assertEquals("pickle", q.getValues()[1]); assertNull(q.getEvidence()); q = ef.getQualifiers().get(1); + assertEquals("protein_id", q.getName()); + assertEquals(1, q.getValues().length); + assertEquals("CAA30420.1", q.getValues()[0]); + q = ef.getQualifiers().get(2); assertEquals("translation", q.getName()); assertEquals(1, q.getValues().length); - assertEquals("MSLKDHLIHN", q.getValues()[0]); + assertEquals("MLCF", q.getValues()[0]); assertEquals(1, q.getEvidence().length); assertEquals("Keith", q.getEvidence()[0]); /* - * Sequence + * second CDS + */ + ef = entry.getFeatures().get(1); + assertEquals("CDS", ef.getName()); + assertEquals("4..15", ef.getLocation()); + assertEquals(1, ef.getDbRefs().size()); + dbref = ef.getDbRefs().get(0); + assertEquals("UniProtKB/Swiss-Prot", dbref.getSource()); + assertEquals("B0BCM3", dbref.getAccessionId()); + assertEquals("0", dbref.getVersion()); + assertEquals(2, ef.getQualifiers().size()); + q = ef.getQualifiers().get(0); + assertEquals("protein_id", q.getName()); + assertEquals(1, q.getValues().length); + assertEquals("CAA30421.1", q.getValues()[0]); + q = ef.getQualifiers().get(1); + assertEquals("translation", q.getName()); + assertEquals(1, q.getValues().length); + assertEquals("MSSS", q.getValues()[0]); + + /* + * third CDS + */ + ef = entry.getFeatures().get(2); + assertEquals("CDS", ef.getName()); + assertEquals("join(4..6,10..15)", ef.getLocation()); + assertNull(ef.getDbRefs()); + assertEquals(2, ef.getQualifiers().size()); + q = ef.getQualifiers().get(0); + assertEquals("protein_id", q.getName()); + assertEquals(1, q.getValues().length); + assertEquals("CAA12345.6", q.getValues()[0]); + q = ef.getQualifiers().get(1); + assertEquals("translation", q.getName()); + assertEquals(1, q.getValues().length); + assertEquals("MSS", q.getValues()[0]); + + /* + * Sequence - verify newline not converted to space (JAL-2029) */ EmblSequence seq = entry.getSequence(); - assertEquals("GTGACG", seq.getSequence()); + assertEquals( + "GGTATGTCCTCTAGTACAAACACCCCCAATATTGTGATATAATTAAAAACATAGCAT", + seq.getSequence()); /* * getSequence() converts empty DBRefEntry.version to "0"