X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=test%2Fjalview%2Fdatamodel%2Fxdb%2Fembl%2FEmblFileTest.java;h=906436fd755265bce8e67144ac0420ad54980408;hb=37de9310bec3501cbc6381e0c3dcb282fcaad812;hp=e58cf3550d889103c9591ae267f49135415e0790;hpb=52288466dd1e71946a06fd1e6ea15fa8e652c693;p=jalview.git diff --git a/test/jalview/datamodel/xdb/embl/EmblFileTest.java b/test/jalview/datamodel/xdb/embl/EmblFileTest.java index e58cf35..906436f 100644 --- a/test/jalview/datamodel/xdb/embl/EmblFileTest.java +++ b/test/jalview/datamodel/xdb/embl/EmblFileTest.java @@ -1,126 +1,174 @@ +/* + * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) + * Copyright (C) $$Year-Rel$$ The Jalview Authors + * + * This file is part of Jalview. + * + * Jalview is free software: you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, either version 3 + * of the License, or (at your option) any later version. + * + * Jalview is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Jalview. If not, see <http://www.gnu.org/licenses/>. + * The Jalview Authors are detailed in the 'AUTHORS' file. 
+ */ package jalview.datamodel.xdb.embl; import static org.testng.AssertJUnit.assertEquals; -import static org.testng.AssertJUnit.assertFalse; import static org.testng.AssertJUnit.assertNull; -import static org.testng.AssertJUnit.assertTrue; import jalview.datamodel.DBRefEntry; -import java.io.StringReader; import java.util.Vector; import org.testng.annotations.Test; public class EmblFileTest { - // adapted from http://www.ebi.ac.uk/Tools/dbfetch/dbfetch/embl/x53828/emblxml - private static final String TESTDATA = "" - + "" - + "" - + "Chicken LDH-A mRNA for lactate dehydrogenase A chain (EC 1.1.1.27)" - + "L-lactate dehydrogenasechutney" - + "" - + "" - + "L-lactate dehydrogenase A-chainpickle" - + "MSLKDHLIHNKeith" - + "" - + "" - + "601058" - + "" - + "GTGACG"; @Test(groups = { "Functional" }) public void testGetEmblFile() { - Vector entries = EmblFile.getEmblFile( - new StringReader(TESTDATA)).getEntries(); + Vector entries = EmblTestHelper.getEmblFile().getEntries(); assertEquals(1, entries.size()); EmblEntry entry = entries.get(0); - assertEquals("X53828", entry.getAccession()); - assertEquals( - "Chicken LDH-A mRNA for lactate dehydrogenase A chain (EC 1.1.1.27)", - entry.getDesc()); - assertEquals("2005-04-18", entry.getLastUpdated()); + assertEquals("X07547", entry.getAccession()); + assertEquals("C. 
trachomatis plasmid", entry.getDescription()); + assertEquals("STD", entry.getDataClass()); + assertEquals("PRO", entry.getTaxonomicDivision()); + assertEquals("1999-02-10", entry.getLastUpdatedDate()); + assertEquals("58", entry.getLastUpdatedRelease()); + assertEquals("1988-11-10", entry.getFirstPublicDate()); + assertEquals("18", entry.getFirstPublicRelease()); + assertEquals("genomic DNA", entry.getMoleculeType()); + assertEquals("1", entry.getSequenceVersion()); + assertEquals("8", entry.getEntryVersion()); + assertEquals("linear", entry.getTopology()); + assertEquals("7499", entry.getSequenceLength()); /* * FIXME these assertions fail - values are null - why?? Adding or removing * attributes in the test XML modifies behaviour. eg. inserting an attribute * _before_ lastUpdated results in a null value in this field. */ - // assertEquals("25", entry.getRCreated()); - // assertEquals("83", entry.getRLastUpdated()); + assertEquals("1988-11-10", entry.getFirstPublicDate()); + assertEquals("18", entry.getFirstPublicRelease()); assertEquals(2, entry.getKeywords().size()); - assertEquals("L-lactate dehydrogenase", entry.getKeywords().get(0)); - assertEquals("chutney", entry.getKeywords().get(1)); + assertEquals("plasmid", entry.getKeywords().get(0)); + assertEquals("unidentified reading frame", entry.getKeywords().get(1)); /* * dbrefs */ - assertEquals(1, entry.getDbRefs().size()); + assertEquals(2, entry.getDbRefs().size()); DBRefEntry dbref = entry.getDbRefs().get(0); assertEquals("EuropePMC", dbref.getSource()); - assertEquals("PMC1460223", dbref.getAccessionId()); - assertEquals("9649548", dbref.getVersion()); + assertEquals("PMC107176", dbref.getAccessionId()); + assertEquals("9573186", dbref.getVersion()); + dbref = entry.getDbRefs().get(1); + assertEquals("MD5", dbref.getSource()); + assertEquals("ac73317", dbref.getAccessionId()); + // blank version has been converted to "0" + assertEquals("0", dbref.getVersion()); /* - * sequence features + * three sequence 
features for CDS + */ + assertEquals(3, entry.getFeatures().size()); + /* + * first CDS */ - assertEquals(1, entry.getFeatures().size()); EmblFeature ef = entry.getFeatures().get(0); assertEquals("CDS", ef.getName()); + assertEquals("complement(46..57)", ef.getLocation()); assertEquals(2, ef.getDbRefs().size()); dbref = ef.getDbRefs().get(0); - assertEquals("GOA", dbref.getSource()); - assertEquals("P00340", dbref.getAccessionId()); + assertEquals("UniProtKB/Swiss-Prot", dbref.getSource()); + assertEquals("B0BCM4", dbref.getAccessionId()); assertEquals("2.1", dbref.getVersion()); dbref = ef.getDbRefs().get(1); - assertEquals("InterPro", dbref.getSource()); - assertEquals("IPR001236", dbref.getAccessionId()); - assertEquals("", dbref.getVersion()); - assertEquals(2, ef.getQualifiers().size()); - - // feature qualifiers + assertEquals("UniProtKB/Swiss-Prot", dbref.getSource()); + assertEquals("P0CE20", dbref.getAccessionId()); + // blank version gets converted to "0": + assertEquals("0", dbref.getVersion()); + // CDS feature qualifiers + assertEquals(3, ef.getQualifiers().size()); Qualifier q = ef.getQualifiers().get(0); assertEquals("note", q.getName()); assertEquals(2, q.getValues().length); - assertEquals("L-lactate dehydrogenase A-chain", q.getValues()[0]); + assertEquals("ORF 8 (AA 1-330)", q.getValues()[0]); assertEquals("pickle", q.getValues()[1]); assertNull(q.getEvidence()); q = ef.getQualifiers().get(1); + assertEquals("protein_id", q.getName()); + assertEquals(1, q.getValues().length); + assertEquals("CAA30420.1", q.getValues()[0]); + q = ef.getQualifiers().get(2); assertEquals("translation", q.getName()); assertEquals(1, q.getValues().length); - assertEquals("MSLKDHLIHN", q.getValues()[0]); + assertEquals("MLCF", q.getValues()[0]); assertEquals(1, q.getEvidence().length); assertEquals("Keith", q.getEvidence()[0]); - // feature locations - assertEquals(1, ef.getLocations().size()); - EmblFeatureLocations fl = ef.getLocations().get(0); - 
assertEquals("single", fl.getLocationType()); - assertTrue(fl.isLocationComplement()); - assertEquals(1, fl.getLocElements().size()); - EmblFeatureLocElement le = fl.getLocElements().get(0); - assertEquals("range", le.getType()); - assertEquals("X53828", le.getAccession()); - assertEquals("1", le.getVersion()); - assertFalse(le.isComplement()); - assertEquals(2, le.getBasePositions().length); - BasePosition bp = le.getBasePositions()[0]; - assertEquals("simple", bp.getType()); - assertEquals("60", bp.getPos()); - bp = le.getBasePositions()[1]; - assertEquals("join", bp.getType()); - assertEquals("1058", bp.getPos()); + /* + * second CDS + */ + ef = entry.getFeatures().get(1); + assertEquals("CDS", ef.getName()); + assertEquals("4..15", ef.getLocation()); + assertEquals(1, ef.getDbRefs().size()); + dbref = ef.getDbRefs().get(0); + assertEquals("UniProtKB/Swiss-Prot", dbref.getSource()); + assertEquals("B0BCM3", dbref.getAccessionId()); + assertEquals("0", dbref.getVersion()); + assertEquals(2, ef.getQualifiers().size()); + q = ef.getQualifiers().get(0); + assertEquals("protein_id", q.getName()); + assertEquals(1, q.getValues().length); + assertEquals("CAA30421.1", q.getValues()[0]); + q = ef.getQualifiers().get(1); + assertEquals("translation", q.getName()); + assertEquals(1, q.getValues().length); + assertEquals("MSSS", q.getValues()[0]); + + /* + * third CDS + */ + ef = entry.getFeatures().get(2); + assertEquals("CDS", ef.getName()); + assertEquals("join(4..6,10..15)", ef.getLocation()); + assertNull(ef.getDbRefs()); + assertEquals(2, ef.getQualifiers().size()); + q = ef.getQualifiers().get(0); + assertEquals("protein_id", q.getName()); + assertEquals(1, q.getValues().length); + assertEquals("CAA12345.6", q.getValues()[0]); + q = ef.getQualifiers().get(1); + assertEquals("translation", q.getName()); + assertEquals(1, q.getValues().length); + assertEquals("MSS", q.getValues()[0]); /* - * Sequence + * Sequence - verify newline not converted to space (JAL-2029) */ 
EmblSequence seq = entry.getSequence(); - assertEquals("mRNA", seq.getType()); - assertEquals("2", seq.getVersion()); - assertEquals("GTGACG", seq.getSequence()); + assertEquals( + "GGTATGTCCTCTAGTACAAACACCCCCAATATTGTGATATAATTAAAAACATAGCAT", + seq.getSequence()); + + /* + * getSequence() converts empty DBRefEntry.version to "0" + */ + assertEquals("0", entry.getDbRefs().get(1).getVersion()); + assertEquals("0", entry.getFeatures().get(0).getDbRefs().get(1) + .getVersion()); } }