X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=test%2Fjalview%2Fio%2FEmblFlatFileTest.java;h=fdb8b9a7c66b89eb4fb4d2a11f755f009e5a21db;hb=e9a1c2c372f4bbf6cf658de3dba73ef326b20c20;hp=4ca826c457b1d1e694e830b1008762301f89db1a;hpb=25c0a38f60cee16b740a0b126badf790226dcdad;p=jalview.git diff --git a/test/jalview/io/EmblFlatFileTest.java b/test/jalview/io/EmblFlatFileTest.java index 4ca826c..fdb8b9a 100644 --- a/test/jalview/io/EmblFlatFileTest.java +++ b/test/jalview/io/EmblFlatFileTest.java @@ -1,11 +1,31 @@ +/* + * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) + * Copyright (C) $$Year-Rel$$ The Jalview Authors + * + * This file is part of Jalview. + * + * Jalview is free software: you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, either version 3 + * of the License, or (at your option) any later version. + * + * Jalview is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Jalview. If not, see <http://www.gnu.org/licenses/>. + * The Jalview Authors are detailed in the 'AUTHORS' file. 
+ */ package jalview.io; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertTrue; import static org.testng.AssertJUnit.assertNotNull; +import static org.testng.AssertJUnit.assertNull; import static org.testng.AssertJUnit.assertSame; import static org.testng.AssertJUnit.fail; -import static org.testng.AssertJUnit.assertNull; import java.io.File; import java.io.IOException; @@ -14,8 +34,10 @@ import java.util.Arrays; import java.util.List; import java.util.Set; +import org.testng.annotations.BeforeClass; import org.testng.annotations.Test; +import jalview.bin.Console; import jalview.datamodel.DBRefEntry; import jalview.datamodel.Mapping; import jalview.datamodel.Sequence.DBModList; @@ -26,6 +48,12 @@ import jalview.util.MapList; public class EmblFlatFileTest { + @BeforeClass(alwaysRun = true) + public void setUp() + { + Console.initLogger(); + } + /** * A fairly tough test, using J03321 (circular DNA), which has 8 CDS features, * one of them reverse strand @@ -39,7 +67,6 @@ public class EmblFlatFileTest File dataFile = new File("test/jalview/io/J03321.embl.txt"); FileParse fp = new FileParse(dataFile, DataSourceType.FILE); EmblFlatFile parser = new EmblFlatFile(fp, "EmblTest"); - parser.parse(); List<SequenceI> seqs = parser.getSeqs(); assertEquals(seqs.size(), 1); @@ -165,7 +192,7 @@ public class EmblFlatFileTest { assertEquals((ranges = map.getFromRanges()).size(), 1); assertEquals(ranges.get(0)[0], 1579); - assertEquals(ranges.get(0)[1], 2934); + assertEquals(ranges.get(0)[1], 2931); // excludes stop 2934 assertEquals((ranges = map.getToRanges()).size(), 1); assertEquals(ranges.get(0)[0], 1); assertEquals(ranges.get(0)[1], 451); @@ -177,7 +204,7 @@ public class EmblFlatFileTest { assertEquals((ranges = map.getFromRanges()).size(), 1); assertEquals(ranges.get(0)[0], 2928); - assertEquals(ranges.get(0)[1], 3992); + assertEquals(ranges.get(0)[1], 3989); // excludes stop 3992 assertEquals((ranges = map.getToRanges()).size(), 1); 
assertEquals(ranges.get(0)[0], 1); assertEquals(ranges.get(0)[1], 354); @@ -186,7 +213,7 @@ public class EmblFlatFileTest { assertEquals((ranges = map.getFromRanges()).size(), 1); assertEquals(ranges.get(0)[0], 4054); - assertEquals(ranges.get(0)[1], 4848); + assertEquals(ranges.get(0)[1], 4845); // excludes stop 4848 assertEquals((ranges = map.getToRanges()).size(), 1); assertEquals(ranges.get(0)[0], 1); assertEquals(ranges.get(0)[1], 264); @@ -198,7 +225,7 @@ public class EmblFlatFileTest assertEquals(ranges.get(0)[0], 7022); assertEquals(ranges.get(0)[1], 7502); assertEquals(ranges.get(1)[0], 1); - assertEquals(ranges.get(1)[1], 437); + assertEquals(ranges.get(1)[1], 434); // excludes stop at 437 assertEquals((ranges = map.getToRanges()).size(), 1); assertEquals(ranges.get(0)[0], 1); assertEquals(ranges.get(0)[1], 305); @@ -208,7 +235,7 @@ public class EmblFlatFileTest // complement(488..1480) assertEquals((ranges = map.getFromRanges()).size(), 1); assertEquals(ranges.get(0)[0], 1480); - assertEquals(ranges.get(0)[1], 488); + assertEquals(ranges.get(0)[1], 491); // // excludes stop at 488 assertEquals((ranges = map.getToRanges()).size(), 1); assertEquals(ranges.get(0)[0], 1); assertEquals(ranges.get(0)[1], 330); @@ -224,6 +251,23 @@ public class EmblFlatFileTest assertEquals(uniprotCount, 8); } + /** + * A fairly tough test, using J03321 (circular DNA), which has 8 CDS features, + * one of them reverse strand + * + * @throws MalformedURLException + * @throws IOException + */ + @Test(groups = "Functional") + public void testParseToRNA() throws MalformedURLException, IOException + { + File dataFile = new File("test/jalview/io/J03321_rna.embl.txt"); + FileParse fp = new FileParse(dataFile, DataSourceType.FILE); + EmblFlatFile parser = new EmblFlatFile(fp, "EmblTest"); + List<SequenceI> seqs = parser.getSeqs(); + assertTrue(seqs.get(0).getSequenceAsString().indexOf("u") > -1); + } + @Test(groups = "Functional") public void testParse_codonStartNot1() { @@ -242,17 +286,19 @@ 
public class EmblFlatFileTest public void testParse_noUniprotXref() throws IOException { // MN908947 cut down to 40BP, one CDS, length 5 peptide for test purposes + // plus an additional (invented) test case: + // - multi-line /product qualifier including escaped quotes String data = "ID MN908947; SV 3; linear; genomic RNA; STD; VRL; 20 BP.\n" + "DE Severe acute respiratory syndrome coronavirus 2 isolate Wuhan-Hu-1,\n" + "FT CDS 3..17\n" + "FT /protein_id=\"QHD43415.1\"\n" - + "FT /product=\"orf1ab polyprotein\"\n" + + "FT /product=\"orf1ab polyprotein\n" + + "FT \"\"foobar\"\" \"\n" + "FT /translation=\"MRKLD\n" + "SQ Sequence 7496 BP; 2450 A; 1290 C; 1434 G; 2322 T; 0 other;\n" + " ggatGcgtaa gttagacgaa attttgtctt tgcgcacaga 40\n"; FileParse fp = new FileParse(data, DataSourceType.PASTE); EmblFlatFile parser = new EmblFlatFile(fp, "EmblTest"); - parser.parse(); List<SequenceI> seqs = parser.getSeqs(); assertEquals(seqs.size(), 1); SequenceI seq = seqs.get(0); @@ -262,7 +308,7 @@ public class EmblFlatFileTest * dna should have dbref to itself, and to inferred EMBLCDSPROTEIN:QHD43415.1 */ assertEquals(dbrefs.size(), 2); - + // dbref to self DBRefEntry dbref = dbrefs.get(0); assertEquals(dbref.getSource(), "EMBLTEST"); @@ -276,7 +322,7 @@ public class EmblFlatFileTest assertEquals(map.getToHighest(), 40); assertEquals(map.getFromRatio(), 1); assertEquals(map.getToRatio(), 1); - + // dbref to inferred EMBLCDSPROTEIN: dbref = dbrefs.get(1); assertEquals(dbref.getSource(), "EMBLCDSPROTEIN"); @@ -284,7 +330,8 @@ public class EmblFlatFileTest mapping = dbref.getMap(); SequenceI mapTo = mapping.getTo(); assertEquals(mapTo.getName(), "QHD43415.1"); - assertEquals(mapTo.getDescription(), "orf1ab polyprotein"); + // the /product qualifier transfers to protein product description + assertEquals(mapTo.getDescription(), "orf1ab polyprotein \"foobar\""); assertEquals(mapTo.getSequenceAsString(), "MRKLD"); map = mapping.getMap(); assertEquals(map.getFromLowest(), 3); @@ -303,24 +350,39 @@ 
public class EmblFlatFileTest // exact length match: assertSame(exons, EmblFlatFile.adjustForProteinLength(6, exons)); + // patch from JAL-3725 in EmblXmlSource propagated to Flatfile // match if we assume exons include stop codon not in protein: - assertSame(exons, EmblFlatFile.adjustForProteinLength(5, exons)); + int[] truncated = EmblFlatFile.adjustForProteinLength(5, exons); + assertEquals(Arrays.toString(truncated), "[11, 15, 21, 25, 31, 35]"); // truncate last exon by 6bp - int[] truncated = EmblFlatFile.adjustForProteinLength(4, exons); - assertEquals("[11, 15, 21, 25, 31, 32]", Arrays.toString(truncated)); + truncated = EmblFlatFile.adjustForProteinLength(4, exons); + assertEquals(Arrays.toString(truncated), "[11, 15, 21, 25, 31, 32]"); // remove last exon and truncate preceding by 1bp (so 3bp in total) truncated = EmblFlatFile.adjustForProteinLength(3, exons); - assertEquals("[11, 15, 21, 24]", Arrays.toString(truncated)); + assertEquals(Arrays.toString(truncated), "[11, 15, 21, 24]"); // exact removal of exon case: exons = new int[] { 11, 15, 21, 27, 33, 38 }; // 18 bp truncated = EmblFlatFile.adjustForProteinLength(4, exons); - assertEquals("[11, 15, 21, 27]", Arrays.toString(truncated)); + assertEquals(Arrays.toString(truncated), "[11, 15, 21, 27]"); // what if exons are too short for protein? truncated = EmblFlatFile.adjustForProteinLength(7, exons); assertSame(exons, truncated); } + + @Test(groups = "Functional") + public void testRemoveQuotes() + { + assertNull(EmblFlatFile.removeQuotes(null)); + assertEquals(EmblFlatFile.removeQuotes("No quotes here"), + "No quotes here"); + assertEquals(EmblFlatFile.removeQuotes("\"Enclosing quotes\""), + "Enclosing quotes"); + assertEquals( + EmblFlatFile.removeQuotes("\"Escaped \"\"quotes\"\" example\""), + "Escaped \"quotes\" example"); + } }