From feeb56fc2f3d7c40a46466ff52c7663cc96d6c70 Mon Sep 17 00:00:00 2001 From: gmungoc Date: Thu, 27 Aug 2020 15:40:22 +0100 Subject: [PATCH] JAL-3725 exclude stop codon from CDS-to-protein mapping --- src/jalview/ws/dbsources/EmblXmlSource.java | 14 ++++++------- test/jalview/ws/dbsources/EmblXmlSourceTest.java | 23 +++++++++++----------- 2 files changed, 18 insertions(+), 19 deletions(-) diff --git a/src/jalview/ws/dbsources/EmblXmlSource.java b/src/jalview/ws/dbsources/EmblXmlSource.java index 97d7c9f..c5532f5 100644 --- a/src/jalview/ws/dbsources/EmblXmlSource.java +++ b/src/jalview/ws/dbsources/EmblXmlSource.java @@ -69,6 +69,7 @@ import jalview.xml.binding.embl.XrefType; * * @deprecated endpoint withdrawn August 2020 (JAL-3692), use EmblFlatfileSource */ + public abstract class EmblXmlSource extends EbiFileRetrievedProxy { private static final Regex ACCESSION_REGEX = new Regex("^[A-Z]+[0-9]+"); @@ -453,9 +454,9 @@ public abstract class EmblXmlSource extends EbiFileRetrievedProxy else { // final product length truncation check - int[] cdsRanges = adjustForProteinLength(translationLength, + int [] exons2 = adjustForProteinLength(translationLength, exons); - dnaToProteinMapping = new Mapping(product, cdsRanges, + dnaToProteinMapping = new Mapping(product, exons2, new int[] { 1, translationLength }, 3, 1); if (product != null) @@ -757,8 +758,7 @@ public abstract class EmblXmlSource extends EbiFileRetrievedProxy /** * Truncates (if necessary) the exon intervals to match 3 times the length of - * the protein; also accepts 3 bases longer (for stop codon not included in - * protein) + * the protein (including truncation for stop codon included in exon) * * @param proteinLength * @param exon @@ -775,11 +775,9 @@ public abstract class EmblXmlSource extends EbiFileRetrievedProxy int exonLength = MappingUtils.getLength(Arrays.asList(exon)); /* - * if exon length matches protein, or is shorter, or longer by the - * length of a stop codon (3 bases), then leave it unchanged + * if exon length matches protein, or is shorter, then leave it unchanged */ - if (expectedCdsLength >= exonLength - || expectedCdsLength == exonLength - 3) + if (expectedCdsLength >= exonLength) { return exon; } diff --git a/test/jalview/ws/dbsources/EmblXmlSourceTest.java b/test/jalview/ws/dbsources/EmblXmlSourceTest.java index 5f288a8..51d50f5 100644 --- a/test/jalview/ws/dbsources/EmblXmlSourceTest.java +++ b/test/jalview/ws/dbsources/EmblXmlSourceTest.java @@ -26,6 +26,14 @@ import static org.testng.AssertJUnit.assertNull; import static org.testng.AssertJUnit.assertSame; import static org.testng.AssertJUnit.assertTrue; +import java.io.ByteArrayInputStream; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import org.testng.annotations.BeforeClass; +import org.testng.annotations.Test; + import jalview.datamodel.AlignmentI; import jalview.datamodel.DBRefEntry; import jalview.datamodel.DBRefSource; @@ -36,14 +44,6 @@ import jalview.xml.binding.embl.EntryType.Feature; import jalview.xml.binding.embl.EntryType.Feature.Qualifier; import jalview.xml.binding.embl.XrefType; -import java.io.ByteArrayInputStream; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; - -import org.testng.annotations.BeforeClass; -import org.testng.annotations.Test; - public class EmblXmlSourceTest { @@ -352,11 +352,12 @@ public class EmblXmlSourceTest // exact length match: assertSame(exons, EmblXmlSource.adjustForProteinLength(6, exons)); - // match if we assume exons include stop codon not in protein: - assertSame(exons, EmblXmlSource.adjustForProteinLength(5, exons)); + // truncate last exon by 3bp (e.g. stop codon) + int[] truncated = EmblXmlSource.adjustForProteinLength(5, exons); + assertEquals("[11, 15, 21, 25, 31, 35]", Arrays.toString(truncated)); // truncate last exon by 6bp - int[] truncated = EmblXmlSource.adjustForProteinLength(4, exons); + truncated = EmblXmlSource.adjustForProteinLength(4, exons); assertEquals("[11, 15, 21, 25, 31, 32]", Arrays.toString(truncated)); // remove last exon and truncate preceding by 1bp -- 1.7.10.2