From 874fe73aa58e600466aab1609b258e14d5eacdf8 Mon Sep 17 00:00:00 2001 From: gmungoc Date: Thu, 27 Aug 2020 15:40:22 +0100 Subject: [PATCH] JAL-3725 exclude stop codon from CDS-to-protein mapping --- src/jalview/ws/dbsources/EmblXmlSource.java | 52 ++++++++++++------------- test/jalview/ws/dbsources/EmblSourceTest.java | 21 +++++----- 2 files changed, 35 insertions(+), 38 deletions(-) diff --git a/src/jalview/ws/dbsources/EmblXmlSource.java b/src/jalview/ws/dbsources/EmblXmlSource.java index 19366e0..5457114 100644 --- a/src/jalview/ws/dbsources/EmblXmlSource.java +++ b/src/jalview/ws/dbsources/EmblXmlSource.java @@ -20,6 +20,25 @@ */ package jalview.ws.dbsources; +import java.io.File; +import java.io.FileInputStream; +import java.io.InputStream; +import java.text.ParseException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Hashtable; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; + +import javax.xml.bind.JAXBContext; +import javax.xml.bind.JAXBElement; +import javax.xml.bind.JAXBException; +import javax.xml.stream.FactoryConfigurationError; +import javax.xml.stream.XMLInputFactory; +import javax.xml.stream.XMLStreamException; +import javax.xml.stream.XMLStreamReader; + import jalview.analysis.SequenceIdMatcher; import jalview.bin.Cache; import jalview.datamodel.Alignment; @@ -40,29 +59,9 @@ import jalview.ws.ebi.EBIFetchClient; import jalview.xml.binding.embl.EntryType; import jalview.xml.binding.embl.EntryType.Feature; import jalview.xml.binding.embl.EntryType.Feature.Qualifier; -import jalview.xml.binding.jalview.JalviewModel; import jalview.xml.binding.embl.ROOT; import jalview.xml.binding.embl.XrefType; -import java.io.File; -import java.io.FileInputStream; -import java.io.InputStream; -import java.text.ParseException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Hashtable; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; - -import javax.xml.bind.JAXBContext; -import javax.xml.bind.JAXBElement; -import javax.xml.bind.JAXBException; -import javax.xml.stream.FactoryConfigurationError; -import javax.xml.stream.XMLInputFactory; -import javax.xml.stream.XMLStreamException; -import javax.xml.stream.XMLStreamReader; - public abstract class EmblXmlSource extends EbiFileRetrievedProxy { /* @@ -443,9 +442,9 @@ public abstract class EmblXmlSource extends EbiFileRetrievedProxy else { // final product length truncation check - int[] cdsRanges = adjustForProteinLength(translationLength, + int [] exons2 = adjustForProteinLength(translationLength, exons); - dnaToProteinMapping = new Mapping(product, cdsRanges, + dnaToProteinMapping = new Mapping(product, exons2, new int[] { 1, translationLength }, 3, 1); if (product != null) @@ -713,8 +712,7 @@ public abstract class EmblXmlSource extends EbiFileRetrievedProxy /** * Truncates (if necessary) the exon intervals to match 3 times the length of - * the protein; also accepts 3 bases longer (for stop codon not included in - * protein) + * the protein (including truncation for stop codon included in exon) * * @param proteinLength * @param exon @@ -731,11 +729,9 @@ public abstract class EmblXmlSource extends EbiFileRetrievedProxy int exonLength = MappingUtils.getLength(Arrays.asList(exon)); /* - * if exon length matches protein, or is shorter, or longer by the - * length of a stop codon (3 bases), then leave it unchanged + * if exon length matches protein, or is shorter, then leave it unchanged */ - if (expectedCdsLength >= exonLength - || expectedCdsLength == exonLength - 3) + if (expectedCdsLength >= exonLength) { return exon; } diff --git a/test/jalview/ws/dbsources/EmblSourceTest.java b/test/jalview/ws/dbsources/EmblSourceTest.java index 5bf215c..93e8e51 100644 --- a/test/jalview/ws/dbsources/EmblSourceTest.java +++ b/test/jalview/ws/dbsources/EmblSourceTest.java @@ -26,6 +26,13 @@ import static org.testng.AssertJUnit.assertNull; import static org.testng.AssertJUnit.assertSame; import static org.testng.AssertJUnit.assertTrue; +import java.io.ByteArrayInputStream; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import org.testng.annotations.Test; + import jalview.datamodel.DBRefEntry; import jalview.datamodel.DBRefSource; import jalview.datamodel.SequenceI; @@ -35,13 +42,6 @@ import jalview.xml.binding.embl.EntryType.Feature; import jalview.xml.binding.embl.EntryType.Feature.Qualifier; import jalview.xml.binding.embl.XrefType; -import java.io.ByteArrayInputStream; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; - -import org.testng.annotations.Test; - public class EmblSourceTest { @@ -315,11 +315,12 @@ public class EmblSourceTest // exact length match: assertSame(exons, EmblXmlSource.adjustForProteinLength(6, exons)); - // match if we assume exons include stop codon not in protein: - assertSame(exons, EmblXmlSource.adjustForProteinLength(5, exons)); + // truncate last exon by 3bp (e.g. stop codon) + int[] truncated = EmblXmlSource.adjustForProteinLength(5, exons); + assertEquals("[11, 15, 21, 25, 31, 35]", Arrays.toString(truncated)); // truncate last exon by 6bp - int[] truncated = EmblXmlSource.adjustForProteinLength(4, exons); + truncated = EmblXmlSource.adjustForProteinLength(4, exons); assertEquals("[11, 15, 21, 25, 31, 32]", Arrays.toString(truncated)); // remove last exon and truncate preceding by 1bp -- 1.7.10.2