From 07cb43d5a1e477b16582f98519343c162d7428ef Mon Sep 17 00:00:00 2001 From: Jim Procter Date: Thu, 6 Jan 2022 12:29:01 +0000 Subject: [PATCH] JAL-3806 JAL-3725 propagate relaxed criteria for stop codon truncation from EMBLXmlSource to EMBLFlatFile parser --- src/jalview/io/EMBLLikeFlatFile.java | 9 +++------ test/jalview/io/EmblFlatFileTest.java | 16 +++++++++------- 2 files changed, 12 insertions(+), 13 deletions(-) diff --git a/src/jalview/io/EMBLLikeFlatFile.java b/src/jalview/io/EMBLLikeFlatFile.java index 64943b2..100fb8d 100644 --- a/src/jalview/io/EMBLLikeFlatFile.java +++ b/src/jalview/io/EMBLLikeFlatFile.java @@ -83,8 +83,7 @@ public abstract class EMBLLikeFlatFile extends AlignFile /** * Truncates (if necessary) the exon intervals to match 3 times the length of - * the protein; also accepts 3 bases longer (for stop codon not included in - * protein) + * the protein(including truncation for stop codon included in exon) * * @param proteinLength * @param exon @@ -102,11 +101,9 @@ public abstract class EMBLLikeFlatFile extends AlignFile int exonLength = MappingUtils.getLength(Arrays.asList(exon)); /* - * if exon length matches protein, or is shorter, or longer by the - * length of a stop codon (3 bases), then leave it unchanged + * if exon length matches protein, or is shorter, then leave it unchanged */ - if (expectedCdsLength >= exonLength - || expectedCdsLength == exonLength - 3) + if (expectedCdsLength >= exonLength) { return exon; } diff --git a/test/jalview/io/EmblFlatFileTest.java b/test/jalview/io/EmblFlatFileTest.java index 7775c8f..4b5afa7 100644 --- a/test/jalview/io/EmblFlatFileTest.java +++ b/test/jalview/io/EmblFlatFileTest.java @@ -329,21 +329,23 @@ public class EmblFlatFileTest // exact length match: assertSame(exons, EmblFlatFile.adjustForProteinLength(6, exons)); - // match if we assume exons include stop codon not in protein: - assertSame(exons, EmblFlatFile.adjustForProteinLength(5, exons)); - + // patch from JAL-3725 in EmblXmlSource propagated to Flatfile + // match if we assume exons include stop codon not in protein: + int[] truncated = EmblFlatFile.adjustForProteinLength(5, exons); + assertEquals(Arrays.toString(truncated), "[11, 15, 21, 25, 31, 35]"); + // truncate last exon by 6bp - int[] truncated = EmblFlatFile.adjustForProteinLength(4, exons); - assertEquals("[11, 15, 21, 25, 31, 32]", Arrays.toString(truncated)); + truncated = EmblFlatFile.adjustForProteinLength(4, exons); + assertEquals(Arrays.toString(truncated),"[11, 15, 21, 25, 31, 32]"); // remove last exon and truncate preceding by 1bp (so 3bp in total) truncated = EmblFlatFile.adjustForProteinLength(3, exons); - assertEquals("[11, 15, 21, 24]", Arrays.toString(truncated)); + assertEquals(Arrays.toString(truncated),"[11, 15, 21, 24]"); // exact removal of exon case: exons = new int[] { 11, 15, 21, 27, 33, 38 }; // 18 bp truncated = EmblFlatFile.adjustForProteinLength(4, exons); - assertEquals("[11, 15, 21, 27]", Arrays.toString(truncated)); + assertEquals(Arrays.toString(truncated), "[11, 15, 21, 27]"); // what if exons are too short for protein? truncated = EmblFlatFile.adjustForProteinLength(7, exons); -- 1.7.10.2