From 5a48101c1958df5d0f655a7044afd506df4e040f Mon Sep 17 00:00:00 2001
From: Mateusz Warowny
Date: Tue, 12 Sep 2023 14:49:19 +0200
Subject: [PATCH] JAL-4241 Fix non-standard residues replacement.

---
 .../ws2/actions/annotation/AnnotationJob.java | 40 ++++++++++++++++++++++++++++++++---
 .../ws2/actions/annotation/AnnotationJobTest.java | 16 +++++++++++++-
 2 files changed, 52 insertions(+), 4 deletions(-)

diff --git a/src/jalview/ws2/actions/annotation/AnnotationJob.java b/src/jalview/ws2/actions/annotation/AnnotationJob.java
index 467dafd..187c14c 100644
--- a/src/jalview/ws2/actions/annotation/AnnotationJob.java
+++ b/src/jalview/ws2/actions/annotation/AnnotationJob.java
@@ -71,18 +71,20 @@ public class AnnotationJob extends BaseJob
       String newName = SeqsetUtils.unique_name(seqences.size() + 1);
       if (namesMap != null)
         namesMap.put(newName, sq);
+      char[] seqChars = sq.getSequence(start, end + 1);
+      if (filterNonStandardResidues)
+        replaceNonStandardResidues(seqChars, Comparison.GAP_DASH, sq.isProtein());
       Sequence seq;
       if (submitGaps)
       {
-        seq = new Sequence(newName, sq.getSequenceAsString());
+        seq = new Sequence(newName, seqChars);
         updateResidueMap(residueMap, seq, filterNonStandardResidues);
       }
       else
       {
         // TODO: add ability to exclude hidden regions
         seq = new Sequence(newName,
-                AlignSeq.extractGaps(Comparison.GapChars,
-                        sq.getSequenceAsString(start, end + 1)));
+                AlignSeq.extractGaps(Comparison.GapChars, new String(seqChars)));
         // for annotation need to also record map to sequence start/end
         // position in range
         // then transfer back to original sequence on return.
@@ -112,6 +114,38 @@ public class AnnotationJob extends BaseJob
             minSize);
   }
 
+  /**
+   * Overwrites, in place, every character of {@code seq} that is not a
+   * standard residue with {@code replacement}. A character counts as standard
+   * when its entry in the relevant {@code ResidueProperties} lookup table is
+   * below 20 (protein) or below 5 (nucleotide).
+   *
+   * @param seq
+   *          sequence characters to filter; modified in place
+   * @param replacement
+   *          character written over each non-standard residue
+   * @param isProtein
+   *          {@code true} to use the amino acid table, {@code false} for the
+   *          nucleotide table
+   */
+  private static void replaceNonStandardResidues(char[] seq, char replacement, boolean isProtein)
+  {
+    for (int i = 0; i < seq.length; i++)
+    {
+      char chr = seq[i];
+      // A character beyond the end of the lookup table cannot be a standard
+      // residue, and indexing the table with it would throw.
+      if (isProtein
+              ? chr >= ResidueProperties.aaIndex.length
+                      || ResidueProperties.aaIndex[chr] >= 20
+              : chr >= ResidueProperties.nucleotideIndex.length
+                      || ResidueProperties.nucleotideIndex[chr] >= 5)
+      {
+        seq[i] = replacement;
+      }
+    }
+  }
+
   private static void updateResidueMap(BitSet residueMap, SequenceI seq,
           boolean filterNonStandardResidues)
   {
diff --git a/test/jalview/ws2/actions/annotation/AnnotationJobTest.java b/test/jalview/ws2/actions/annotation/AnnotationJobTest.java
index 4e3767f..183d6f3 100644
--- a/test/jalview/ws2/actions/annotation/AnnotationJobTest.java
+++ b/test/jalview/ws2/actions/annotation/AnnotationJobTest.java
@@ -172,7 +172,7 @@ public class AnnotationJobTest
   }
 
   @Test(groups = { "Functional"} )
-  public void testCreate_ContainsNonStandardAndNoFilterNonStandard_NonStandardToGap()
+  public void testCreate_ContainsNonStandardAndNoFilterNonStandard_NonStandardRemain()
   {
     var alignment = new Alignment(new Sequence[] {
       new Sequence("test1", "ACACAOACACAC"),
@@ -180,6 +180,20 @@ public class AnnotationJobTest
     });
     var annotJob = AnnotationJob.create(alignment, true, true, true, false, 0);
     assertThat(annotJob.getInputSequences(), contains(
+        matchesSequenceString("ACACAOACACAC"),
+        matchesSequenceString("ABAVAVAVABAV")
+    ));
+  }
+
+  @Test(groups = { "Functional"} )
+  public void testCreate_ContainsNonStandardAndFilterNonStandard_NonStandardToGap()
+  {
+    var alignment = new Alignment(new Sequence[] {
+      new Sequence("test1", "ACACAOACACAC"),
+      new Sequence("test2", "ABAVAVAVABAV")
+    });
+    var annotJob = AnnotationJob.create(alignment, true, true, true, true, 0);
+    assertThat(annotJob.getInputSequences(), contains(
         matchesSequenceString("ACACA-ACACAC"),
         matchesSequenceString("A-AVAVAVA-AV")
     ));
-- 
1.7.10.2