JAL-4241 Fix non-standard residues replacement.
author: Mateusz Warowny <mmzwarowny@dundee.ac.uk>
Tue, 12 Sep 2023 12:49:19 +0000 (14:49 +0200)
committer: Mateusz Warowny <mmzwarowny@dundee.ac.uk>
Tue, 12 Sep 2023 12:49:19 +0000 (14:49 +0200)
src/jalview/ws2/actions/annotation/AnnotationJob.java
test/jalview/ws2/actions/annotation/AnnotationJobTest.java

index 467dafd..187c14c 100644 (file)
@@ -71,18 +71,20 @@ public class AnnotationJob extends BaseJob
       String newName = SeqsetUtils.unique_name(seqences.size() + 1);
       if (namesMap != null)
         namesMap.put(newName, sq);
+      char[] seqChars = sq.getSequence(start, end + 1);
+      if (filterNonStandardResidues)
+        replaceNonStandardResidues(seqChars, Comparison.GAP_DASH, sq.isProtein());
       Sequence seq;
       if (submitGaps)
       {
-        seq = new Sequence(newName, sq.getSequenceAsString());
+        seq = new Sequence(newName, seqChars);
         updateResidueMap(residueMap, seq, filterNonStandardResidues);
       }
       else
       {
         // TODO: add ability to exclude hidden regions
         seq = new Sequence(newName,
-                AlignSeq.extractGaps(Comparison.GapChars,
-                        sq.getSequenceAsString(start, end + 1)));
+                AlignSeq.extractGaps(Comparison.GapChars, new String(seqChars)));
         // for annotation need to also record map to sequence start/end
         // position in range
         // then transfer back to original sequence on return.
@@ -112,6 +114,20 @@ public class AnnotationJob extends BaseJob
             minSize);
   }
 
+  /**
+   * Overwrites, in place, every character of {@code seq} that is not
+   * classified as a standard residue with {@code replacement}.
+   * <p>
+   * A character counts as non-standard when its entry in the relevant
+   * {@link ResidueProperties} lookup table is at or above the cut-off used
+   * here (20 for proteins via {@code aaIndex}, 5 for nucleotides via
+   * {@code nucleotideIndex}), or when the character lies outside the table
+   * altogether.
+   * NOTE(review): the cut-offs presumably correspond to the 20 standard amino
+   * acids and the 5 standard bases - confirm against ResidueProperties.
+   *
+   * @param seq
+   *          residue characters to filter; modified in place
+   * @param replacement
+   *          character written over each non-standard residue
+   * @param isProtein
+   *          {@code true} to classify with the amino acid table,
+   *          {@code false} to classify with the nucleotide table
+   */
+  private static void replaceNonStandardResidues(char[] seq, char replacement, boolean isProtein)
+  {
+    for (int i = 0; i < seq.length; i++)
+    {
+      char chr = seq[i];
+      // a char spans 0..65535 but the lookup tables are much shorter, so
+      // check the bound first: symbols beyond the table cannot be standard
+      // residues and must not trigger ArrayIndexOutOfBoundsException
+      boolean nonStandard = isProtein
+          ? chr >= ResidueProperties.aaIndex.length
+              || ResidueProperties.aaIndex[chr] >= 20
+          : chr >= ResidueProperties.nucleotideIndex.length
+              || ResidueProperties.nucleotideIndex[chr] >= 5;
+      if (nonStandard)
+      {
+        seq[i] = replacement;
+      }
+    }
+  }
+
   private static void updateResidueMap(BitSet residueMap, SequenceI seq,
           boolean filterNonStandardResidues)
   {
index 4e3767f..183d6f3 100644 (file)
@@ -172,7 +172,7 @@ public class AnnotationJobTest
   }
 
   @Test(groups = { "Functional"} )
-  public void testCreate_ContainsNonStandardAndNoFilterNonStandard_NonStandardToGap()
+  public void testCreate_ContainsNonStandardAndNoFilterNonStandard_NonStandardRemain()
   {
     var alignment = new Alignment(new Sequence[] {
         new Sequence("test1", "ACACAOACACAC"),
@@ -180,6 +180,20 @@ public class AnnotationJobTest
     });
     var annotJob = AnnotationJob.create(alignment, true, true, true, false, 0);
     assertThat(annotJob.getInputSequences(), contains(
+            matchesSequenceString("ACACAOACACAC"),
+            matchesSequenceString("ABAVAVAVABAV")
+    ));
+  }
+
+  @Test(groups = { "Functional"} )
+  public void testCreate_ContainsNonStandardAndFilterNonStandard_NonStandardToGap()
+  {
+    var alignment = new Alignment(new Sequence[] {
+        new Sequence("test1", "ACACAOACACAC"),
+        new Sequence("test2", "ABAVAVAVABAV")
+    });
+    var annotJob = AnnotationJob.create(alignment, true, true, true, true, 0);
+    assertThat(annotJob.getInputSequences(), contains(
             matchesSequenceString("ACACA-ACACAC"),
             matchesSequenceString("A-AVAVAVA-AV")
     ));