JAL-3365 expand range of allowed DSSP secondary structure symbols in Stockholm files
author Jim Procter <j.procter@dundee.ac.uk>
Sat, 16 Apr 2022 12:36:06 +0000 (13:36 +0100)
committer Jim Procter <j.procter@dundee.ac.uk>
Sat, 16 Apr 2022 12:36:06 +0000 (13:36 +0100)
src/jalview/datamodel/AlignmentAnnotation.java
src/jalview/io/StockholmFile.java
test/jalview/io/StockholmFileTest.java

index f3cdae6..0f41850 100755 (executable)
  */
 package jalview.datamodel;
 
-import java.util.Locale;
-
-import jalview.analysis.Rna;
-import jalview.analysis.SecStrConsensus.SimpleBP;
-import jalview.analysis.WUSSParseException;
-
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collection;
@@ -33,9 +27,14 @@ import java.util.Collections;
 import java.util.HashMap;
 import java.util.Iterator;
 import java.util.List;
+import java.util.Locale;
 import java.util.Map;
 import java.util.Map.Entry;
 
+import jalview.analysis.Rna;
+import jalview.analysis.SecStrConsensus.SimpleBP;
+import jalview.analysis.WUSSParseException;
+
 /**
  * DOCUMENT ME!
  * 
@@ -382,21 +381,24 @@ public class AlignmentAnnotation
         // annotations[i].secondaryStructure + "'");
         // TODO: 2.8.2 should this ss symbol validation check be a function in
         // RNA/ResidueProperties ?
+        // allow for DSSP extended code:
+        // https://www.wikidoc.org/index.php/Secondary_structure#The_DSSP_code
+        // GHITEBS as well as C and X (for missing?)
         if (annotations[i].secondaryStructure == '('
                 || annotations[i].secondaryStructure == '['
                 || annotations[i].secondaryStructure == '<'
                 || annotations[i].secondaryStructure == '{'
                 || annotations[i].secondaryStructure == 'A'
-                || annotations[i].secondaryStructure == 'B'
-                || annotations[i].secondaryStructure == 'C'
+                // || annotations[i].secondaryStructure == 'B'
+                // || annotations[i].secondaryStructure == 'C'
                 || annotations[i].secondaryStructure == 'D'
                 // || annotations[i].secondaryStructure == 'E' // ambiguous on
                 // its own -- already checked above
                 || annotations[i].secondaryStructure == 'F'
-                || annotations[i].secondaryStructure == 'G'
+                // || annotations[i].secondaryStructure == 'G'
                 // || annotations[i].secondaryStructure == 'H' // ambiguous on
                 // its own -- already checked above
-                || annotations[i].secondaryStructure == 'I'
+                // || annotations[i].secondaryStructure == 'I'
                 || annotations[i].secondaryStructure == 'J'
                 || annotations[i].secondaryStructure == 'K'
                 || annotations[i].secondaryStructure == 'L'
@@ -406,12 +408,12 @@ public class AlignmentAnnotation
                 || annotations[i].secondaryStructure == 'P'
                 || annotations[i].secondaryStructure == 'Q'
                 || annotations[i].secondaryStructure == 'R'
-                || annotations[i].secondaryStructure == 'S'
-                || annotations[i].secondaryStructure == 'T'
+                // || annotations[i].secondaryStructure == 'S'
+                // || annotations[i].secondaryStructure == 'T'
                 || annotations[i].secondaryStructure == 'U'
                 || annotations[i].secondaryStructure == 'V'
                 || annotations[i].secondaryStructure == 'W'
-                || annotations[i].secondaryStructure == 'X'
+                // || annotations[i].secondaryStructure == 'X'
                 || annotations[i].secondaryStructure == 'Y'
                 || annotations[i].secondaryStructure == 'Z')
         {
@@ -547,12 +549,12 @@ public class AlignmentAnnotation
                       : annotations[index + offset].displayCharacter == null
                               || annotations[index
                                       + offset].displayCharacter
-                                              .length() == 0
-                                                      ? annotations[index
-                                                              + offset].secondaryStructure
-                                                      : annotations[index
-                                                              + offset].displayCharacter
-                                                                      .charAt(0));
+                                      .length() == 0
+                                              ? annotations[index
+                                                      + offset].secondaryStructure
+                                              : annotations[index
+                                                      + offset].displayCharacter
+                                                      .charAt(0));
     }
 
     @Override
index cff328b..5d645ca 100644 (file)
@@ -90,7 +90,7 @@ public class StockholmFile extends AlignFile
   // use this regex to decide that a (whole) annotation line is NOT an RNA SS:
   // its only letters are DSSP codes B,C,E,G,H,I,S,T,X (any case), no brackets
   private static final Regex NOT_RNASS = new Regex(
-          "^[^<>[\\](){}A-DF-Za-df-z]*$");
+          "^[^<>[\\](){}ADFJ-RUVWYZadfj-ruvwyz]*$");
 
   StringBuffer out; // output buffer
 
index e4f2abc..b1995ab 100644 (file)
@@ -274,6 +274,11 @@ public class StockholmFileTest
     // check Alignment annotation
     AlignmentAnnotation[] aa_new = al_input.getAlignmentAnnotation();
     AlignmentAnnotation[] aa_original = al.getAlignmentAnnotation();
+    boolean expectProteinSS = !al.isNucleotide();
+    assertTrue(
+            "Alignments not both "
+                    + (al.isNucleotide() ? "nucleotide" : "protein"),
+            al_input.isNucleotide() == al.isNucleotide());
 
     // note - at moment we do not distinguish between alignment without any
     // annotation rows and alignment with no annotation row vector
@@ -292,6 +297,13 @@ public class StockholmFileTest
           assertEqualSecondaryStructure(
                   "Different alignment annotation at position " + i,
                   aa_original[i], aa_new[i], allowNullAnnotation);
+          if (aa_original[i].hasIcons)
+          {
+            assertTrue(
+                    "Secondary structure expected to be "
+                            + (expectProteinSS ? "protein" : "nucleotide"),
+                    expectProteinSS == !aa_original[i].isRNA());
+          }
           // compare graphGroup or graph properties - needed to verify JAL-1299
           assertEquals("Graph type not identical.", aa_original[i].graph,
                   aa_new[i].graph);