JAL-2907 read/write underscore for annotation at gap positions

author gmungoc <g.m.carstairs@dundee.ac.uk>

Mon, 26 Feb 2018 16:32:51 +0000 (16:32 +0000)

committer gmungoc <g.m.carstairs@dundee.ac.uk>

Mon, 26 Feb 2018 16:32:51 +0000 (16:32 +0000)
author gmungoc <g.m.carstairs@dundee.ac.uk>
Mon, 26 Feb 2018 16:32:51 +0000 (16:32 +0000)
committer gmungoc <g.m.carstairs@dundee.ac.uk>
Mon, 26 Feb 2018 16:32:51 +0000 (16:32 +0000)
diff --git a/src/jalview/io/StockholmFile.java b/src/jalview/io/StockholmFile.java

index f5b5177..0388fda 100644 (file)
--- a/src/jalview/io/StockholmFile.java
+++ b/src/jalview/io/StockholmFile.java
@@ -46,6 +46,7 @@ import java.util.Hashtable;
  import java.util.LinkedHashMap;
  import java.util.List;
  import java.util.Map;
+import java.util.Map.Entry;
  import java.util.Vector;
  
  import com.stevesoft.pat.Regex;
@@ -74,11 +75,13 @@ import fr.orsay.lri.varna.models.rna.RNA;
   */
  public class StockholmFile extends AlignFile
  {
-  private static final String ANNOTATION = "annotation";
+  private static final char UNDERSCORE = '_';
  
-  private static final Regex OPEN_PAREN = new Regex("(<|\\[)", "(");
+  private static final String ANNOTATION = "annotation";
  
-  private static final Regex CLOSE_PAREN = new Regex("(>|\\])", ")");
+  // private static final Regex OPEN_PAREN = new Regex("(<|\\[)", "(");
+  //
+  // private static final Regex CLOSE_PAREN = new Regex("(>|\\])", ")");
  
    public static final Regex DETECT_BRACKETS = new Regex(
            "(<|>|\\[|\\]|\\(|\\)|\\{|\\})");
@@ -197,7 +200,7 @@ public class StockholmFile extends AlignFile
      String version;
      // String id;
      Hashtable seqAnn = new Hashtable(); // Sequence related annotations
-    LinkedHashMap<String, String> seqs = new LinkedHashMap<String, String>();
+    LinkedHashMap<String, String> seqs = new LinkedHashMap<>();
      Regex p, r, rend, s, x;
      // Temporary line for processing RNA annotation
      // String RNAannot = "";
@@ -658,7 +661,7 @@ public class StockholmFile extends AlignFile
                strucAnn = new Hashtable();
              }
  
-            Vector<AlignmentAnnotation> newStruc = new Vector<AlignmentAnnotation>();
+            Vector<AlignmentAnnotation> newStruc = new Vector<>();
              parseAnnotationRow(newStruc, type, ns);
              for (AlignmentAnnotation alan : newStruc)
              {
@@ -710,7 +713,7 @@ public class StockholmFile extends AlignFile
    private void guessDatabaseFor(Sequence seqO, String dbr, String dbsource)
    {
      DBRefEntry dbrf = null;
-    List<DBRefEntry> dbrs = new ArrayList<DBRefEntry>();
+    List<DBRefEntry> dbrs = new ArrayList<>();
      String seqdb = "Unknown", sdbac = "" + dbr;
      int st = -1, en = -1, p;
      if ((st = sdbac.indexOf("/")) > -1)
@@ -837,6 +840,10 @@ public class StockholmFile extends AlignFile
      for (int i = 0; i < annots.length(); i++)
      {
        String pos = annots.substring(i, i + 1);
+      if (UNDERSCORE == pos.charAt(0))
+      {
+        pos = " ";
+      }
        Annotation ann;
        ann = new Annotation(pos, "", ' ', 0f); // 0f is 'valid' null - will not
        // be written out
@@ -985,7 +992,6 @@ public class StockholmFile extends AlignFile
                  .form("#=GS " + idd.toString() + " "));
          if (type.contains("PFAM") || type.contains("RFAM"))
          {
-
            out.append(" AC " + type.substring(type.indexOf(";") + 1));
          }
          else
@@ -1028,7 +1034,7 @@ public class StockholmFile extends AlignFile
            String seq = "";
            for (int k = 0; k < ann.length; k++)
            {
-            seq += outputCharacter(key, k, isrna, ann, s[i]);
+            seq += outputCharacter(key, k, ann, s[i]);
            }
            out.append(seq);
            out.append(newline);
@@ -1080,10 +1086,9 @@ public class StockholmFile extends AlignFile
  
          out.append(
                  new Format("%-" + maxid + "s").form("#=GC " + label + " "));
-        boolean isrna = aa.isValidStruc();
          for (int j = 0; j < aa.annotations.length; j++)
          {
-          seq += outputCharacter(key, j, isrna, aa.annotations, null);
+          seq += outputCharacter(key, j, aa.annotations, null);
          }
          out.append(seq);
          out.append(newline);
@@ -1101,26 +1106,25 @@ public class StockholmFile extends AlignFile
     * 
     * @param seq
     * @param key
-   * @param k
-   * @param isrna
+   * @param column
     * @param ann
     * @param sequenceI
     */
-  private char outputCharacter(String key, int k, boolean isrna,
-          Annotation[] ann, SequenceI sequenceI)
+  static char outputCharacter(String key, int column, Annotation[] ann,
+          SequenceI sequenceI)
    {
-    char seq = ' ';
-    Annotation annot = ann[k];
+    Annotation annot = column >= ann.length ? null : ann[column];
      String ch = (annot == null)
              ? ((sequenceI == null) ? "-"
-                    : Character.toString(sequenceI.getCharAt(k)))
+                    : Character.toString(sequenceI.getCharAt(column)))
              : annot.displayCharacter;
-    if (key != null && key.equals("SS"))
+
+    if ("SS".equals(key))
      {
        if (annot == null)
        {
-        // sensible gap character
-        return ' ';
+        // whitespace not allowed in annotation
+        return UNDERSCORE;
        }
        else
        {
@@ -1132,6 +1136,7 @@ public class StockholmFile extends AlignFile
        }
      }
  
+    char seq = '0';
      if (ch.length() == 0)
      {
        seq = '.';
@@ -1140,7 +1145,7 @@ public class StockholmFile extends AlignFile
      {
        seq = ch.charAt(0);
      }
-    else if (ch.length() > 1)
+    else
      {
        seq = ch.charAt(1);
      }
@@ -1159,13 +1164,13 @@ public class StockholmFile extends AlignFile
      return out.toString();
    }
  
-  private static Hashtable typeIds = null;
+  private static Map<String, String> typeIds = null;
  
    static
    {
      if (typeIds == null)
      {
-      typeIds = new Hashtable();
+      typeIds = new Hashtable<>();
        typeIds.put("SS", "Secondary Structure");
        typeIds.put("SA", "Surface Accessibility");
        typeIds.put("TM", "transmembrane");
@@ -1181,7 +1186,6 @@ public class StockholmFile extends AlignFile
        typeIds.put("DR", "reference");
        typeIds.put("LO", "look");
        typeIds.put("RF", "Reference Positions");
-
      }
    }
  
@@ -1189,7 +1193,7 @@ public class StockholmFile extends AlignFile
    {
      if (typeIds.containsKey(id))
      {
-      return (String) typeIds.get(id);
+      return typeIds.get(id);
      }
      System.err.println(
              "Warning : Unknown Stockholm annotation type code " + id);
@@ -1199,20 +1203,13 @@ public class StockholmFile extends AlignFile
    protected static String type2id(String type)
    {
      String key = null;
-    Enumeration e = typeIds.keys();
-    while (e.hasMoreElements())
+    for (Entry<String, String> entry : typeIds.entrySet())
      {
-      Object ll = e.nextElement();
-      if (typeIds.get(ll).toString().equalsIgnoreCase(type))
+      if (entry.getValue().equalsIgnoreCase(type))
        {
-        key = (String) ll;
-        break;
+        return entry.getKey();
        }
      }
-    if (key != null)
-    {
-      return key;
-    }
      System.err.println(
              "Warning : Unknown Stockholm annotation type: " + type);
      return key;
diff --git a/test/jalview/io/StockholmFileTest.java b/test/jalview/io/StockholmFileTest.java

index 4273e6c..7187b9f 100644 (file)
--- a/test/jalview/io/StockholmFileTest.java
+++ b/test/jalview/io/StockholmFileTest.java
@@ -22,17 +22,21 @@ package jalview.io;
  
  import static org.testng.AssertJUnit.assertEquals;
  import static org.testng.AssertJUnit.assertNotNull;
+import static org.testng.AssertJUnit.assertNull;
  import static org.testng.AssertJUnit.assertTrue;
  import static org.testng.AssertJUnit.fail;
  
+import jalview.datamodel.Alignment;
  import jalview.datamodel.AlignmentAnnotation;
  import jalview.datamodel.AlignmentI;
  import jalview.datamodel.Annotation;
+import jalview.datamodel.Sequence;
  import jalview.datamodel.SequenceFeature;
  import jalview.datamodel.SequenceI;
  import jalview.gui.JvOptionPane;
  
  import java.io.File;
+import java.io.IOException;
  import java.util.Arrays;
  import java.util.BitSet;
  import java.util.HashMap;
@@ -230,8 +234,8 @@ public class StockholmFileTest
      // we might want to revise this in future
      int aa_new_size = (aa_new == null ? 0 : aa_new.length);
      int aa_original_size = (aa_original == null ? 0 : aa_original.length);
-    Map<Integer, BitSet> orig_groups = new HashMap<Integer, BitSet>();
-    Map<Integer, BitSet> new_groups = new HashMap<Integer, BitSet>();
+    Map<Integer, BitSet> orig_groups = new HashMap<>();
+    Map<Integer, BitSet> new_groups = new HashMap<>();
  
      if (aa_new != null && aa_original != null)
      {
@@ -654,4 +658,112 @@ public class StockholmFileTest
      testAlignmentEquivalence(al, newAl, true, true, true);
  
    }
+
+  @Test(groups = "Functional")
+  public void testType2id()
+  {
+    assertEquals("OS", StockholmFile.type2id("organism"));
+    // not case-sensitive:
+    assertEquals("OS", StockholmFile.type2id("Organism"));
+    // is space-sensitive:
+    assertNull(StockholmFile.type2id("Organism "));
+    assertNull(StockholmFile.type2id("orgasm"));
+  }
+
+  @Test(groups = "Functional")
+  public void testOutputCharacter()
+  {
+    SequenceI seq = new Sequence("seq", "abc--def-");
+
+    Annotation[] ann = new Annotation[8];
+    ann[1] = new Annotation("Z", "desc", 'E', 1f);
+    ann[2] = new Annotation("Q", "desc", ' ', 1f);
+    ann[4] = new Annotation("", "desc", 'E', 1f);
+    ann[6] = new Annotation("ZH", "desc", 'E', 1f);
+
+    /*
+     * null annotation in column (not Secondary Structure annotation)
+     * should answer sequence character, or '-' if null sequence
+     */
+    assertEquals('-', StockholmFile.outputCharacter("RF", 0, ann, null));
+    assertEquals('d', StockholmFile.outputCharacter("RF", 5, ann, seq));
+    assertEquals('-', StockholmFile.outputCharacter("RF", 8, ann, seq));
+
+    /*
+     * null annotation in column (SS annotation) should answer underscore
+     */
+    assertEquals('_', StockholmFile.outputCharacter("SS", 0, ann, seq));
+
+    /*
+     * SS secondary structure symbol
+     */
+    assertEquals('E', StockholmFile.outputCharacter("SS", 1, ann, seq));
+
+    /*
+     * no SS symbol, use label instead 
+     */
+    assertEquals('Q', StockholmFile.outputCharacter("SS", 2, ann, seq));
+
+    /*
+     * SS with 2 character label - second character overrides SS symbol 
+     */
+    assertEquals('H', StockholmFile.outputCharacter("SS", 6, ann, seq));
+
+    /*
+     * empty display character, not SS - answers '.'
+     */
+    assertEquals('.', StockholmFile.outputCharacter("RF", 4, ann, seq));
+  }
+
+  /**
+   * Test to verify that gaps are input/output as underscore in STO annotation
+   * 
+   * @throws IOException
+   */
+  @Test(groups = "Functional")
+  public void testRoundtripWithGaps() throws IOException
+  {
+    /*
+     * small extract from RF00031_folded.stk
+     */
+    // @formatter:off
+    String stoData = 
+            "# STOCKHOLM 1.0\n" +
+            "#=GR B.taurus.4 SS .._((.))_\n" +
+            "B.taurus.4         AC.UGCGU.\n" +
+            "#=GR B.taurus.5 SS ..((_._))\n" +
+            "B.taurus.5         ACUU.G.CG\n" +
+        "//\n";
+    // @formatter:on
+    StockholmFile parser = new StockholmFile(stoData, DataSourceType.PASTE);
+    SequenceI[] seqs = parser.getSeqsAsArray();
+    assertEquals(2, seqs.length);
+
+    /*
+     * B.taurus.4 has a trailing gap
+     * rendered as underscore in Stockholm annotation
+     */
+    assertEquals("AC.UGCGU.", seqs[0].getSequenceAsString());
+    AlignmentAnnotation[] anns = seqs[0].getAnnotation();
+    assertEquals(1, anns.length);
+    AlignmentAnnotation taurus4SS = anns[0];
+    assertEquals(9, taurus4SS.annotations.length);
+    assertEquals(" .", taurus4SS.annotations[0].displayCharacter);
+    assertNull(taurus4SS.annotations[2]); // gapped position
+    assertNull(taurus4SS.annotations[8]); // gapped position
+    assertEquals('(', taurus4SS.annotations[3].secondaryStructure);
+    assertEquals("(", taurus4SS.annotations[3].displayCharacter);
+    assertEquals(')', taurus4SS.annotations[7].secondaryStructure);
+    
+    /*
+     * output as Stockholm and verify it matches the original input
+     * (gaps output as underscore in annotation lines)
+     * note: roundtrip test works with the input lines ordered as above;
+     * can also parse in other orders, but then input doesn't match output
+     */
+    AlignmentFileWriterI afile = FileFormat.Stockholm
+            .getWriter(new Alignment(seqs));
+    String output = afile.print(seqs, false);
+    assertEquals(stoData, output);
+  }
  }
author	gmungoc <g.m.carstairs@dundee.ac.uk>
	Mon, 26 Feb 2018 16:32:51 +0000 (16:32 +0000)
committer	gmungoc <g.m.carstairs@dundee.ac.uk>
	Mon, 26 Feb 2018 16:32:51 +0000 (16:32 +0000)
src/jalview/io/StockholmFile.java		patch | blob | history
test/jalview/io/StockholmFileTest.java		patch | blob | history