From 7489d510fdcef2a8e92698cc0bfe125bdcbb3278 Mon Sep 17 00:00:00 2001 From: gmungoc Date: Mon, 26 Feb 2018 16:32:51 +0000 Subject: [PATCH] JAL-2907 read/write underscore for annotation at gap positions --- src/jalview/io/StockholmFile.java | 67 +++++++++--------- test/jalview/io/StockholmFileTest.java | 116 +++++++++++++++++++++++++++++++- 2 files changed, 146 insertions(+), 37 deletions(-) diff --git a/src/jalview/io/StockholmFile.java b/src/jalview/io/StockholmFile.java index f5b5177..0388fda 100644 --- a/src/jalview/io/StockholmFile.java +++ b/src/jalview/io/StockholmFile.java @@ -46,6 +46,7 @@ import java.util.Hashtable; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import java.util.Map.Entry; import java.util.Vector; import com.stevesoft.pat.Regex; @@ -74,11 +75,13 @@ import fr.orsay.lri.varna.models.rna.RNA; */ public class StockholmFile extends AlignFile { - private static final String ANNOTATION = "annotation"; + private static final char UNDERSCORE = '_'; - private static final Regex OPEN_PAREN = new Regex("(<|\\[)", "("); + private static final String ANNOTATION = "annotation"; - private static final Regex CLOSE_PAREN = new Regex("(>|\\])", ")"); + // private static final Regex OPEN_PAREN = new Regex("(<|\\[)", "("); + // + // private static final Regex CLOSE_PAREN = new Regex("(>|\\])", ")"); public static final Regex DETECT_BRACKETS = new Regex( "(<|>|\\[|\\]|\\(|\\)|\\{|\\})"); @@ -197,7 +200,7 @@ public class StockholmFile extends AlignFile String version; // String id; Hashtable seqAnn = new Hashtable(); // Sequence related annotations - LinkedHashMap seqs = new LinkedHashMap(); + LinkedHashMap seqs = new LinkedHashMap<>(); Regex p, r, rend, s, x; // Temporary line for processing RNA annotation // String RNAannot = ""; @@ -658,7 +661,7 @@ public class StockholmFile extends AlignFile strucAnn = new Hashtable(); } - Vector newStruc = new Vector(); + Vector newStruc = new Vector<>(); parseAnnotationRow(newStruc, type, ns); for (AlignmentAnnotation alan : newStruc) { @@ -710,7 +713,7 @@ public class StockholmFile extends AlignFile private void guessDatabaseFor(Sequence seqO, String dbr, String dbsource) { DBRefEntry dbrf = null; - List dbrs = new ArrayList(); + List dbrs = new ArrayList<>(); String seqdb = "Unknown", sdbac = "" + dbr; int st = -1, en = -1, p; if ((st = sdbac.indexOf("/")) > -1) @@ -837,6 +840,10 @@ public class StockholmFile extends AlignFile for (int i = 0; i < annots.length(); i++) { String pos = annots.substring(i, i + 1); + if (UNDERSCORE == pos.charAt(0)) + { + pos = " "; + } Annotation ann; ann = new Annotation(pos, "", ' ', 0f); // 0f is 'valid' null - will not // be written out @@ -985,7 +992,6 @@ public class StockholmFile extends AlignFile .form("#=GS " + idd.toString() + " ")); if (type.contains("PFAM") || type.contains("RFAM")) { - out.append(" AC " + type.substring(type.indexOf(";") + 1)); } else @@ -1028,7 +1034,7 @@ public class StockholmFile extends AlignFile String seq = ""; for (int k = 0; k < ann.length; k++) { - seq += outputCharacter(key, k, isrna, ann, s[i]); + seq += outputCharacter(key, k, ann, s[i]); } out.append(seq); out.append(newline); @@ -1080,10 +1086,9 @@ public class StockholmFile extends AlignFile out.append( new Format("%-" + maxid + "s").form("#=GC " + label + " ")); - boolean isrna = aa.isValidStruc(); for (int j = 0; j < aa.annotations.length; j++) { - seq += outputCharacter(key, j, isrna, aa.annotations, null); + seq += outputCharacter(key, j, aa.annotations, null); } out.append(seq); out.append(newline); @@ -1101,26 +1106,25 @@ public class StockholmFile extends AlignFile * * @param seq * @param key - * @param k - * @param isrna + * @param column * @param ann * @param sequenceI */ - private char outputCharacter(String key, int k, boolean isrna, - Annotation[] ann, SequenceI sequenceI) + static char outputCharacter(String key, int column, Annotation[] ann, + SequenceI sequenceI) { - char seq = ' '; - Annotation annot = ann[k]; + Annotation annot = column >= ann.length ? null : ann[column]; String ch = (annot == null) ? ((sequenceI == null) ? "-" - : Character.toString(sequenceI.getCharAt(k))) + : Character.toString(sequenceI.getCharAt(column))) : annot.displayCharacter; - if (key != null && key.equals("SS")) + + if ("SS".equals(key)) { if (annot == null) { - // sensible gap character - return ' '; + // whitespace not allowed in annotation + return UNDERSCORE; } else { @@ -1132,6 +1136,7 @@ public class StockholmFile extends AlignFile } } + char seq = '0'; if (ch.length() == 0) { seq = '.'; @@ -1140,7 +1145,7 @@ public class StockholmFile extends AlignFile { seq = ch.charAt(0); } - else if (ch.length() > 1) + else { seq = ch.charAt(1); } @@ -1159,13 +1164,13 @@ public class StockholmFile extends AlignFile return out.toString(); } - private static Hashtable typeIds = null; + private static Map typeIds = null; static { if (typeIds == null) { - typeIds = new Hashtable(); + typeIds = new Hashtable<>(); typeIds.put("SS", "Secondary Structure"); typeIds.put("SA", "Surface Accessibility"); typeIds.put("TM", "transmembrane"); @@ -1181,7 +1186,6 @@ public class StockholmFile extends AlignFile typeIds.put("DR", "reference"); typeIds.put("LO", "look"); typeIds.put("RF", "Reference Positions"); - } } @@ -1189,7 +1193,7 @@ public class StockholmFile extends AlignFile { if (typeIds.containsKey(id)) { - return (String) typeIds.get(id); + return typeIds.get(id); } System.err.println( "Warning : Unknown Stockholm annotation type code " + id); @@ -1199,20 +1203,13 @@ public class StockholmFile extends AlignFile protected static String type2id(String type) { String key = null; - Enumeration e = typeIds.keys(); - while (e.hasMoreElements()) + for (Entry entry : typeIds.entrySet()) { - Object ll = e.nextElement(); - if (typeIds.get(ll).toString().equalsIgnoreCase(type)) + if (entry.getValue().equalsIgnoreCase(type)) { - key = (String) ll; - break; + return entry.getKey(); } } - if (key != null) - { - return key; - } System.err.println( "Warning : Unknown Stockholm annotation type: " + type); return key; diff --git a/test/jalview/io/StockholmFileTest.java b/test/jalview/io/StockholmFileTest.java index 4273e6c..7187b9f 100644 --- a/test/jalview/io/StockholmFileTest.java +++ b/test/jalview/io/StockholmFileTest.java @@ -22,17 +22,21 @@ package jalview.io; import static org.testng.AssertJUnit.assertEquals; import static org.testng.AssertJUnit.assertNotNull; +import static org.testng.AssertJUnit.assertNull; import static org.testng.AssertJUnit.assertTrue; import static org.testng.AssertJUnit.fail; +import jalview.datamodel.Alignment; import jalview.datamodel.AlignmentAnnotation; import jalview.datamodel.AlignmentI; import jalview.datamodel.Annotation; +import jalview.datamodel.Sequence; import jalview.datamodel.SequenceFeature; import jalview.datamodel.SequenceI; import jalview.gui.JvOptionPane; import java.io.File; +import java.io.IOException; import java.util.Arrays; import java.util.BitSet; import java.util.HashMap; @@ -230,8 +234,8 @@ public class StockholmFileTest // we might want to revise this in future int aa_new_size = (aa_new == null ? 0 : aa_new.length); int aa_original_size = (aa_original == null ? 0 : aa_original.length); - Map orig_groups = new HashMap(); - Map new_groups = new HashMap(); + Map orig_groups = new HashMap<>(); + Map new_groups = new HashMap<>(); if (aa_new != null && aa_original != null) { @@ -654,4 +658,112 @@ public class StockholmFileTest testAlignmentEquivalence(al, newAl, true, true, true); } + + @Test(groups = "Functional") + public void testType2id() + { + assertEquals("OS", StockholmFile.type2id("organism")); + // not case-sensitive: + assertEquals("OS", StockholmFile.type2id("Organism")); + // is space-sensitive: + assertNull(StockholmFile.type2id("Organism ")); + assertNull(StockholmFile.type2id("orgasm")); + } + + @Test(groups = "Functional") + public void testOutputCharacter() + { + SequenceI seq = new Sequence("seq", "abc--def-"); + + Annotation[] ann = new Annotation[8]; + ann[1] = new Annotation("Z", "desc", 'E', 1f); + ann[2] = new Annotation("Q", "desc", ' ', 1f); + ann[4] = new Annotation("", "desc", 'E', 1f); + ann[6] = new Annotation("ZH", "desc", 'E', 1f); + + /* + * null annotation in column (not Secondary Structure annotation) + * should answer sequence character, or '-' if null sequence + */ + assertEquals('-', StockholmFile.outputCharacter("RF", 0, ann, null)); + assertEquals('d', StockholmFile.outputCharacter("RF", 5, ann, seq)); + assertEquals('-', StockholmFile.outputCharacter("RF", 8, ann, seq)); + + /* + * null annotation in column (SS annotation) should answer underscore + */ + assertEquals('_', StockholmFile.outputCharacter("SS", 0, ann, seq)); + + /* + * SS secondary structure symbol + */ + assertEquals('E', StockholmFile.outputCharacter("SS", 1, ann, seq)); + + /* + * no SS symbol, use label instead + */ + assertEquals('Q', StockholmFile.outputCharacter("SS", 2, ann, seq)); + + /* + * SS with 2 character label - second character overrides SS symbol + */ + assertEquals('H', StockholmFile.outputCharacter("SS", 6, ann, seq)); + + /* + * empty display character, not SS - answers '.' + */ + assertEquals('.', StockholmFile.outputCharacter("RF", 4, ann, seq)); + } + + /** + * Test to verify that gaps are input/output as underscore in STO annotation + * + * @throws IOException + */ + @Test(groups = "Functional") + public void testRoundtripWithGaps() throws IOException + { + /* + * small extract from RF00031_folded.stk + */ + // @formatter:off + String stoData = + "# STOCKHOLM 1.0\n" + + "#=GR B.taurus.4 SS .._((.))_\n" + + "B.taurus.4 AC.UGCGU.\n" + + "#=GR B.taurus.5 SS ..((_._))\n" + + "B.taurus.5 ACUU.G.CG\n" + + "//\n"; + // @formatter:on + StockholmFile parser = new StockholmFile(stoData, DataSourceType.PASTE); + SequenceI[] seqs = parser.getSeqsAsArray(); + assertEquals(2, seqs.length); + + /* + * B.taurus.4 has a trailing gap + * rendered as underscore in Stockholm annotation + */ + assertEquals("AC.UGCGU.", seqs[0].getSequenceAsString()); + AlignmentAnnotation[] anns = seqs[0].getAnnotation(); + assertEquals(1, anns.length); + AlignmentAnnotation taurus4SS = anns[0]; + assertEquals(9, taurus4SS.annotations.length); + assertEquals(" .", taurus4SS.annotations[0].displayCharacter); + assertNull(taurus4SS.annotations[2]); // gapped position + assertNull(taurus4SS.annotations[8]); // gapped position + assertEquals('(', taurus4SS.annotations[3].secondaryStructure); + assertEquals("(", taurus4SS.annotations[3].displayCharacter); + assertEquals(')', taurus4SS.annotations[7].secondaryStructure); + + /* + * output as Stockholm and verify it matches the original input + * (gaps output as underscore in annotation lines) + * note: roundtrip test works with the input lines ordered as above; + * can also parse in other orders, but then input doesn't match output + */ + AlignmentFileWriterI afile = FileFormat.Stockholm + .getWriter(new Alignment(seqs)); + String output = afile.print(seqs, false); + assertEquals(stoData, output); + } } -- 1.7.10.2