From fdf1d81fa8cd498d17b959427930e3fc0bc85249 Mon Sep 17 00:00:00 2001 From: gmungoc Date: Thu, 29 Mar 2018 10:47:17 +0100 Subject: [PATCH] JAL-2907 bug fix redone on HMMER branch --- src/jalview/io/StockholmFile.java | 16 +++-- test/jalview/io/StockholmFileTest.java | 106 ++++++++++++++++++++++++++++++++ 2 files changed, 117 insertions(+), 5 deletions(-) diff --git a/src/jalview/io/StockholmFile.java b/src/jalview/io/StockholmFile.java index 09c5b25..58b171d 100644 --- a/src/jalview/io/StockholmFile.java +++ b/src/jalview/io/StockholmFile.java @@ -79,6 +79,8 @@ public class StockholmFile extends AlignFile { private static final String ANNOTATION = "annotation"; + private static final char UNDERSCORE = '_'; + // private static final Regex OPEN_PAREN = new Regex("(<|\\[)", "("); // private static final Regex CLOSE_PAREN = new Regex("(>|\\])", ")"); @@ -111,14 +113,14 @@ public class StockholmFile extends AlignFile featureTypes.put("RF", "Reference Positions"); } - AlignmentI al; + private AlignmentI al; public StockholmFile() { } /** - * Creates a new StockholmFile object for output. + * Creates a new StockholmFile object for output */ public StockholmFile(AlignmentI al) { @@ -903,6 +905,10 @@ public class StockholmFile extends AlignFile for (int i = 0; i < annots.length(); i++) { String pos = annots.substring(i, i + 1); + if (UNDERSCORE == pos.charAt(0)) + { + pos = " "; + } Annotation ann; ann = new Annotation(pos, "", ' ', 0f); // 0f is 'valid' null - will not // be written out @@ -1171,7 +1177,7 @@ public class StockholmFile extends AlignFile * @param ann * @param sequenceI */ - private char getAnnotationCharacter(String key, int k, Annotation annot, + static char getAnnotationCharacter(String key, int k, Annotation annot, SequenceI sequenceI) { char seq = ' '; @@ -1183,8 +1189,8 @@ public class StockholmFile extends AlignFile { if (annot == null) { - // sensible gap character - return ' '; + // Stockholm format requires underscore, not space + return UNDERSCORE; } else { diff --git a/test/jalview/io/StockholmFileTest.java b/test/jalview/io/StockholmFileTest.java index 14050e8..7cc5258 100644 --- a/test/jalview/io/StockholmFileTest.java +++ b/test/jalview/io/StockholmFileTest.java @@ -37,6 +37,7 @@ import jalview.datamodel.SequenceI; import jalview.gui.JvOptionPane; import java.io.File; +import java.io.IOException; import java.util.Arrays; import java.util.BitSet; import java.util.HashMap; @@ -822,4 +823,109 @@ public class StockholmFileTest //@formatter:on assertEquals(expected, output); } + + @Test(groups = "Functional") + public void testOutputCharacter() + { + SequenceI seq = new Sequence("seq", "abc--def-"); + + Annotation[] ann = new Annotation[8]; + ann[1] = new Annotation("Z", "desc", 'E', 1f); + ann[2] = new Annotation("Q", "desc", ' ', 1f); + ann[4] = new Annotation("", "desc", 'E', 1f); + ann[6] = new Annotation("ZH", "desc", 'E', 1f); + + /* + * null annotation in column (not Secondary Structure annotation) + * should answer sequence character, or '-' if null sequence + */ + assertEquals('-', + StockholmFile.getAnnotationCharacter("RF", 0, ann[0], null)); + assertEquals('d', + StockholmFile.getAnnotationCharacter("RF", 5, ann[5], seq)); + assertEquals('-', + StockholmFile.getAnnotationCharacter("RF", 8, null, seq)); + + /* + * null annotation in column (SS annotation) should answer underscore + */ + assertEquals('_', + StockholmFile.getAnnotationCharacter("SS", 0, ann[0], seq)); + + /* + * SS secondary structure symbol + */ + assertEquals('E', + StockholmFile.getAnnotationCharacter("SS", 1, ann[1], seq)); + + /* + * no SS symbol, use label instead + */ + assertEquals('Q', + StockholmFile.getAnnotationCharacter("SS", 2, ann[2], seq)); + + /* + * SS with 2 character label - second character overrides SS symbol + */ + assertEquals('H', + StockholmFile.getAnnotationCharacter("SS", 6, ann[6], seq)); + + /* + * empty display character, not SS - answers '.' + */ + assertEquals('.', + StockholmFile.getAnnotationCharacter("RF", 4, ann[4], seq)); + } + + /** + * Test to verify that gaps are input/output as underscore in STO annotation + * + * @throws IOException + */ + @Test(groups = "Functional") + public void testRoundtripWithGaps() throws IOException + { + /* + * small extract from RF00031_folded.stk + */ + // @formatter:off + String stoData = + "# STOCKHOLM 1.0\n" + + "#=GR B.taurus.4 SS .._((.))_\n" + + "B.taurus.4 AC.UGCGU.\n" + + "#=GR B.taurus.5 SS ..((_._))\n" + + "B.taurus.5 ACUU.G.CG\n" + + "//\n"; + // @formatter:on + StockholmFile parser = new StockholmFile(stoData, DataSourceType.PASTE); + SequenceI[] seqs = parser.getSeqsAsArray(); + assertEquals(2, seqs.length); + + /* + * B.taurus.4 has a trailing gap + * rendered as underscore in Stockholm annotation + */ + assertEquals("AC.UGCGU.", seqs[0].getSequenceAsString()); + AlignmentAnnotation[] anns = seqs[0].getAnnotation(); + assertEquals(1, anns.length); + AlignmentAnnotation taurus4SS = anns[0]; + assertEquals(9, taurus4SS.annotations.length); + assertEquals(" .", taurus4SS.annotations[0].displayCharacter); + assertNull(taurus4SS.annotations[2]); // gapped position + assertNull(taurus4SS.annotations[8]); // gapped position + assertEquals('(', taurus4SS.annotations[3].secondaryStructure); + assertEquals("(", taurus4SS.annotations[3].displayCharacter); + assertEquals(')', taurus4SS.annotations[7].secondaryStructure); + + /* + * output as Stockholm and verify it matches the original input + * (gaps output as underscore in annotation lines) + * note: roundtrip test works with the input lines ordered as above; + * can also parse in other orders, but then input doesn't match output + */ + AlignmentFileWriterI afile = FileFormat.Stockholm + .getWriter(new Alignment(seqs)); + String output = afile.print(seqs, false); + assertEquals(stoData, output); + } } -- 1.7.10.2