import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
+import java.util.Map.Entry;
import java.util.Vector;
import com.stevesoft.pat.Regex;
*/
public class StockholmFile extends AlignFile
{
- private static final String ANNOTATION = "annotation";
+ private static final char UNDERSCORE = '_';
- private static final Regex OPEN_PAREN = new Regex("(<|\\[)", "(");
+ private static final String ANNOTATION = "annotation";
- private static final Regex CLOSE_PAREN = new Regex("(>|\\])", ")");
+ // private static final Regex OPEN_PAREN = new Regex("(<|\\[)", "(");
+ //
+ // private static final Regex CLOSE_PAREN = new Regex("(>|\\])", ")");
public static final Regex DETECT_BRACKETS = new Regex(
"(<|>|\\[|\\]|\\(|\\)|\\{|\\})");
String version;
// String id;
Hashtable seqAnn = new Hashtable(); // Sequence related annotations
- LinkedHashMap<String, String> seqs = new LinkedHashMap<String, String>();
+ LinkedHashMap<String, String> seqs = new LinkedHashMap<>();
Regex p, r, rend, s, x;
// Temporary line for processing RNA annotation
// String RNAannot = "";
strucAnn = new Hashtable();
}
- Vector<AlignmentAnnotation> newStruc = new Vector<AlignmentAnnotation>();
+ Vector<AlignmentAnnotation> newStruc = new Vector<>();
parseAnnotationRow(newStruc, type, ns);
for (AlignmentAnnotation alan : newStruc)
{
private void guessDatabaseFor(Sequence seqO, String dbr, String dbsource)
{
DBRefEntry dbrf = null;
- List<DBRefEntry> dbrs = new ArrayList<DBRefEntry>();
+ List<DBRefEntry> dbrs = new ArrayList<>();
String seqdb = "Unknown", sdbac = "" + dbr;
int st = -1, en = -1, p;
if ((st = sdbac.indexOf("/")) > -1)
for (int i = 0; i < annots.length(); i++)
{
String pos = annots.substring(i, i + 1);
+ if (UNDERSCORE == pos.charAt(0))
+ {
+ pos = " ";
+ }
Annotation ann;
ann = new Annotation(pos, "", ' ', 0f); // 0f is 'valid' null - will not
// be written out
.form("#=GS " + idd.toString() + " "));
if (type.contains("PFAM") || type.contains("RFAM"))
{
-
out.append(" AC " + type.substring(type.indexOf(";") + 1));
}
else
String seq = "";
for (int k = 0; k < ann.length; k++)
{
- seq += outputCharacter(key, k, isrna, ann, s[i]);
+ seq += outputCharacter(key, k, ann, s[i]);
}
out.append(seq);
out.append(newline);
out.append(
new Format("%-" + maxid + "s").form("#=GC " + label + " "));
- boolean isrna = aa.isValidStruc();
for (int j = 0; j < aa.annotations.length; j++)
{
- seq += outputCharacter(key, j, isrna, aa.annotations, null);
+ seq += outputCharacter(key, j, aa.annotations, null);
}
out.append(seq);
out.append(newline);
*
* @param seq
* @param key
- * @param k
- * @param isrna
+ * @param column index of the column to output; used to index both ann and sequenceI
* @param ann
* @param sequenceI
*/
- private char outputCharacter(String key, int k, boolean isrna,
- Annotation[] ann, SequenceI sequenceI)
+ static char outputCharacter(String key, int column, Annotation[] ann,
+ SequenceI sequenceI)
{
- char seq = ' ';
- Annotation annot = ann[k];
+ Annotation annot = column >= ann.length ? null : ann[column];
String ch = (annot == null)
? ((sequenceI == null) ? "-"
- : Character.toString(sequenceI.getCharAt(k)))
+ : Character.toString(sequenceI.getCharAt(column)))
: annot.displayCharacter;
- if (key != null && key.equals("SS"))
+
+ if ("SS".equals(key))
{
if (annot == null)
{
- // sensible gap character
- return ' ';
+ // whitespace not allowed in annotation
+ return UNDERSCORE;
}
else
{
}
}
+ char seq = '0';
if (ch.length() == 0)
{
seq = '.';
{
seq = ch.charAt(0);
}
- else if (ch.length() > 1)
+ else
{
seq = ch.charAt(1);
}
return out.toString();
}
- private static Hashtable typeIds = null;
+ private static Map<String, String> typeIds = null;
static
{
if (typeIds == null)
{
- typeIds = new Hashtable();
+ typeIds = new Hashtable<>();
typeIds.put("SS", "Secondary Structure");
typeIds.put("SA", "Surface Accessibility");
typeIds.put("TM", "transmembrane");
typeIds.put("DR", "reference");
typeIds.put("LO", "look");
typeIds.put("RF", "Reference Positions");
-
}
}
{
if (typeIds.containsKey(id))
{
- return (String) typeIds.get(id);
+ return typeIds.get(id);
}
System.err.println(
"Warning : Unknown Stockholm annotation type code " + id);
protected static String type2id(String type)
{
String key = null;
- Enumeration e = typeIds.keys();
- while (e.hasMoreElements())
+ for (Entry<String, String> entry : typeIds.entrySet())
{
- Object ll = e.nextElement();
- if (typeIds.get(ll).toString().equalsIgnoreCase(type))
+ if (entry.getValue().equalsIgnoreCase(type))
{
- key = (String) ll;
- break;
+ return entry.getKey();
}
}
- if (key != null)
- {
- return key;
- }
System.err.println(
"Warning : Unknown Stockholm annotation type: " + type);
return key;
import static org.testng.AssertJUnit.assertEquals;
import static org.testng.AssertJUnit.assertNotNull;
+import static org.testng.AssertJUnit.assertNull;
import static org.testng.AssertJUnit.assertTrue;
import static org.testng.AssertJUnit.fail;
+import jalview.datamodel.Alignment;
import jalview.datamodel.AlignmentAnnotation;
import jalview.datamodel.AlignmentI;
import jalview.datamodel.Annotation;
+import jalview.datamodel.Sequence;
import jalview.datamodel.SequenceFeature;
import jalview.datamodel.SequenceI;
import jalview.gui.JvOptionPane;
import java.io.File;
+import java.io.IOException;
import java.util.Arrays;
import java.util.BitSet;
import java.util.HashMap;
// we might want to revise this in future
int aa_new_size = (aa_new == null ? 0 : aa_new.length);
int aa_original_size = (aa_original == null ? 0 : aa_original.length);
- Map<Integer, BitSet> orig_groups = new HashMap<Integer, BitSet>();
- Map<Integer, BitSet> new_groups = new HashMap<Integer, BitSet>();
+ Map<Integer, BitSet> orig_groups = new HashMap<>();
+ Map<Integer, BitSet> new_groups = new HashMap<>();
if (aa_new != null && aa_original != null)
{
testAlignmentEquivalence(al, newAl, true, true, true);
}
+ /**
+ * Checks the reverse lookup StockholmFile.type2id: an annotation type
+ * description is expected to answer its Stockholm type code, matching
+ * without regard to case, and a description that matches no known type is
+ * expected to answer null.
+ */
+ @Test(groups = "Functional")
+ public void testType2id()
+ {
+ assertEquals("OS", StockholmFile.type2id("organism"));
+ // the match on the description is not case-sensitive:
+ assertEquals("OS", StockholmFile.type2id("Organism"));
+ // but it is space-sensitive - a trailing space prevents the match:
+ assertNull(StockholmFile.type2id("Organism "));
+ assertNull(StockholmFile.type2id("orgasm")); // near-miss spelling, no match
+ }
+ /**
+ * Exercises StockholmFile.outputCharacter for columns with and without an
+ * annotation, for the "SS" key and for another key ("RF"), and with and
+ * without a sequence supplied.
+ */
+ @Test(groups = "Functional")
+ public void testOutputCharacter()
+ {
+ SequenceI seq = new Sequence("seq", "abc--def-");
+
+ Annotation[] ann = new Annotation[8]; // columns 0, 3, 5 and 7 stay null
+ ann[1] = new Annotation("Z", "desc", 'E', 1f);
+ ann[2] = new Annotation("Q", "desc", ' ', 1f);
+ ann[4] = new Annotation("", "desc", 'E', 1f);
+ ann[6] = new Annotation("ZH", "desc", 'E', 1f);
+
+ /*
+ * null annotation in column (not Secondary Structure annotation)
+ * should answer sequence character, or '-' if null sequence;
+ * column 8 is beyond the end of the 8-element annotation array, which is
+ * treated like a null annotation, so the sequence character at that
+ * column ('-') is expected
+ */
+ assertEquals('-', StockholmFile.outputCharacter("RF", 0, ann, null));
+ assertEquals('d', StockholmFile.outputCharacter("RF", 5, ann, seq));
+ assertEquals('-', StockholmFile.outputCharacter("RF", 8, ann, seq));
+
+ /*
+ * null annotation in column (SS annotation) should answer underscore
+ * (the sequence character 'a' at this column is not used)
+ */
+ assertEquals('_', StockholmFile.outputCharacter("SS", 0, ann, seq));
+
+ /*
+ * SS secondary structure symbol
+ * (annotation has label "Z" and structure symbol 'E'; 'E' is expected)
+ */
+ assertEquals('E', StockholmFile.outputCharacter("SS", 1, ann, seq));
+
+ /*
+ * no SS symbol, use label instead
+ * (structure symbol is a space, label is "Q")
+ */
+ assertEquals('Q', StockholmFile.outputCharacter("SS", 2, ann, seq));
+
+ /*
+ * SS with 2 character label - second character overrides SS symbol
+ * (label "ZH" with structure symbol 'E'; 'H' is expected)
+ */
+ assertEquals('H', StockholmFile.outputCharacter("SS", 6, ann, seq));
+
+ /*
+ * empty display character, not SS - answers '.'
+ */
+ assertEquals('.', StockholmFile.outputCharacter("RF", 4, ann, seq));
+ }
+
+ /**
+ * Test to verify that gaps are input/output as underscore in STO annotation:
+ * parses a small pasted Stockholm alignment, checks the annotation parsed
+ * for the first sequence, then writes the sequences back out as Stockholm
+ * and expects the output text to equal the input text.
+ *
+ * @throws IOException
+ */
+ @Test(groups = "Functional")
+ public void testRoundtripWithGaps() throws IOException
+ {
+ /*
+ * small extract from RF00031_folded.stk
+ * (two sequences, each preceded by its #=GR ... SS annotation line)
+ */
+ // @formatter:off
+ String stoData =
+ "# STOCKHOLM 1.0\n" +
+ "#=GR B.taurus.4 SS .._((.))_\n" +
+ "B.taurus.4 AC.UGCGU.\n" +
+ "#=GR B.taurus.5 SS ..((_._))\n" +
+ "B.taurus.5 ACUU.G.CG\n" +
+ "//\n";
+ // @formatter:on
+ StockholmFile parser = new StockholmFile(stoData, DataSourceType.PASTE);
+ SequenceI[] seqs = parser.getSeqsAsArray();
+ assertEquals(2, seqs.length);
+
+ /*
+ * B.taurus.4 has two gapped columns, an internal one (index 2) and a
+ * trailing one (index 8); both are rendered as underscore in its
+ * Stockholm SS annotation line and are expected to parse as null
+ * annotations
+ */
+ assertEquals("AC.UGCGU.", seqs[0].getSequenceAsString());
+ AlignmentAnnotation[] anns = seqs[0].getAnnotation();
+ assertEquals(1, anns.length);
+ AlignmentAnnotation taurus4SS = anns[0];
+ assertEquals(9, taurus4SS.annotations.length);
+ assertEquals(" .", taurus4SS.annotations[0].displayCharacter); // expected label has a leading space
+ assertNull(taurus4SS.annotations[2]); // gapped position
+ assertNull(taurus4SS.annotations[8]); // gapped position
+ assertEquals('(', taurus4SS.annotations[3].secondaryStructure);
+ assertEquals("(", taurus4SS.annotations[3].displayCharacter);
+ assertEquals(')', taurus4SS.annotations[7].secondaryStructure);
+
+ /*
+ * output as Stockholm and verify it matches the original input
+ * (gaps output as underscore in annotation lines)
+ * note: roundtrip test works with the input lines ordered as above;
+ * can also parse in other orders, but then input doesn't match output
+ */
+ AlignmentFileWriterI afile = FileFormat.Stockholm
+ .getWriter(new Alignment(seqs));
+ String output = afile.print(seqs, false);
+ assertEquals(stoData, output);
+ }
}