2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
23 import static org.testng.AssertJUnit.assertEquals;
24 import static org.testng.AssertJUnit.assertNotNull;
25 import static org.testng.AssertJUnit.assertTrue;
26 import static org.testng.AssertJUnit.fail;
28 import jalview.datamodel.AlignmentAnnotation;
29 import jalview.datamodel.AlignmentI;
30 import jalview.datamodel.Annotation;
31 import jalview.datamodel.SequenceFeature;
32 import jalview.datamodel.SequenceI;
33 import jalview.gui.JvOptionPane;
36 import java.util.Arrays;
37 import java.util.BitSet;
38 import java.util.HashMap;
39 import java.util.List;
42 import org.testng.Assert;
43 import org.testng.annotations.BeforeClass;
44 import org.testng.annotations.Test;
46 public class StockholmFileTest
49 @BeforeClass(alwaysRun = true)
50 public void setUpJvOptionPane()
52 JvOptionPane.setInteractiveMode(false);
53 JvOptionPane.setMockResponse(JvOptionPane.CANCEL_OPTION);
56 static String PfamFile = "examples/PF00111_seed.stk",
57 RfamFile = "examples/RF00031_folded.stk",
58 RnaSSTestFile = "examples/rna_ss_test.stk";
60 @Test(groups = { "Functional" })
61 public void pfamFileIO() throws Exception
63 testFileIOwithFormat(new File(PfamFile), FileFormat.Stockholm, -1, 0,
67 @Test(groups = { "Functional" })
68 public void pfamFileDataExtraction() throws Exception
70 AppletFormatAdapter af = new AppletFormatAdapter();
71 AlignmentI al = af.readFile(PfamFile, DataSourceType.FILE,
72 new IdentifyFile().identify(PfamFile, DataSourceType.FILE));
74 for (SequenceI sq : al.getSequences())
76 if (sq.getAllPDBEntries() != null)
78 numpdb += sq.getAllPDBEntries().size();
82 "PF00111 seed alignment has at least 1 PDB file, but the reader found none.",
86 @Test(groups = { "Functional" })
87 public void rfamFileIO() throws Exception
89 testFileIOwithFormat(new File(RfamFile), FileFormat.Stockholm, 2, 1,
94 * test alignment data in given file can be imported, exported and reimported
98 * - source datafile (IdentifyFile.identify() should work with it)
100 * - label for IO class used to write and read back in the data from
102 * @param ignoreFeatures
103 * @param ignoreRowVisibility
104 * @param allowNullAnnotations
107 public static void testFileIOwithFormat(File f, FileFormatI ioformat,
108 int naliannot, int nminseqann, boolean ignoreFeatures,
109 boolean ignoreRowVisibility, boolean allowNullAnnotations)
111 System.out.println("Reading file: " + f);
112 String ff = f.getPath();
115 AppletFormatAdapter rf = new AppletFormatAdapter();
117 AlignmentI al = rf.readFile(ff, DataSourceType.FILE,
118 new IdentifyFile().identify(ff, DataSourceType.FILE));
120 assertNotNull("Couldn't read supplied alignment data.", al);
122 // make sure dataset is initialised ? not sure about this
123 for (int i = 0; i < al.getSequencesArray().length; ++i)
125 al.getSequenceAt(i).createDatasetSequence();
127 String outputfile = rf.formatSequences(ioformat, al, true);
128 System.out.println("Output file in '" + ioformat + "':\n"
129 + outputfile + "\n<<EOF\n");
130 // test for consistency in io
131 AlignmentI al_input = new AppletFormatAdapter().readFile(outputfile,
132 DataSourceType.PASTE, ioformat);
133 assertNotNull("Couldn't parse reimported alignment data.", al_input);
135 FileFormatI identifyoutput = new IdentifyFile().identify(outputfile,
136 DataSourceType.PASTE);
137 assertNotNull("Identify routine failed for outputformat " + ioformat,
140 "Identify routine could not recognise output generated by '"
141 + ioformat + "' writer",
142 ioformat.equals(identifyoutput));
143 testAlignmentEquivalence(al, al_input, ignoreFeatures,
144 ignoreRowVisibility, allowNullAnnotations);
145 int numaliannot = 0, numsqswithali = 0;
146 for (AlignmentAnnotation ala : al_input.getAlignmentAnnotation())
148 if (ala.sequenceRef == null)
159 assertEquals("Number of alignment annotations", naliannot,
164 "Number of sequence associated annotations wasn't at least "
165 + nminseqann, numsqswithali >= nminseqann);
167 } catch (Exception e)
170 assertTrue("Couln't format the alignment for output file.", false);
175 * assert alignment equivalence
180 * 'secondary' or generated alignment from some datapreserving
182 * @param ignoreFeatures
183 * when true, differences in sequence feature annotation are ignored
185 public static void testAlignmentEquivalence(AlignmentI al,
186 AlignmentI al_input, boolean ignoreFeatures)
188 testAlignmentEquivalence(al, al_input, ignoreFeatures, false, false);
192 * assert alignment equivalence - uses special comparators for RNA structure
198 * 'secondary' or generated alignment from some datapreserving
200 * @param ignoreFeatures
201 * when true, differences in sequence feature annotation are ignored
203 * @param ignoreRowVisibility
204 * when true, do not fail if there are differences in the visibility
206 * @param allowNullAnnotation
207 * when true, positions in alignment annotation that are null will be
208 * considered equal to positions containing annotation where
209 * Annotation.isWhitespace() returns true.
212 public static void testAlignmentEquivalence(AlignmentI al,
213 AlignmentI al_input, boolean ignoreFeatures,
214 boolean ignoreRowVisibility, boolean allowNullAnnotation)
216 assertNotNull("Original alignment was null", al);
217 assertNotNull("Generated alignment was null", al_input);
219 assertTrue("Alignment dimension mismatch: original: " + al.getHeight()
220 + "x" + al.getWidth() + ", generated: " + al_input.getHeight()
221 + "x" + al_input.getWidth(),
222 al.getHeight() == al_input.getHeight()
223 && al.getWidth() == al_input.getWidth());
225 // check Alignment annotation
226 AlignmentAnnotation[] aa_new = al_input.getAlignmentAnnotation();
227 AlignmentAnnotation[] aa_original = al.getAlignmentAnnotation();
229 // note - at moment we do not distinguish between alignment without any
230 // annotation rows and alignment with no annotation row vector
231 // we might want to revise this in future
232 int aa_new_size = (aa_new == null ? 0 : aa_new.length);
233 int aa_original_size = (aa_original == null ? 0 : aa_original.length);
234 Map<Integer, BitSet> orig_groups = new HashMap<>();
235 Map<Integer, BitSet> new_groups = new HashMap<>();
237 if (aa_new != null && aa_original != null)
239 for (int i = 0; i < aa_original.length; i++)
241 if (aa_new.length > i)
243 assertEqualSecondaryStructure(
244 "Different alignment annotation at position " + i,
245 aa_original[i], aa_new[i], allowNullAnnotation);
246 // compare graphGroup or graph properties - needed to verify JAL-1299
247 assertEquals("Graph type not identical.", aa_original[i].graph,
249 if (!ignoreRowVisibility)
251 assertEquals("Visibility not identical.",
252 aa_original[i].visible,
255 assertEquals("Threshold line not identical.",
256 aa_original[i].threshold, aa_new[i].threshold);
257 // graphGroup may differ, but pattern should be the same
258 Integer o_ggrp = new Integer(aa_original[i].graphGroup + 2);
259 Integer n_ggrp = new Integer(aa_new[i].graphGroup + 2);
260 BitSet orig_g = orig_groups.get(o_ggrp);
261 BitSet new_g = new_groups.get(n_ggrp);
264 orig_groups.put(o_ggrp, orig_g = new BitSet());
268 new_groups.put(n_ggrp, new_g = new BitSet());
270 assertEquals("Graph Group pattern differs at annotation " + i,
277 System.err.println("No matching annotation row for "
278 + aa_original[i].toString());
283 "Generated and imported alignment have different annotation sets",
284 aa_original_size, aa_new_size);
286 // check sequences, annotation and features
287 SequenceI[] seq_original = new SequenceI[al.getSequencesArray().length];
288 seq_original = al.getSequencesArray();
289 SequenceI[] seq_new = new SequenceI[al_input.getSequencesArray().length];
290 seq_new = al_input.getSequencesArray();
291 List<SequenceFeature> sequenceFeatures_original;
292 List<SequenceFeature> sequenceFeatures_new;
293 AlignmentAnnotation annot_original, annot_new;
295 for (int i = 0; i < al.getSequencesArray().length; i++)
297 String name = seq_original[i].getName();
298 int start = seq_original[i].getStart();
299 int end = seq_original[i].getEnd();
300 System.out.println("Check sequence: " + name + "/" + start + "-"
303 // search equal sequence
304 for (int in = 0; in < al_input.getSequencesArray().length; in++)
306 if (name.equals(seq_new[in].getName())
307 && start == seq_new[in].getStart()
308 && end == seq_new[in].getEnd())
310 String ss_original = seq_original[i].getSequenceAsString();
311 String ss_new = seq_new[in].getSequenceAsString();
312 assertEquals("The sequences " + name + "/" + start + "-" + end
313 + " are not equal", ss_original, ss_new);
316 "Sequence Features were not equivalent"
317 + (ignoreFeatures ? " ignoring." : ""),
319 || (seq_original[i].getSequenceFeatures() == null && seq_new[in]
320 .getSequenceFeatures() == null)
321 || (seq_original[i].getSequenceFeatures() != null && seq_new[in]
322 .getSequenceFeatures() != null));
323 // compare sequence features
324 if (seq_original[i].getSequenceFeatures() != null
325 && seq_new[in].getSequenceFeatures() != null)
327 System.out.println("There are feature!!!");
328 sequenceFeatures_original = seq_original[i]
329 .getSequenceFeatures();
330 sequenceFeatures_new = seq_new[in].getSequenceFeatures();
332 assertEquals("different number of features", seq_original[i]
333 .getSequenceFeatures().size(), seq_new[in]
334 .getSequenceFeatures().size());
336 for (int feat = 0; feat < seq_original[i].getSequenceFeatures()
339 assertEquals("Different features",
340 sequenceFeatures_original.get(feat),
341 sequenceFeatures_new.get(feat));
344 // compare alignment annotation
345 if (al.getSequenceAt(i).getAnnotation() != null
346 && al_input.getSequenceAt(in).getAnnotation() != null)
348 for (int j = 0; j < al.getSequenceAt(i).getAnnotation().length; j++)
350 if (al.getSequenceAt(i).getAnnotation()[j] != null
351 && al_input.getSequenceAt(in).getAnnotation()[j] != null)
353 annot_original = al.getSequenceAt(i).getAnnotation()[j];
354 annot_new = al_input.getSequenceAt(in).getAnnotation()[j];
355 assertEqualSecondaryStructure(
356 "Different annotation elements", annot_original,
357 annot_new, allowNullAnnotation);
361 else if (al.getSequenceAt(i).getAnnotation() == null
362 && al_input.getSequenceAt(in).getAnnotation() == null)
364 System.out.println("No annotations");
366 else if (al.getSequenceAt(i).getAnnotation() != null
367 && al_input.getSequenceAt(in).getAnnotation() == null)
369 fail("Annotations differed between sequences ("
370 + al.getSequenceAt(i).getName() + ") and ("
371 + al_input.getSequenceAt(i).getName() + ")");
380 * compare two annotation rows, with special support for secondary structure
381 * comparison. With RNA, only the value and the secondaryStructure symbols are
382 * compared, displayCharacter and description are ignored. Annotations where
383 * Annotation.isWhitespace() is true are always considered equal.
386 * - not actually used yet..
388 * - the original annotation
390 * - the one compared to the original annotation
391 * @param allowNullEquivalence
392 * when true, positions in alignment annotation that are null will be
393 * considered equal to non-null positions for which
394 * Annotation.isWhitespace() is true.
396 private static void assertEqualSecondaryStructure(String message,
397 AlignmentAnnotation annot_or, AlignmentAnnotation annot_new,
398 boolean allowNullEqivalence)
400 // TODO: test to cover this assert behaves correctly for all allowed
401 // variations of secondary structure annotation row equivalence
402 if (annot_or.annotations.length != annot_new.annotations.length)
404 fail("Different lengths for annotation row elements: "
405 + annot_or.annotations.length + "!="
406 + annot_new.annotations.length);
408 boolean isRna = annot_or.isRNA();
409 assertTrue("Expected " + (isRna ? " valid RNA " : " no RNA ")
410 + " secondary structure in the row.",
411 isRna == annot_new.isRNA());
412 for (int i = 0; i < annot_or.annotations.length; i++)
414 Annotation an_or = annot_or.annotations[i], an_new = annot_new.annotations[i];
415 if (an_or != null && an_new != null)
420 if (an_or.secondaryStructure != an_new.secondaryStructure
421 || ((Float.isNaN(an_or.value) != Float
422 .isNaN(an_new.value)) || an_or.value != an_new.value))
424 fail("Different RNA secondary structure at column " + i
425 + " expected: [" + annot_or.annotations[i].toString()
426 + "] but got: [" + annot_new.annotations[i].toString()
432 // not RNA secondary structure, so expect all elements to match...
433 if ((an_or.isWhitespace() != an_new.isWhitespace())
434 || !an_or.displayCharacter.trim().equals(
435 an_new.displayCharacter.trim())
436 || !("" + an_or.secondaryStructure).trim().equals(
437 ("" + an_new.secondaryStructure).trim())
438 || (an_or.description != an_new.description && !((an_or.description == null && an_new.description
439 .trim().length() == 0)
440 || (an_new.description == null && an_or.description
441 .trim().length() == 0) || an_or.description
442 .trim().equals(an_new.description.trim())))
443 || !((Float.isNaN(an_or.value) && Float
444 .isNaN(an_new.value)) || an_or.value == an_new.value))
446 fail("Annotation Element Mismatch\nElement " + i
447 + " in original: " + annot_or.annotations[i].toString()
448 + "\nElement " + i + " in new: "
449 + annot_new.annotations[i].toString());
453 else if (annot_or.annotations[i] == null
454 && annot_new.annotations[i] == null)
460 if (allowNullEqivalence)
462 if (an_or != null && an_or.isWhitespace())
467 if (an_new != null && an_new.isWhitespace())
472 // need also to test for null in one, non-SS annotation in other...
473 fail("Annotation Element Mismatch\nElement " + i + " in original: "
474 + (an_or == null ? "is null" : an_or.toString())
475 + "\nElement " + i + " in new: "
476 + (an_new == null ? "is null" : an_new.toString()));
482 * @see assertEqualSecondaryStructure - test if two secondary structure
483 * annotations are not equal
487 * @param allowNullEquivalence
489 public static void assertNotEqualSecondaryStructure(String message,
490 AlignmentAnnotation an_orig, AlignmentAnnotation an_new,
491 boolean allowNullEquivalence)
493 boolean thrown = false;
496 assertEqualSecondaryStructure("", an_orig, an_new,
497 allowNullEquivalence);
498 } catch (AssertionError af)
504 fail("Expected difference for [" + an_orig + "] and [" + an_new + "]");
507 private AlignmentAnnotation makeAnnot(Annotation ae)
509 return new AlignmentAnnotation("label", "description", new Annotation[]
513 @Test(groups={"Functional"})
514 public void testAnnotationEquivalence()
516 AlignmentAnnotation one = makeAnnot(new Annotation("", "", ' ', 1));
517 AlignmentAnnotation anotherOne = makeAnnot(new Annotation("", "", ' ',
519 AlignmentAnnotation sheet = makeAnnot(new Annotation("","",'E',0f));
520 AlignmentAnnotation anotherSheet = makeAnnot(new Annotation("","",'E',0f));
521 AlignmentAnnotation sheetWithLabel = makeAnnot(new Annotation("1", "",
523 AlignmentAnnotation anotherSheetWithLabel = makeAnnot(new Annotation(
525 AlignmentAnnotation rnaNoDC = makeAnnot(new Annotation("","",'<',0f));
526 AlignmentAnnotation anotherRnaNoDC = makeAnnot(new Annotation("","",'<',0f));
527 AlignmentAnnotation rnaWithDC = makeAnnot(new Annotation("B", "", '<',
529 AlignmentAnnotation anotherRnaWithDC = makeAnnot(new Annotation("B",
532 // check self equivalence
533 for (boolean allowNull : new boolean[] { true, false })
535 assertEqualSecondaryStructure("Should be equal", one, anotherOne,
537 assertEqualSecondaryStructure("Should be equal", sheet, anotherSheet,
539 assertEqualSecondaryStructure("Should be equal", sheetWithLabel,
540 anotherSheetWithLabel, allowNull);
541 assertEqualSecondaryStructure("Should be equal", rnaNoDC,
542 anotherRnaNoDC, allowNull);
543 assertEqualSecondaryStructure("Should be equal", rnaWithDC,
544 anotherRnaWithDC, allowNull);
545 // display character doesn't matter for RNA structure (for 2.10.2)
546 assertEqualSecondaryStructure("Should be equal", rnaWithDC, rnaNoDC,
548 assertEqualSecondaryStructure("Should be equal", rnaNoDC, rnaWithDC,
552 // verify others are different
553 List<AlignmentAnnotation> aaSet = Arrays.asList(one, sheet,
554 sheetWithLabel, rnaWithDC);
555 for (int p = 0; p < aaSet.size(); p++)
557 for (int q = 0; q < aaSet.size(); q++)
561 assertNotEqualSecondaryStructure("Should be different",
562 aaSet.get(p), aaSet.get(q), false);
566 assertEqualSecondaryStructure("Should be same", aaSet.get(p),
567 aaSet.get(q), false);
568 assertEqualSecondaryStructure("Should be same", aaSet.get(p),
570 assertNotEqualSecondaryStructure(
571 "Should be different to empty anot", aaSet.get(p),
572 makeAnnot(Annotation.EMPTY_ANNOTATION), false);
573 assertNotEqualSecondaryStructure(
574 "Should be different to empty annot",
575 makeAnnot(Annotation.EMPTY_ANNOTATION), aaSet.get(q),
577 assertNotEqualSecondaryStructure("Should be different to null",
578 aaSet.get(p), makeAnnot(null), false);
579 assertNotEqualSecondaryStructure("Should be different to null",
580 makeAnnot(null), aaSet.get(q), true);
589 String aliFile = ">Dm\nAAACCCUUUUACACACGGGAAAGGG";
590 String annFile = "JALVIEW_ANNOTATION\n# Created: Thu May 04 11:16:52 BST 2017\n\n"
591 + "SEQUENCE_REF\tDm\nNO_GRAPH\tsecondary structure\tsecondary structure\t"
592 + "(|(|(|(|, .|, .|, .|, .|)|)|)|)|\t0.0\nROWPROPERTIES\t"
593 + "secondary structure\tscaletofit=true\tshowalllabs=true\tcentrelabs=false";
595 String annFileCurlyWuss = "JALVIEW_ANNOTATION\n# Created: Thu May 04 11:16:52 BST 2017\n\n"
596 + "SEQUENCE_REF\tDm\nNO_GRAPH\tsecondary structure\tsecondary structure\t"
597 + "(|(|(|(||{|{||{|{||)|)|)|)||}|}|}|}|\t0.0\nROWPROPERTIES\t"
598 + "secondary structure\tscaletofit=true\tshowalllabs=true\tcentrelabs=false";
599 String annFileFullWuss = "JALVIEW_ANNOTATION\n# Created: Thu May 04 11:16:52 BST 2017\n\n"
600 + "SEQUENCE_REF\tDm\nNO_GRAPH\tsecondary structure\tsecondary structure\t"
601 + "(|(|(|(||{|{||[|[||)|)|)|)||}|}|]|]|\t0.0\nROWPROPERTIES\t"
602 + "secondary structure\tscaletofit=true\tshowalllabs=true\tcentrelabs=false";
604 @Test(groups = { "Functional" })
605 public void secondaryStructureForRNASequence() throws Exception
607 roundTripSSForRNA(aliFile, annFile);
610 @Test(groups = { "Functional" })
611 public void curlyWUSSsecondaryStructureForRNASequence() throws Exception
613 roundTripSSForRNA(aliFile, annFileCurlyWuss);
616 @Test(groups = { "Functional" })
617 public void fullWUSSsecondaryStructureForRNASequence() throws Exception
619 roundTripSSForRNA(aliFile, annFileFullWuss);
622 @Test(groups = { "Functional" })
623 public void detectWussBrackets()
625 for (char ch : new char[] { '{', '}', '[', ']', '(', ')', '<', '>' })
627 Assert.assertTrue(StockholmFile.DETECT_BRACKETS.matchAt("" + ch, 0),
628 "Didn't recognise " + ch + " as a WUSS bracket");
630 for (char ch : new char[] { '@', '!', 'V', 'Q', '*', ' ', '-', '.' })
632 Assert.assertFalse(StockholmFile.DETECT_BRACKETS.matchAt("" + ch, 0),
633 "Shouldn't recognise " + ch + " as a WUSS bracket");
636 private static void roundTripSSForRNA(String aliFile, String annFile)
639 AlignmentI al = new AppletFormatAdapter().readFile(aliFile,
640 DataSourceType.PASTE, jalview.io.FileFormat.Fasta);
641 AnnotationFile aaf = new AnnotationFile();
642 aaf.readAnnotationFile(al, annFile, DataSourceType.PASTE);
643 al.getAlignmentAnnotation()[0].visible = true;
645 // TODO: create a better 'save as <format>' pattern
646 StockholmFile sf = new StockholmFile(al);
648 String stockholmFile = sf.print(al.getSequencesArray(), true);
650 AlignmentI newAl = new AppletFormatAdapter().readFile(stockholmFile,
651 DataSourceType.PASTE, jalview.io.FileFormat.Stockholm);
652 // AlignmentUtils.showOrHideSequenceAnnotations(newAl.getViewport()
653 // .getAlignment(), Arrays.asList("Secondary Structure"), newAl
654 // .getViewport().getAlignment().getSequences(), true, true);
655 testAlignmentEquivalence(al, newAl, true, true, true);