JAL-2507 split out tests for '{' WUSS io from '[' WUSS io (which may be causing proble...
[jalview.git] / test / jalview / io / StockholmFileTest.java
1 /*
2  * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3  * Copyright (C) $$Year-Rel$$ The Jalview Authors
4  * 
5  * This file is part of Jalview.
6  * 
7  * Jalview is free software: you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License 
9  * as published by the Free Software Foundation, either version 3
10  * of the License, or (at your option) any later version.
11  *  
12  * Jalview is distributed in the hope that it will be useful, but 
13  * WITHOUT ANY WARRANTY; without even the implied warranty 
14  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR 
15  * PURPOSE.  See the GNU General Public License for more details.
16  * 
17  * You should have received a copy of the GNU General Public License
18  * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
19  * The Jalview Authors are detailed in the 'AUTHORS' file.
20  */
21 package jalview.io;
22
23 import static org.testng.AssertJUnit.assertEquals;
24 import static org.testng.AssertJUnit.assertNotNull;
25 import static org.testng.AssertJUnit.assertTrue;
26 import static org.testng.AssertJUnit.fail;
27
28 import jalview.datamodel.AlignmentAnnotation;
29 import jalview.datamodel.AlignmentI;
30 import jalview.datamodel.Annotation;
31 import jalview.datamodel.SequenceFeature;
32 import jalview.datamodel.SequenceI;
33 import jalview.gui.JvOptionPane;
34
35 import java.io.File;
36 import java.util.BitSet;
37 import java.util.HashMap;
38 import java.util.Map;
39
40 import org.testng.Assert;
41 import org.testng.annotations.BeforeClass;
42 import org.testng.annotations.Test;
43
44 public class StockholmFileTest
45 {
46
47   @BeforeClass(alwaysRun = true)
48   public void setUpJvOptionPane()
49   {
50     JvOptionPane.setInteractiveMode(false);
51     JvOptionPane.setMockResponse(JvOptionPane.CANCEL_OPTION);
52   }
53
54   static String PfamFile = "examples/PF00111_seed.stk",
55           RfamFile = "examples/RF00031_folded.stk";
56
57   @Test(groups = { "Functional" })
58   public void pfamFileIO() throws Exception
59   {
60     testFileIOwithFormat(new File(PfamFile), FileFormat.Stockholm, -1, 0,
61             false);
62   }
63
64   @Test(groups = { "Functional" })
65   public void pfamFileDataExtraction() throws Exception
66   {
67     AppletFormatAdapter af = new AppletFormatAdapter();
68     AlignmentI al = af.readFile(PfamFile, DataSourceType.FILE,
69             new IdentifyFile().identify(PfamFile, DataSourceType.FILE));
70     int numpdb = 0;
71     for (SequenceI sq : al.getSequences())
72     {
73       if (sq.getAllPDBEntries() != null)
74       {
75         numpdb += sq.getAllPDBEntries().size();
76       }
77     }
78     assertTrue(
79             "PF00111 seed alignment has at least 1 PDB file, but the reader found none.",
80             numpdb > 0);
81   }
82
83   @Test(groups = { "Functional" })
84   public void rfamFileIO() throws Exception
85   {
86     testFileIOwithFormat(new File(RfamFile), FileFormat.Stockholm, 2, 1,
87             false);
88   }
89
90   /**
91    * test alignment data in given file can be imported, exported and reimported
92    * with no dataloss
93    * 
94    * @param f
95    *          - source datafile (IdentifyFile.identify() should work with it)
96    * @param ioformat
97    *          - label for IO class used to write and read back in the data from
98    *          f
99    * @param ignoreRowVisibility
100    */
101
102   public static void testFileIOwithFormat(File f, FileFormatI ioformat,
103           int naliannot, int nminseqann, boolean ignoreRowVisibility)
104   {
105     System.out.println("Reading file: " + f);
106     String ff = f.getPath();
107     try
108     {
109       AppletFormatAdapter rf = new AppletFormatAdapter();
110
111       AlignmentI al = rf.readFile(ff, DataSourceType.FILE,
112               new IdentifyFile().identify(ff, DataSourceType.FILE));
113
114       assertNotNull("Couldn't read supplied alignment data.", al);
115
116       // make sure dataset is initialised ? not sure about this
117       for (int i = 0; i < al.getSequencesArray().length; ++i)
118       {
119         al.getSequenceAt(i).createDatasetSequence();
120       }
121       String outputfile = rf.formatSequences(ioformat, al, true);
122       System.out.println("Output file in '" + ioformat + "':\n"
123               + outputfile + "\n<<EOF\n");
124       // test for consistency in io
125       AlignmentI al_input = new AppletFormatAdapter().readFile(outputfile,
126               DataSourceType.PASTE, ioformat);
127       assertNotNull("Couldn't parse reimported alignment data.", al_input);
128
129       FileFormatI identifyoutput = new IdentifyFile().identify(outputfile,
130               DataSourceType.PASTE);
131       assertNotNull("Identify routine failed for outputformat " + ioformat,
132               identifyoutput);
133       assertTrue(
134               "Identify routine could not recognise output generated by '"
135                       + ioformat + "' writer",
136               ioformat.equals(identifyoutput));
137       testAlignmentEquivalence(al, al_input, false);
138       int numaliannot = 0, numsqswithali = 0;
139       for (AlignmentAnnotation ala : al_input.getAlignmentAnnotation())
140       {
141         if (ala.sequenceRef == null)
142         {
143           numaliannot++;
144         }
145         else
146         {
147           numsqswithali++;
148         }
149       }
150       if (naliannot > -1)
151       {
152         assertEquals("Number of alignment annotations", naliannot,
153                 numaliannot);
154       }
155
156       assertTrue(
157               "Number of sequence associated annotations wasn't at least "
158                       + nminseqann, numsqswithali >= nminseqann);
159
160     } catch (Exception e)
161     {
162       e.printStackTrace();
163       assertTrue("Couln't format the alignment for output file.", false);
164     }
165   }
166
167   /**
168    * assert alignment equivalence
169    * 
170    * @param al
171    *          'original'
172    * @param al_input
173    *          'secondary' or generated alignment from some datapreserving
174    *          transformation
175    * @param ignoreFeatures
176    *          when true, differences in sequence feature annotation are ignored
177    */
178   public static void testAlignmentEquivalence(AlignmentI al,
179           AlignmentI al_input, boolean ignoreFeatures)
180   {
181   }
182
183   /**
184    * assert alignment equivalence
185    * 
186    * @param al
187    *          'original'
188    * @param al_input
189    *          'secondary' or generated alignment from some datapreserving
190    *          transformation
191    * @param ignoreFeatures
192    *          when true, differences in sequence feature annotation are ignored
193    * 
194    * @param ignoreRowVisibility
195    *          when true, do not fail if there are differences in the visibility
196    *          of annotation rows
197    */
198   public static void testAlignmentEquivalence(AlignmentI al,
199           AlignmentI al_input, boolean ignoreFeatures,
200           boolean ignoreRowVisibility)
201   {
202     assertNotNull("Original alignment was null", al);
203     assertNotNull("Generated alignment was null", al_input);
204
205     assertTrue("Alignment dimension mismatch: original: " + al.getHeight()
206             + "x" + al.getWidth() + ", generated: " + al_input.getHeight()
207             + "x" + al_input.getWidth(),
208             al.getHeight() == al_input.getHeight()
209                     && al.getWidth() == al_input.getWidth());
210
211     // check Alignment annotation
212     AlignmentAnnotation[] aa_new = al_input.getAlignmentAnnotation();
213     AlignmentAnnotation[] aa_original = al.getAlignmentAnnotation();
214
215     // note - at moment we do not distinguish between alignment without any
216     // annotation rows and alignment with no annotation row vector
217     // we might want to revise this in future
218     int aa_new_size = (aa_new == null ? 0 : aa_new.length);
219     int aa_original_size = (aa_original == null ? 0 : aa_original.length);
220     Map<Integer, BitSet> orig_groups = new HashMap<Integer, BitSet>();
221     Map<Integer, BitSet> new_groups = new HashMap<Integer, BitSet>();
222
223     if (aa_new != null && aa_original != null)
224     {
225       for (int i = 0; i < aa_original.length; i++)
226       {
227         if (aa_new.length > i)
228         {
229           assertEqualSecondaryStructure(
230                   "Different alignment annotation at position " + i,
231                   aa_original[i], aa_new[i]);
232           // compare graphGroup or graph properties - needed to verify JAL-1299
233           assertEquals("Graph type not identical.", aa_original[i].graph,
234                   aa_new[i].graph);
235           if (!ignoreRowVisibility)
236           {
237             assertEquals("Visibility not identical.",
238                     aa_original[i].visible,
239                   aa_new[i].visible);
240           }
241           assertEquals("Threshold line not identical.",
242                   aa_original[i].threshold, aa_new[i].threshold);
243           // graphGroup may differ, but pattern should be the same
244           Integer o_ggrp = new Integer(aa_original[i].graphGroup + 2);
245           Integer n_ggrp = new Integer(aa_new[i].graphGroup + 2);
246           BitSet orig_g = orig_groups.get(o_ggrp);
247           BitSet new_g = new_groups.get(n_ggrp);
248           if (orig_g == null)
249           {
250             orig_groups.put(o_ggrp, orig_g = new BitSet());
251           }
252           if (new_g == null)
253           {
254             new_groups.put(n_ggrp, new_g = new BitSet());
255           }
256           assertEquals("Graph Group pattern differs at annotation " + i,
257                   orig_g, new_g);
258           orig_g.set(i);
259           new_g.set(i);
260         }
261         else
262         {
263           System.err.println("No matching annotation row for "
264                   + aa_original[i].toString());
265         }
266       }
267     }
268     assertEquals(
269             "Generated and imported alignment have different annotation sets",
270             aa_original_size, aa_new_size);
271
272     // check sequences, annotation and features
273     SequenceI[] seq_original = new SequenceI[al.getSequencesArray().length];
274     seq_original = al.getSequencesArray();
275     SequenceI[] seq_new = new SequenceI[al_input.getSequencesArray().length];
276     seq_new = al_input.getSequencesArray();
277     SequenceFeature[] sequenceFeatures_original, sequenceFeatures_new;
278     AlignmentAnnotation annot_original, annot_new;
279     //
280     for (int i = 0; i < al.getSequencesArray().length; i++)
281     {
282       String name = seq_original[i].getName();
283       int start = seq_original[i].getStart();
284       int end = seq_original[i].getEnd();
285       System.out.println("Check sequence: " + name + "/" + start + "-"
286               + end);
287
288       // search equal sequence
289       for (int in = 0; in < al_input.getSequencesArray().length; in++)
290       {
291         if (name.equals(seq_new[in].getName())
292                 && start == seq_new[in].getStart()
293                 && end == seq_new[in].getEnd())
294         {
295           String ss_original = seq_original[i].getSequenceAsString();
296           String ss_new = seq_new[in].getSequenceAsString();
297           assertEquals("The sequences " + name + "/" + start + "-" + end
298                   + " are not equal", ss_original, ss_new);
299
300           assertTrue(
301                   "Sequence Features were not equivalent"
302                           + (ignoreFeatures ? " ignoring." : ""),
303                   ignoreFeatures
304                           || (seq_original[i].getSequenceFeatures() == null && seq_new[in]
305                                   .getSequenceFeatures() == null)
306                           || (seq_original[i].getSequenceFeatures() != null && seq_new[in]
307                                   .getSequenceFeatures() != null));
308           // compare sequence features
309           if (seq_original[i].getSequenceFeatures() != null
310                   && seq_new[in].getSequenceFeatures() != null)
311           {
312             System.out.println("There are feature!!!");
313             sequenceFeatures_original = new SequenceFeature[seq_original[i]
314                     .getSequenceFeatures().length];
315             sequenceFeatures_original = seq_original[i]
316                     .getSequenceFeatures();
317             sequenceFeatures_new = new SequenceFeature[seq_new[in]
318                     .getSequenceFeatures().length];
319             sequenceFeatures_new = seq_new[in].getSequenceFeatures();
320
321             assertEquals("different number of features",
322                     seq_original[i].getSequenceFeatures().length,
323                     seq_new[in].getSequenceFeatures().length);
324
325             for (int feat = 0; feat < seq_original[i].getSequenceFeatures().length; feat++)
326             {
327               assertEquals("Different features",
328                       sequenceFeatures_original[feat],
329                       sequenceFeatures_new[feat]);
330             }
331           }
332           // compare alignment annotation
333           if (al.getSequenceAt(i).getAnnotation() != null
334                   && al_input.getSequenceAt(in).getAnnotation() != null)
335           {
336             for (int j = 0; j < al.getSequenceAt(i).getAnnotation().length; j++)
337             {
338               if (al.getSequenceAt(i).getAnnotation()[j] != null
339                       && al_input.getSequenceAt(in).getAnnotation()[j] != null)
340               {
341                 annot_original = al.getSequenceAt(i).getAnnotation()[j];
342                 annot_new = al_input.getSequenceAt(in).getAnnotation()[j];
343                 assertEqualSecondaryStructure(
344                         "Different annotation elements", annot_original,
345                         annot_new);
346               }
347             }
348           }
349           else if (al.getSequenceAt(i).getAnnotation() == null
350                   && al_input.getSequenceAt(in).getAnnotation() == null)
351           {
352             System.out.println("No annotations");
353           }
354           else if (al.getSequenceAt(i).getAnnotation() != null
355                   && al_input.getSequenceAt(in).getAnnotation() == null)
356           {
357             fail("Annotations differed between sequences ("
358                     + al.getSequenceAt(i).getName() + ") and ("
359                     + al_input.getSequenceAt(i).getName() + ")");
360           }
361           break;
362         }
363       }
364     }
365   }
366
367   private static void assertEqualSecondaryStructure(String message,
368           AlignmentAnnotation annot_or,
369           AlignmentAnnotation annot_new)
370   {
371     // TODO: test to cover this assert behaves correctly for all allowed
372     // variations of secondary structure annotation row equivalence
373     if (annot_or.annotations.length != annot_new.annotations.length)
374     {
375       fail("Different lengths for annotation row elements: "
376               + annot_or.annotations.length + "!="
377               + annot_new.annotations.length);
378     }
379     boolean isRna = annot_or.isRNA();
380     assertTrue("Expected " + (isRna ? " valid RNA " : " no RNA ")
381             + " secondary structure in the row.",
382             isRna == annot_new.isRNA());
383     for (int i = 0; i < annot_or.annotations.length; i++)
384     {
385       Annotation an_or = annot_or.annotations[i], an_new = annot_new.annotations[i];
386       if (an_or != null && an_new != null)
387       {
388
389         if (isRna)
390         {
391           if (an_or.secondaryStructure != an_new.secondaryStructure
392                   || an_or.value != an_new.value)
393           {
394             fail("Different RNA secondary structure at column " + i
395                     + " expected: [" + annot_or.annotations[i].toString()
396                     + "] but got: [" + annot_new.annotations[i].toString()
397                     + "]");
398           }
399         }
400         else
401         {
402           // not RNA secondary structure, so expect all elements to match...
403           if (!an_or.displayCharacter.trim().equals(
404                   an_new.displayCharacter.trim())
405                   || !("" + an_or.secondaryStructure).trim().equals(
406                           ("" + an_new.secondaryStructure).trim())
407                   || (an_or.description != an_new.description && !((an_or.description == null && an_new.description
408                           .trim().length() == 0)
409                           || (an_new.description == null && an_or.description
410                                   .trim().length() == 0) || an_or.description
411                           .trim().equals(an_new.description.trim()))))
412           {
413             fail("Annotation Element Mismatch\nElement " + i
414                     + " in original: " + annot_or.annotations[i].toString()
415                     + "\nElement " + i + " in new: "
416                     + annot_new.annotations[i].toString());
417           }
418         }
419       }
420       else if (annot_or.annotations[i] == null
421               && annot_new.annotations[i] == null)
422       {
423         continue;
424       }
425       else
426       {
427         fail("Annotation Element Mismatch\nElement "
428                 + i
429                 + " in original: "
430                 + (annot_or.annotations[i] == null ? "is null"
431                         : annot_or.annotations[i].toString())
432                 + "\nElement "
433                 + i
434                 + " in new: "
435                 + (annot_new.annotations[i] == null ? "is null"
436                         : annot_new.annotations[i].toString()));
437       }
438     }
439   }
440
441   String aliFile = ">Dm\nAAACCCUUUUACACACGGGAAAGGG";
442   String annFile = "JALVIEW_ANNOTATION\n# Created: Thu May 04 11:16:52 BST 2017\n\n"
443           + "SEQUENCE_REF\tDm\nNO_GRAPH\tsecondary structure\tsecondary structure\t"
444           + "(|(|(|(|, .|, .|, .|, .|)|)|)|)|\t0.0\nROWPROPERTIES\t"
445           + "secondary structure\tscaletofit=true\tshowalllabs=true\tcentrelabs=false";
446
447   String annFileCurlyWuss = "JALVIEW_ANNOTATION\n# Created: Thu May 04 11:16:52 BST 2017\n\n"
448           + "SEQUENCE_REF\tDm\nNO_GRAPH\tsecondary structure\tsecondary structure\t"
449           + "(|(|(|(||{|{||{|{||)|)|)|)||}|}|}|}|\t0.0\nROWPROPERTIES\t"
450           + "secondary structure\tscaletofit=true\tshowalllabs=true\tcentrelabs=false";
451   String annFileFullWuss = "JALVIEW_ANNOTATION\n# Created: Thu May 04 11:16:52 BST 2017\n\n"
452           + "SEQUENCE_REF\tDm\nNO_GRAPH\tsecondary structure\tsecondary structure\t"
453           + "(|(|(|(||{|{||[|[||)|)|)|)||}|}|]|]|\t0.0\nROWPROPERTIES\t"
454           + "secondary structure\tscaletofit=true\tshowalllabs=true\tcentrelabs=false";
455
456   @Test(groups = { "Functional" })
457   public void secondaryStructureForRNASequence() throws Exception
458   {
459     roundTripSSForRNA(aliFile, annFile);
460   }
461
462   @Test(groups = { "Functional" })
463   public void curlyWUSSsecondaryStructureForRNASequence() throws Exception
464   {
465     roundTripSSForRNA(aliFile, annFileCurlyWuss);
466   }
467
468   @Test(groups = { "Functional" })
469   public void fullWUSSsecondaryStructureForRNASequence() throws Exception
470   {
471     roundTripSSForRNA(aliFile, annFileFullWuss);
472   }
473
474   @Test(groups = { "Functional" })
475   public void detectWussBrackets()
476   {
477     for (char ch : new char[] { '{', '}', '[', ']', '(', ')', '<', '>' })
478     {
479       Assert.assertTrue(StockholmFile.DETECT_BRACKETS.matchAt("" + ch, 0),
480               "Didn't recognise " + ch + " as a WUSS bracket");
481     }
482     for (char ch : new char[] { '@', '!', 'V', 'Q', '*', ' ', '-', '.' })
483     {
484       Assert.assertFalse(StockholmFile.DETECT_BRACKETS.matchAt("" + ch, 0),
485               "Shouldn't recognise " + ch + " as a WUSS bracket");
486     }
487   }
488   private static void roundTripSSForRNA(String aliFile, String annFile)
489           throws Exception
490   {
491     AlignmentI al = new AppletFormatAdapter().readFile(aliFile,
492             DataSourceType.PASTE, jalview.io.FileFormat.Fasta);
493     AnnotationFile aaf = new AnnotationFile();
494     aaf.readAnnotationFile(al, annFile, DataSourceType.PASTE);
495     al.getAlignmentAnnotation()[0].visible = true;
496
497     // TODO: create a better 'save as <format>' pattern
498     StockholmFile sf = new StockholmFile(al);
499
500     String stockholmFile = sf.print(al.getSequencesArray(), true);
501
502     AlignmentI newAl = new AppletFormatAdapter().readFile(stockholmFile,
503             DataSourceType.PASTE, jalview.io.FileFormat.Stockholm);
504     // AlignmentUtils.showOrHideSequenceAnnotations(newAl.getViewport()
505     // .getAlignment(), Arrays.asList("Secondary Structure"), newAl
506     // .getViewport().getAlignment().getSequences(), true, true);
507     testAlignmentEquivalence(al, newAl, true, true);
508
509   }
510 }