JAL-2507 JAL-2509 allow differences in visibility of annotation rows to be ignored...
[jalview.git] / test / jalview / io / StockholmFileTest.java
1 /*
2  * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3  * Copyright (C) $$Year-Rel$$ The Jalview Authors
4  * 
5  * This file is part of Jalview.
6  * 
7  * Jalview is free software: you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License 
9  * as published by the Free Software Foundation, either version 3
10  * of the License, or (at your option) any later version.
11  *  
12  * Jalview is distributed in the hope that it will be useful, but 
13  * WITHOUT ANY WARRANTY; without even the implied warranty 
14  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR 
15  * PURPOSE.  See the GNU General Public License for more details.
16  * 
17  * You should have received a copy of the GNU General Public License
18  * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
19  * The Jalview Authors are detailed in the 'AUTHORS' file.
20  */
21 package jalview.io;
22
23 import static org.testng.AssertJUnit.assertEquals;
24 import static org.testng.AssertJUnit.assertNotNull;
25 import static org.testng.AssertJUnit.assertTrue;
26 import static org.testng.AssertJUnit.fail;
27
28 import jalview.datamodel.AlignmentAnnotation;
29 import jalview.datamodel.AlignmentI;
30 import jalview.datamodel.Annotation;
31 import jalview.datamodel.SequenceFeature;
32 import jalview.datamodel.SequenceI;
33 import jalview.gui.JvOptionPane;
34
35 import java.io.File;
36 import java.util.BitSet;
37 import java.util.HashMap;
38 import java.util.Map;
39
40 import org.testng.annotations.BeforeClass;
41 import org.testng.annotations.Test;
42
43 public class StockholmFileTest
44 {
45
46   @BeforeClass(alwaysRun = true)
47   public void setUpJvOptionPane()
48   {
49     JvOptionPane.setInteractiveMode(false);
50     JvOptionPane.setMockResponse(JvOptionPane.CANCEL_OPTION);
51   }
52
53   static String PfamFile = "examples/PF00111_seed.stk",
54           RfamFile = "examples/RF00031_folded.stk";
55
56   @Test(groups = { "Functional" })
57   public void pfamFileIO() throws Exception
58   {
59     testFileIOwithFormat(new File(PfamFile), FileFormat.Stockholm, -1, 0,
60             false);
61   }
62
63   @Test(groups = { "Functional" })
64   public void pfamFileDataExtraction() throws Exception
65   {
66     AppletFormatAdapter af = new AppletFormatAdapter();
67     AlignmentI al = af.readFile(PfamFile, DataSourceType.FILE,
68             new IdentifyFile().identify(PfamFile, DataSourceType.FILE));
69     int numpdb = 0;
70     for (SequenceI sq : al.getSequences())
71     {
72       if (sq.getAllPDBEntries() != null)
73       {
74         numpdb += sq.getAllPDBEntries().size();
75       }
76     }
77     assertTrue(
78             "PF00111 seed alignment has at least 1 PDB file, but the reader found none.",
79             numpdb > 0);
80   }
81
82   @Test(groups = { "Functional" })
83   public void rfamFileIO() throws Exception
84   {
85     testFileIOwithFormat(new File(RfamFile), FileFormat.Stockholm, 2, 1,
86             false);
87   }
88
89   /**
90    * test alignment data in given file can be imported, exported and reimported
91    * with no dataloss
92    * 
93    * @param f
94    *          - source datafile (IdentifyFile.identify() should work with it)
95    * @param ioformat
96    *          - label for IO class used to write and read back in the data from
97    *          f
98    * @param ignoreRowVisibility
99    */
100
101   public static void testFileIOwithFormat(File f, FileFormatI ioformat,
102           int naliannot, int nminseqann, boolean ignoreRowVisibility)
103   {
104     System.out.println("Reading file: " + f);
105     String ff = f.getPath();
106     try
107     {
108       AppletFormatAdapter rf = new AppletFormatAdapter();
109
110       AlignmentI al = rf.readFile(ff, DataSourceType.FILE,
111               new IdentifyFile().identify(ff, DataSourceType.FILE));
112
113       assertNotNull("Couldn't read supplied alignment data.", al);
114
115       // make sure dataset is initialised ? not sure about this
116       for (int i = 0; i < al.getSequencesArray().length; ++i)
117       {
118         al.getSequenceAt(i).createDatasetSequence();
119       }
120       String outputfile = rf.formatSequences(ioformat, al, true);
121       System.out.println("Output file in '" + ioformat + "':\n"
122               + outputfile + "\n<<EOF\n");
123       // test for consistency in io
124       AlignmentI al_input = new AppletFormatAdapter().readFile(outputfile,
125               DataSourceType.PASTE, ioformat);
126       assertNotNull("Couldn't parse reimported alignment data.", al_input);
127
128       FileFormatI identifyoutput = new IdentifyFile().identify(outputfile,
129               DataSourceType.PASTE);
130       assertNotNull("Identify routine failed for outputformat " + ioformat,
131               identifyoutput);
132       assertTrue(
133               "Identify routine could not recognise output generated by '"
134                       + ioformat + "' writer",
135               ioformat.equals(identifyoutput));
136       testAlignmentEquivalence(al, al_input, false);
137       int numaliannot = 0, numsqswithali = 0;
138       for (AlignmentAnnotation ala : al_input.getAlignmentAnnotation())
139       {
140         if (ala.sequenceRef == null)
141         {
142           numaliannot++;
143         }
144         else
145         {
146           numsqswithali++;
147         }
148       }
149       if (naliannot > -1)
150       {
151         assertEquals("Number of alignment annotations", naliannot,
152                 numaliannot);
153       }
154
155       assertTrue(
156               "Number of sequence associated annotations wasn't at least "
157                       + nminseqann, numsqswithali >= nminseqann);
158
159     } catch (Exception e)
160     {
161       e.printStackTrace();
162       assertTrue("Couln't format the alignment for output file.", false);
163     }
164   }
165
166   /**
167    * assert alignment equivalence
168    * 
169    * @param al
170    *          'original'
171    * @param al_input
172    *          'secondary' or generated alignment from some datapreserving
173    *          transformation
174    * @param ignoreFeatures
175    *          when true, differences in sequence feature annotation are ignored
176    */
177   public static void testAlignmentEquivalence(AlignmentI al,
178           AlignmentI al_input, boolean ignoreFeatures)
179   {
180   }
181
182   /**
183    * assert alignment equivalence
184    * 
185    * @param al
186    *          'original'
187    * @param al_input
188    *          'secondary' or generated alignment from some datapreserving
189    *          transformation
190    * @param ignoreFeatures
191    *          when true, differences in sequence feature annotation are ignored
192    * 
193    * @param ignoreRowVisibility
194    *          when true, do not fail if there are differences in the visibility
195    *          of annotation rows
196    */
197   public static void testAlignmentEquivalence(AlignmentI al,
198           AlignmentI al_input, boolean ignoreFeatures,
199           boolean ignoreRowVisibility)
200   {
201     assertNotNull("Original alignment was null", al);
202     assertNotNull("Generated alignment was null", al_input);
203
204     assertTrue("Alignment dimension mismatch: original: " + al.getHeight()
205             + "x" + al.getWidth() + ", generated: " + al_input.getHeight()
206             + "x" + al_input.getWidth(),
207             al.getHeight() == al_input.getHeight()
208                     && al.getWidth() == al_input.getWidth());
209
210     // check Alignment annotation
211     AlignmentAnnotation[] aa_new = al_input.getAlignmentAnnotation();
212     AlignmentAnnotation[] aa_original = al.getAlignmentAnnotation();
213
214     // note - at moment we do not distinguish between alignment without any
215     // annotation rows and alignment with no annotation row vector
216     // we might want to revise this in future
217     int aa_new_size = (aa_new == null ? 0 : aa_new.length);
218     int aa_original_size = (aa_original == null ? 0 : aa_original.length);
219     Map<Integer, BitSet> orig_groups = new HashMap<Integer, BitSet>();
220     Map<Integer, BitSet> new_groups = new HashMap<Integer, BitSet>();
221
222     if (aa_new != null && aa_original != null)
223     {
224       for (int i = 0; i < aa_original.length; i++)
225       {
226         if (aa_new.length > i)
227         {
228           assertEqualSecondaryStructure(
229                   "Different alignment annotation at position " + i,
230                   aa_original[i], aa_new[i]);
231           // compare graphGroup or graph properties - needed to verify JAL-1299
232           assertEquals("Graph type not identical.", aa_original[i].graph,
233                   aa_new[i].graph);
234           if (!ignoreRowVisibility)
235           {
236             assertEquals("Visibility not identical.",
237                     aa_original[i].visible,
238                   aa_new[i].visible);
239           }
240           assertEquals("Threshold line not identical.",
241                   aa_original[i].threshold, aa_new[i].threshold);
242           // graphGroup may differ, but pattern should be the same
243           Integer o_ggrp = new Integer(aa_original[i].graphGroup + 2);
244           Integer n_ggrp = new Integer(aa_new[i].graphGroup + 2);
245           BitSet orig_g = orig_groups.get(o_ggrp);
246           BitSet new_g = new_groups.get(n_ggrp);
247           if (orig_g == null)
248           {
249             orig_groups.put(o_ggrp, orig_g = new BitSet());
250           }
251           if (new_g == null)
252           {
253             new_groups.put(n_ggrp, new_g = new BitSet());
254           }
255           assertEquals("Graph Group pattern differs at annotation " + i,
256                   orig_g, new_g);
257           orig_g.set(i);
258           new_g.set(i);
259         }
260         else
261         {
262           System.err.println("No matching annotation row for "
263                   + aa_original[i].toString());
264         }
265       }
266     }
267     assertEquals(
268             "Generated and imported alignment have different annotation sets",
269             aa_original_size, aa_new_size);
270
271     // check sequences, annotation and features
272     SequenceI[] seq_original = new SequenceI[al.getSequencesArray().length];
273     seq_original = al.getSequencesArray();
274     SequenceI[] seq_new = new SequenceI[al_input.getSequencesArray().length];
275     seq_new = al_input.getSequencesArray();
276     SequenceFeature[] sequenceFeatures_original, sequenceFeatures_new;
277     AlignmentAnnotation annot_original, annot_new;
278     //
279     for (int i = 0; i < al.getSequencesArray().length; i++)
280     {
281       String name = seq_original[i].getName();
282       int start = seq_original[i].getStart();
283       int end = seq_original[i].getEnd();
284       System.out.println("Check sequence: " + name + "/" + start + "-"
285               + end);
286
287       // search equal sequence
288       for (int in = 0; in < al_input.getSequencesArray().length; in++)
289       {
290         if (name.equals(seq_new[in].getName())
291                 && start == seq_new[in].getStart()
292                 && end == seq_new[in].getEnd())
293         {
294           String ss_original = seq_original[i].getSequenceAsString();
295           String ss_new = seq_new[in].getSequenceAsString();
296           assertEquals("The sequences " + name + "/" + start + "-" + end
297                   + " are not equal", ss_original, ss_new);
298
299           assertTrue(
300                   "Sequence Features were not equivalent"
301                           + (ignoreFeatures ? " ignoring." : ""),
302                   ignoreFeatures
303                           || (seq_original[i].getSequenceFeatures() == null && seq_new[in]
304                                   .getSequenceFeatures() == null)
305                           || (seq_original[i].getSequenceFeatures() != null && seq_new[in]
306                                   .getSequenceFeatures() != null));
307           // compare sequence features
308           if (seq_original[i].getSequenceFeatures() != null
309                   && seq_new[in].getSequenceFeatures() != null)
310           {
311             System.out.println("There are feature!!!");
312             sequenceFeatures_original = new SequenceFeature[seq_original[i]
313                     .getSequenceFeatures().length];
314             sequenceFeatures_original = seq_original[i]
315                     .getSequenceFeatures();
316             sequenceFeatures_new = new SequenceFeature[seq_new[in]
317                     .getSequenceFeatures().length];
318             sequenceFeatures_new = seq_new[in].getSequenceFeatures();
319
320             assertEquals("different number of features",
321                     seq_original[i].getSequenceFeatures().length,
322                     seq_new[in].getSequenceFeatures().length);
323
324             for (int feat = 0; feat < seq_original[i].getSequenceFeatures().length; feat++)
325             {
326               assertEquals("Different features",
327                       sequenceFeatures_original[feat],
328                       sequenceFeatures_new[feat]);
329             }
330           }
331           // compare alignment annotation
332           if (al.getSequenceAt(i).getAnnotation() != null
333                   && al_input.getSequenceAt(in).getAnnotation() != null)
334           {
335             for (int j = 0; j < al.getSequenceAt(i).getAnnotation().length; j++)
336             {
337               if (al.getSequenceAt(i).getAnnotation()[j] != null
338                       && al_input.getSequenceAt(in).getAnnotation()[j] != null)
339               {
340                 annot_original = al.getSequenceAt(i).getAnnotation()[j];
341                 annot_new = al_input.getSequenceAt(in).getAnnotation()[j];
342                 assertEqualSecondaryStructure(
343                         "Different annotation elements", annot_original,
344                         annot_new);
345               }
346             }
347           }
348           else if (al.getSequenceAt(i).getAnnotation() == null
349                   && al_input.getSequenceAt(in).getAnnotation() == null)
350           {
351             System.out.println("No annotations");
352           }
353           else if (al.getSequenceAt(i).getAnnotation() != null
354                   && al_input.getSequenceAt(in).getAnnotation() == null)
355           {
356             fail("Annotations differed between sequences ("
357                     + al.getSequenceAt(i).getName() + ") and ("
358                     + al_input.getSequenceAt(i).getName() + ")");
359           }
360           break;
361         }
362       }
363     }
364   }
365
366   private static void assertEqualSecondaryStructure(String message,
367           AlignmentAnnotation annot_or,
368           AlignmentAnnotation annot_new)
369   {
370     // TODO: test to cover this assert behaves correctly for all allowed
371     // variations of secondary structure annotation row equivalence
372     if (annot_or.annotations.length != annot_new.annotations.length)
373     {
374       fail("Different lengths for annotation row elements: "
375               + annot_or.annotations.length + "!="
376               + annot_new.annotations.length);
377     }
378     boolean isRna = annot_or.isRNA();
379     assertTrue("Expected " + (isRna ? " valid RNA " : " no RNA ")
380             + " secondary structure in the row.",
381             isRna == annot_new.isRNA());
382     for (int i = 0; i < annot_or.annotations.length; i++)
383     {
384       Annotation an_or = annot_or.annotations[i], an_new = annot_new.annotations[i];
385       if (an_or != null && an_new != null)
386       {
387
388         if (isRna)
389         {
390           if (an_or.secondaryStructure != an_new.secondaryStructure
391                   || an_or.value != an_new.value)
392           {
393             fail("Different RNA secondary structure at column " + i
394                     + " expected: [" + annot_or.annotations[i].toString()
395                     + "] but got: [" + annot_new.annotations[i].toString()
396                     + "]");
397           }
398         }
399         else
400         {
401           // not RNA secondary structure, so expect all elements to match...
402           if (!an_or.displayCharacter.trim().equals(
403                   an_new.displayCharacter.trim())
404                   || !("" + an_or.secondaryStructure).trim().equals(
405                           ("" + an_new.secondaryStructure).trim())
406                   || (an_or.description != an_new.description && !((an_or.description == null && an_new.description
407                           .trim().length() == 0)
408                           || (an_new.description == null && an_or.description
409                                   .trim().length() == 0) || an_or.description
410                           .trim().equals(an_new.description.trim()))))
411           {
412             fail("Annotation Element Mismatch\nElement " + i
413                     + " in original: " + annot_or.annotations[i].toString()
414                     + "\nElement " + i + " in new: "
415                     + annot_new.annotations[i].toString());
416           }
417         }
418       }
419       else if (annot_or.annotations[i] == null
420               && annot_new.annotations[i] == null)
421       {
422         continue;
423       }
424       else
425       {
426         fail("Annotation Element Mismatch\nElement "
427                 + i
428                 + " in original: "
429                 + (annot_or.annotations[i] == null ? "is null"
430                         : annot_or.annotations[i].toString())
431                 + "\nElement "
432                 + i
433                 + " in new: "
434                 + (annot_new.annotations[i] == null ? "is null"
435                         : annot_new.annotations[i].toString()));
436       }
437     }
438   }
439 }