JAL-2507 JAL-2509 new assert to compare secondary structure in two annotation rows
[jalview.git] / test / jalview / io / StockholmFileTest.java
1 /*
2  * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3  * Copyright (C) $$Year-Rel$$ The Jalview Authors
4  * 
5  * This file is part of Jalview.
6  * 
7  * Jalview is free software: you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License 
9  * as published by the Free Software Foundation, either version 3
10  * of the License, or (at your option) any later version.
11  *  
12  * Jalview is distributed in the hope that it will be useful, but 
13  * WITHOUT ANY WARRANTY; without even the implied warranty 
14  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR 
15  * PURPOSE.  See the GNU General Public License for more details.
16  * 
17  * You should have received a copy of the GNU General Public License
18  * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
19  * The Jalview Authors are detailed in the 'AUTHORS' file.
20  */
21 package jalview.io;
22
23 import static org.testng.AssertJUnit.assertEquals;
24 import static org.testng.AssertJUnit.assertNotNull;
25 import static org.testng.AssertJUnit.assertTrue;
26 import static org.testng.AssertJUnit.fail;
27
28 import jalview.datamodel.AlignmentAnnotation;
29 import jalview.datamodel.AlignmentI;
30 import jalview.datamodel.Annotation;
31 import jalview.datamodel.SequenceFeature;
32 import jalview.datamodel.SequenceI;
33 import jalview.gui.JvOptionPane;
34
35 import java.io.File;
36 import java.util.BitSet;
37 import java.util.HashMap;
38 import java.util.Map;
39
40 import org.testng.annotations.BeforeClass;
41 import org.testng.annotations.Test;
42
43 public class StockholmFileTest
44 {
45
46   @BeforeClass(alwaysRun = true)
47   public void setUpJvOptionPane()
48   {
49     JvOptionPane.setInteractiveMode(false);
50     JvOptionPane.setMockResponse(JvOptionPane.CANCEL_OPTION);
51   }
52
53   static String PfamFile = "examples/PF00111_seed.stk",
54           RfamFile = "examples/RF00031_folded.stk";
55
56   @Test(groups = { "Functional" })
57   public void pfamFileIO() throws Exception
58   {
59     testFileIOwithFormat(new File(PfamFile), FileFormat.Stockholm, -1, 0);
60   }
61
62   @Test(groups = { "Functional" })
63   public void pfamFileDataExtraction() throws Exception
64   {
65     AppletFormatAdapter af = new AppletFormatAdapter();
66     AlignmentI al = af.readFile(PfamFile, DataSourceType.FILE,
67             new IdentifyFile().identify(PfamFile, DataSourceType.FILE));
68     int numpdb = 0;
69     for (SequenceI sq : al.getSequences())
70     {
71       if (sq.getAllPDBEntries() != null)
72       {
73         numpdb += sq.getAllPDBEntries().size();
74       }
75     }
76     assertTrue(
77             "PF00111 seed alignment has at least 1 PDB file, but the reader found none.",
78             numpdb > 0);
79   }
80
81   @Test(groups = { "Functional" })
82   public void rfamFileIO() throws Exception
83   {
84     testFileIOwithFormat(new File(RfamFile), FileFormat.Stockholm, 2, 1);
85   }
86
87   /**
88    * test alignment data in given file can be imported, exported and reimported
89    * with no dataloss
90    * 
91    * @param f
92    *          - source datafile (IdentifyFile.identify() should work with it)
93    * @param ioformat
94    *          - label for IO class used to write and read back in the data from
95    *          f
96    */
97
98   public static void testFileIOwithFormat(File f, FileFormatI ioformat,
99           int naliannot, int nminseqann)
100   {
101     System.out.println("Reading file: " + f);
102     String ff = f.getPath();
103     try
104     {
105       AppletFormatAdapter rf = new AppletFormatAdapter();
106
107       AlignmentI al = rf.readFile(ff, DataSourceType.FILE,
108               new IdentifyFile().identify(ff, DataSourceType.FILE));
109
110       assertNotNull("Couldn't read supplied alignment data.", al);
111
112       // make sure dataset is initialised ? not sure about this
113       for (int i = 0; i < al.getSequencesArray().length; ++i)
114       {
115         al.getSequenceAt(i).createDatasetSequence();
116       }
117       String outputfile = rf.formatSequences(ioformat, al, true);
118       System.out.println("Output file in '" + ioformat + "':\n"
119               + outputfile + "\n<<EOF\n");
120       // test for consistency in io
121       AlignmentI al_input = new AppletFormatAdapter().readFile(outputfile,
122               DataSourceType.PASTE, ioformat);
123       assertNotNull("Couldn't parse reimported alignment data.", al_input);
124
125       FileFormatI identifyoutput = new IdentifyFile().identify(outputfile,
126               DataSourceType.PASTE);
127       assertNotNull("Identify routine failed for outputformat " + ioformat,
128               identifyoutput);
129       assertTrue(
130               "Identify routine could not recognise output generated by '"
131                       + ioformat + "' writer",
132               ioformat.equals(identifyoutput));
133       testAlignmentEquivalence(al, al_input, false);
134       int numaliannot = 0, numsqswithali = 0;
135       for (AlignmentAnnotation ala : al_input.getAlignmentAnnotation())
136       {
137         if (ala.sequenceRef == null)
138         {
139           numaliannot++;
140         }
141         else
142         {
143           numsqswithali++;
144         }
145       }
146       if (naliannot > -1)
147       {
148         assertEquals("Number of alignment annotations", naliannot,
149                 numaliannot);
150       }
151
152       assertTrue(
153               "Number of sequence associated annotations wasn't at least "
154                       + nminseqann, numsqswithali >= nminseqann);
155
156     } catch (Exception e)
157     {
158       e.printStackTrace();
159       assertTrue("Couln't format the alignment for output file.", false);
160     }
161   }
162
163   /**
164    * assert alignment equivalence
165    * 
166    * @param al
167    *          'original'
168    * @param al_input
169    *          'secondary' or generated alignment from some datapreserving
170    *          transformation
171    * @param ignoreFeatures
172    *          when true, differences in sequence feature annotation are ignored
173    */
174   public static void testAlignmentEquivalence(AlignmentI al,
175           AlignmentI al_input, boolean ignoreFeatures)
176   {
177     assertNotNull("Original alignment was null", al);
178     assertNotNull("Generated alignment was null", al_input);
179
180     assertTrue("Alignment dimension mismatch: original: " + al.getHeight()
181             + "x" + al.getWidth() + ", generated: " + al_input.getHeight()
182             + "x" + al_input.getWidth(),
183             al.getHeight() == al_input.getHeight()
184                     && al.getWidth() == al_input.getWidth());
185
186     // check Alignment annotation
187     AlignmentAnnotation[] aa_new = al_input.getAlignmentAnnotation();
188     AlignmentAnnotation[] aa_original = al.getAlignmentAnnotation();
189
190     // note - at moment we do not distinguish between alignment without any
191     // annotation rows and alignment with no annotation row vector
192     // we might want to revise this in future
193     int aa_new_size = (aa_new == null ? 0 : aa_new.length);
194     int aa_original_size = (aa_original == null ? 0 : aa_original.length);
195     Map<Integer, BitSet> orig_groups = new HashMap<Integer, BitSet>();
196     Map<Integer, BitSet> new_groups = new HashMap<Integer, BitSet>();
197
198     if (aa_new != null && aa_original != null)
199     {
200       for (int i = 0; i < aa_original.length; i++)
201       {
202         if (aa_new.length > i)
203         {
204           assertEqualSecondaryStructure(
205                   "Different alignment annotation at position " + i,
206                   aa_original[i], aa_new[i]);
207           // compare graphGroup or graph properties - needed to verify JAL-1299
208           assertEquals("Graph type not identical.", aa_original[i].graph,
209                   aa_new[i].graph);
210           assertEquals("Visibility not identical.", aa_original[i].visible,
211                   aa_new[i].visible);
212           assertEquals("Threshold line not identical.",
213                   aa_original[i].threshold, aa_new[i].threshold);
214           // graphGroup may differ, but pattern should be the same
215           Integer o_ggrp = new Integer(aa_original[i].graphGroup + 2);
216           Integer n_ggrp = new Integer(aa_new[i].graphGroup + 2);
217           BitSet orig_g = orig_groups.get(o_ggrp);
218           BitSet new_g = new_groups.get(n_ggrp);
219           if (orig_g == null)
220           {
221             orig_groups.put(o_ggrp, orig_g = new BitSet());
222           }
223           if (new_g == null)
224           {
225             new_groups.put(n_ggrp, new_g = new BitSet());
226           }
227           assertEquals("Graph Group pattern differs at annotation " + i,
228                   orig_g, new_g);
229           orig_g.set(i);
230           new_g.set(i);
231         }
232         else
233         {
234           System.err.println("No matching annotation row for "
235                   + aa_original[i].toString());
236         }
237       }
238     }
239     assertEquals(
240             "Generated and imported alignment have different annotation sets",
241             aa_original_size, aa_new_size);
242
243     // check sequences, annotation and features
244     SequenceI[] seq_original = new SequenceI[al.getSequencesArray().length];
245     seq_original = al.getSequencesArray();
246     SequenceI[] seq_new = new SequenceI[al_input.getSequencesArray().length];
247     seq_new = al_input.getSequencesArray();
248     SequenceFeature[] sequenceFeatures_original, sequenceFeatures_new;
249     AlignmentAnnotation annot_original, annot_new;
250     //
251     for (int i = 0; i < al.getSequencesArray().length; i++)
252     {
253       String name = seq_original[i].getName();
254       int start = seq_original[i].getStart();
255       int end = seq_original[i].getEnd();
256       System.out.println("Check sequence: " + name + "/" + start + "-"
257               + end);
258
259       // search equal sequence
260       for (int in = 0; in < al_input.getSequencesArray().length; in++)
261       {
262         if (name.equals(seq_new[in].getName())
263                 && start == seq_new[in].getStart()
264                 && end == seq_new[in].getEnd())
265         {
266           String ss_original = seq_original[i].getSequenceAsString();
267           String ss_new = seq_new[in].getSequenceAsString();
268           assertEquals("The sequences " + name + "/" + start + "-" + end
269                   + " are not equal", ss_original, ss_new);
270
271           assertTrue(
272                   "Sequence Features were not equivalent"
273                           + (ignoreFeatures ? " ignoring." : ""),
274                   ignoreFeatures
275                           || (seq_original[i].getSequenceFeatures() == null && seq_new[in]
276                                   .getSequenceFeatures() == null)
277                           || (seq_original[i].getSequenceFeatures() != null && seq_new[in]
278                                   .getSequenceFeatures() != null));
279           // compare sequence features
280           if (seq_original[i].getSequenceFeatures() != null
281                   && seq_new[in].getSequenceFeatures() != null)
282           {
283             System.out.println("There are feature!!!");
284             sequenceFeatures_original = new SequenceFeature[seq_original[i]
285                     .getSequenceFeatures().length];
286             sequenceFeatures_original = seq_original[i]
287                     .getSequenceFeatures();
288             sequenceFeatures_new = new SequenceFeature[seq_new[in]
289                     .getSequenceFeatures().length];
290             sequenceFeatures_new = seq_new[in].getSequenceFeatures();
291
292             assertEquals("different number of features",
293                     seq_original[i].getSequenceFeatures().length,
294                     seq_new[in].getSequenceFeatures().length);
295
296             for (int feat = 0; feat < seq_original[i].getSequenceFeatures().length; feat++)
297             {
298               assertEquals("Different features",
299                       sequenceFeatures_original[feat],
300                       sequenceFeatures_new[feat]);
301             }
302           }
303           // compare alignment annotation
304           if (al.getSequenceAt(i).getAnnotation() != null
305                   && al_input.getSequenceAt(in).getAnnotation() != null)
306           {
307             for (int j = 0; j < al.getSequenceAt(i).getAnnotation().length; j++)
308             {
309               if (al.getSequenceAt(i).getAnnotation()[j] != null
310                       && al_input.getSequenceAt(in).getAnnotation()[j] != null)
311               {
312                 annot_original = al.getSequenceAt(i).getAnnotation()[j];
313                 annot_new = al_input.getSequenceAt(in).getAnnotation()[j];
314                 assertEqualSecondaryStructure(
315                         "Different annotation elements", annot_original,
316                         annot_new);
317               }
318             }
319           }
320           else if (al.getSequenceAt(i).getAnnotation() == null
321                   && al_input.getSequenceAt(in).getAnnotation() == null)
322           {
323             System.out.println("No annotations");
324           }
325           else if (al.getSequenceAt(i).getAnnotation() != null
326                   && al_input.getSequenceAt(in).getAnnotation() == null)
327           {
328             fail("Annotations differed between sequences ("
329                     + al.getSequenceAt(i).getName() + ") and ("
330                     + al_input.getSequenceAt(i).getName() + ")");
331           }
332           break;
333         }
334       }
335     }
336   }
337
338   private static void assertEqualSecondaryStructure(String message,
339           AlignmentAnnotation annot_or,
340           AlignmentAnnotation annot_new)
341   {
342     // TODO: test to cover this assert behaves correctly for all allowed
343     // variations of secondary structure annotation row equivalence
344     if (annot_or.annotations.length != annot_new.annotations.length)
345     {
346       fail("Different lengths for annotation row elements: "
347               + annot_or.annotations.length + "!="
348               + annot_new.annotations.length);
349     }
350     boolean isRna = annot_or.isRNA();
351     assertTrue("Expected " + (isRna ? " valid RNA " : " no RNA ")
352             + " secondary structure in the row.",
353             isRna == annot_new.isRNA());
354     for (int i = 0; i < annot_or.annotations.length; i++)
355     {
356       Annotation an_or = annot_or.annotations[i], an_new = annot_new.annotations[i];
357       if (an_or != null && an_new != null)
358       {
359
360         if (isRna)
361         {
362           if (an_or.secondaryStructure != an_new.secondaryStructure
363                   || an_or.value != an_new.value)
364           {
365             fail("Different RNA secondary structure at column " + i
366                     + " expected: [" + annot_or.annotations[i].toString()
367                     + "] but got: [" + annot_new.annotations[i].toString()
368                     + "]");
369           }
370         }
371         else
372         {
373           // not RNA secondary structure, so expect all elements to match...
374           if (!an_or.displayCharacter.trim().equals(
375                   an_new.displayCharacter.trim())
376                   || !("" + an_or.secondaryStructure).trim().equals(
377                           ("" + an_new.secondaryStructure).trim())
378                   || (an_or.description != an_new.description && !((an_or.description == null && an_new.description
379                           .trim().length() == 0)
380                           || (an_new.description == null && an_or.description
381                                   .trim().length() == 0) || an_or.description
382                           .trim().equals(an_new.description.trim()))))
383           {
384             fail("Annotation Element Mismatch\nElement " + i
385                     + " in original: " + annot_or.annotations[i].toString()
386                     + "\nElement " + i + " in new: "
387                     + annot_new.annotations[i].toString());
388           }
389         }
390       }
391       else if (annot_or.annotations[i] == null
392               && annot_new.annotations[i] == null)
393       {
394         continue;
395       }
396       else
397       {
398         fail("Annotation Element Mismatch\nElement "
399                 + i
400                 + " in original: "
401                 + (annot_or.annotations[i] == null ? "is null"
402                         : annot_or.annotations[i].toString())
403                 + "\nElement "
404                 + i
405                 + " in new: "
406                 + (annot_new.annotations[i] == null ? "is null"
407                         : annot_new.annotations[i].toString()));
408       }
409     }
410   }
411 }