JAL-1270 test suites import order refactor
[jalview.git] / test / jalview / io / StockholmFileTest.java
1 /*
2  * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3  * Copyright (C) $$Year-Rel$$ The Jalview Authors
4  * 
5  * This file is part of Jalview.
6  * 
7  * Jalview is free software: you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License 
9  * as published by the Free Software Foundation, either version 3
10  * of the License, or (at your option) any later version.
11  *  
12  * Jalview is distributed in the hope that it will be useful, but 
13  * WITHOUT ANY WARRANTY; without even the implied warranty 
14  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR 
15  * PURPOSE.  See the GNU General Public License for more details.
16  * 
17  * You should have received a copy of the GNU General Public License
18  * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
19  * The Jalview Authors are detailed in the 'AUTHORS' file.
20  */
21 package jalview.io;
22
23 import static org.testng.AssertJUnit.assertEquals;
24 import static org.testng.AssertJUnit.assertNotNull;
25 import static org.testng.AssertJUnit.assertTrue;
26
27 import jalview.datamodel.Alignment;
28 import jalview.datamodel.AlignmentAnnotation;
29 import jalview.datamodel.AlignmentI;
30 import jalview.datamodel.Annotation;
31 import jalview.datamodel.SequenceFeature;
32 import jalview.datamodel.SequenceI;
33
34 import java.io.File;
35 import java.util.BitSet;
36 import java.util.HashMap;
37 import java.util.Map;
38
39 import org.testng.annotations.Test;
40
41 public class StockholmFileTest
42 {
43
44   static String PfamFile = "examples/PF00111_seed.stk",
45           RfamFile = "examples/RF00031_folded.stk";
46
47   @Test
48   public void pfamFileIO() throws Exception
49   {
50     testFileIOwithFormat(new File(PfamFile), "STH", -1, 0);
51   }
52
53   @Test
54   public void pfamFileDataExtraction() throws Exception
55   {
56     AppletFormatAdapter af = new AppletFormatAdapter();
57     AlignmentI al = af.readFile(PfamFile, af.FILE,
58             new IdentifyFile().Identify(PfamFile, af.FILE));
59     int numpdb = 0;
60     for (SequenceI sq : al.getSequences())
61     {
62       if (sq.getPDBId() != null)
63       {
64         numpdb += sq.getPDBId().size();
65       }
66     }
67     assertTrue(
68             "PF00111 seed alignment has at least 1 PDB file, but the reader found none.",
69             numpdb > 0);
70   }
71
72   @Test
73   public void rfamFileIO() throws Exception
74   {
75     testFileIOwithFormat(new File(RfamFile), "STH", 2, 1);
76   }
77
78   /**
79    * test alignment data in given file can be imported, exported and reimported
80    * with no dataloss
81    * 
82    * @param f
83    *          - source datafile (IdentifyFile.identify() should work with it)
84    * @param ioformat
85    *          - label for IO class used to write and read back in the data from
86    *          f
87    */
88
89   public static void testFileIOwithFormat(File f, String ioformat,
90           int naliannot, int nminseqann)
91   {
92     System.out.println("Reading file: " + f);
93     String ff = f.getPath();
94     try
95     {
96       AppletFormatAdapter rf = new AppletFormatAdapter();
97
98       Alignment al = rf.readFile(ff, AppletFormatAdapter.FILE,
99               new IdentifyFile().Identify(ff, AppletFormatAdapter.FILE));
100
101       assertNotNull("Couldn't read supplied alignment data.", al);
102
103       // make sure dataset is initialised ? not sure about this
104       for (int i = 0; i < al.getSequencesArray().length; ++i)
105       {
106         al.getSequenceAt(i).setDatasetSequence(al.getSequenceAt(i));
107       }
108       String outputfile = rf.formatSequences(ioformat, al, true);
109       System.out.println("Output file in '" + ioformat + "':\n"
110               + outputfile + "\n<<EOF\n");
111       // test for consistency in io
112       Alignment al_input = new AppletFormatAdapter().readFile(outputfile,
113               AppletFormatAdapter.PASTE, ioformat);
114       assertNotNull("Couldn't parse reimported alignment data.", al_input);
115
116       String identifyoutput = new IdentifyFile().Identify(outputfile,
117               AppletFormatAdapter.PASTE);
118       assertNotNull("Identify routine failed for outputformat " + ioformat,
119               identifyoutput);
120       assertTrue(
121               "Identify routine could not recognise output generated by '"
122                       + ioformat + "' writer",
123               ioformat.equals(identifyoutput));
124       testAlignmentEquivalence(al, al_input, false);
125       int numaliannot = 0, numsqswithali = 0;
126       for (AlignmentAnnotation ala : al_input.getAlignmentAnnotation())
127       {
128         if (ala.sequenceRef == null)
129         {
130           numaliannot++;
131         }
132         else
133         {
134           numsqswithali++;
135         }
136       }
137       if (naliannot > -1)
138       {
139         assertEquals("Number of alignment annotations", naliannot,
140               numaliannot);
141       }
142
143       assertTrue(
144               "Number of sequence associated annotations wasn't at least "
145                       + nminseqann, numsqswithali >= nminseqann);
146
147     } catch (Exception e)
148     {
149       e.printStackTrace();
150       assertTrue("Couln't format the alignment for output file.", false);
151     }
152   }
153
154   /**
155    * assert alignment equivalence
156    * 
157    * @param al
158    *          'original'
159    * @param al_input
160    *          'secondary' or generated alignment from some datapreserving
161    *          transformation
162    * @param ignoreFeatures
163    *          when true, differences in seuqence feature annotation are ignored.
164    */
165   public static void testAlignmentEquivalence(AlignmentI al,
166           AlignmentI al_input, boolean ignoreFeatures)
167   {
168     assertNotNull("Original alignment was null", al);
169     assertNotNull("Generated alignment was null", al_input);
170
171     assertTrue(
172             "Alignment dimension mismatch: originl contains "
173                     + al.getHeight() + " and generated has "
174                     + al_input.getHeight() + " sequences; original has "
175                     + al.getWidth() + " and generated has "
176                     + al_input.getWidth() + " columns.",
177             al.getHeight() == al_input.getHeight()
178                     && al.getWidth() == al_input.getWidth());
179
180     // check Alignment annotation
181     AlignmentAnnotation[] aa_new = al_input.getAlignmentAnnotation();
182     AlignmentAnnotation[] aa_original = al.getAlignmentAnnotation();
183
184     // note - at moment we do not distinguish between alignment without any
185     // annotation rows and alignment with no annotation row vector
186     // we might want to revise this in future
187     int aa_new_size = (aa_new == null ? 0 : aa_new.length), aa_original_size = (aa_original == null ? 0
188             : aa_original.length);
189     Map<Integer, java.util.BitSet> orig_groups = new HashMap<Integer, java.util.BitSet>(), new_groups = new HashMap<Integer, java.util.BitSet>();
190
191     if (aa_new != null && aa_original != null)
192     {
193       for (int i = 0; i < aa_original.length; i++)
194       {
195         if (aa_new.length > i)
196         {
197           assertTrue("Different alignment annotation at position " + i,
198                   equalss(aa_original[i], aa_new[i]));
199           // compare graphGroup or graph properties - needed to verify JAL-1299
200           assertTrue("Graph type not identical.",
201                   aa_original[i].graph == aa_new[i].graph);
202           assertTrue("Visibility not identical.",
203                   aa_original[i].visible == aa_new[i].visible);
204           assertTrue(
205                   "Threshold line not identical.",
206                   aa_original[i].threshold == null ? aa_new[i].threshold == null
207                           : aa_original[i].threshold
208                                   .equals(aa_new[i].threshold));
209           // graphGroup may differ, but pattern should be the same
210           Integer o_ggrp = new Integer(aa_original[i].graphGroup + 2), n_ggrp = new Integer(
211                   aa_new[i].graphGroup + 2);
212           BitSet orig_g = orig_groups.get(o_ggrp), new_g = new_groups
213                   .get(n_ggrp);
214           if (orig_g == null)
215           {
216             orig_groups.put(o_ggrp, orig_g = new BitSet());
217           }
218           if (new_g == null)
219           {
220             new_groups.put(n_ggrp, new_g = new BitSet());
221           }
222           assertTrue("Graph Group pattern differs at annotation " + i,
223                   orig_g.equals(new_g));
224           orig_g.set(i);
225           new_g.set(i);
226         }
227         else
228         {
229           System.err.println("No matching annotation row for "
230                   + aa_original[i].toString());
231         }
232       }
233     }
234     assertTrue(
235             "Generated and imported alignment have different annotation sets ("
236                     + aa_new_size + " != " + aa_original_size + ")",
237             aa_new_size == aa_original_size);
238
239     // check sequences, annotation and features
240     SequenceI[] seq_original = new SequenceI[al.getSequencesArray().length];
241     seq_original = al.getSequencesArray();
242     SequenceI[] seq_new = new SequenceI[al_input.getSequencesArray().length];
243     seq_new = al_input.getSequencesArray();
244     SequenceFeature[] sequenceFeatures_original, sequenceFeatures_new;
245     AlignmentAnnotation annot_original, annot_new;
246     //
247     for (int i = 0; i < al.getSequencesArray().length; i++)
248     {
249       String name = seq_original[i].getName();
250       int start = seq_original[i].getStart();
251       int end = seq_original[i].getEnd();
252       System.out.println("Check sequence: " + name + "/" + start + "-"
253               + end);
254
255       // search equal sequence
256       for (int in = 0; in < al_input.getSequencesArray().length; in++)
257       {
258         if (name.equals(seq_new[in].getName())
259                 && start == seq_new[in].getStart()
260                 && end == seq_new[in].getEnd())
261         {
262           String ss_original = seq_original[i].getSequenceAsString();
263           String ss_new = seq_new[in].getSequenceAsString();
264           assertTrue("The sequences " + name + "/" + start + "-" + end
265                   + " are not equal", ss_original.equals(ss_new));
266
267           assertTrue(
268                   "Sequence Features were not equivalent"
269                           + (ignoreFeatures ? " ignoring." : ""),
270                   ignoreFeatures
271                           || (seq_original[i].getSequenceFeatures() == null && seq_new[in]
272                           .getSequenceFeatures() == null)
273                           || (seq_original[i].getSequenceFeatures() != null && seq_new[in]
274                                   .getSequenceFeatures() != null));
275           // compare sequence features
276           if (seq_original[i].getSequenceFeatures() != null
277                   && seq_new[in].getSequenceFeatures() != null)
278           {
279             System.out.println("There are feature!!!");
280             sequenceFeatures_original = new SequenceFeature[seq_original[i]
281                     .getSequenceFeatures().length];
282             sequenceFeatures_original = seq_original[i]
283                     .getSequenceFeatures();
284             sequenceFeatures_new = new SequenceFeature[seq_new[in]
285                     .getSequenceFeatures().length];
286             sequenceFeatures_new = seq_new[in].getSequenceFeatures();
287
288             assertTrue("different number of features", seq_original[i]
289                     .getSequenceFeatures().length == seq_new[in]
290                     .getSequenceFeatures().length);
291
292             for (int feat = 0; feat < seq_original[i].getSequenceFeatures().length; feat++)
293             {
294               assertTrue("Different features",
295                       sequenceFeatures_original[feat]
296                               .equals(sequenceFeatures_new[feat]));
297             }
298           }
299           // compare alignment annotation
300           if (al.getSequenceAt(i).getAnnotation() != null
301                   && al_input.getSequenceAt(in).getAnnotation() != null)
302           {
303             for (int j = 0; j < al.getSequenceAt(i).getAnnotation().length; j++)
304             {
305               if (al.getSequenceAt(i).getAnnotation()[j] != null
306                       && al_input.getSequenceAt(in).getAnnotation()[j] != null)
307               {
308                 annot_original = al.getSequenceAt(i).getAnnotation()[j];
309                 annot_new = al_input.getSequenceAt(in).getAnnotation()[j];
310                 assertTrue("Different annotation elements",
311                         equalss(annot_original, annot_new));
312               }
313             }
314           }
315           else if (al.getSequenceAt(i).getAnnotation() == null
316                   && al_input.getSequenceAt(in).getAnnotation() == null)
317           {
318             System.out.println("No annotations");
319           }
320           else if (al.getSequenceAt(i).getAnnotation() != null
321                   && al_input.getSequenceAt(in).getAnnotation() == null)
322           {
323             assertTrue("Annotations differed between sequences ("
324                     + al.getSequenceAt(i).getName() + ") and ("
325                     + al_input.getSequenceAt(i).getName() + ")", false);
326           }
327           break;
328         }
329       }
330     }
331   }
332
333   /*
334    * compare annotations
335    */
336   private static boolean equalss(AlignmentAnnotation annot_or,
337           AlignmentAnnotation annot_new)
338   {
339     if (annot_or.annotations.length != annot_new.annotations.length)
340     {
341       System.err.println("Different lengths for annotation row elements: "
342               + annot_or.annotations.length + "!="
343               + annot_new.annotations.length);
344       return false;
345     }
346     for (int i = 0; i < annot_or.annotations.length; i++)
347     {
348       Annotation an_or = annot_or.annotations[i], an_new = annot_new.annotations[i];
349       if (an_or != null && an_new != null)
350       {
351         if (!an_or.displayCharacter.trim().equals(
352                 an_new.displayCharacter.trim())
353                 || !("" + an_or.secondaryStructure).trim().equals(
354                         ("" + an_new.secondaryStructure).trim())
355                 || (an_or.description != an_new.description && (an_or.description == null
356                         || an_new.description == null || !an_or.description
357                           .equals(an_new.description))))
358         {
359           System.err.println("Annotation Element Mismatch\nElement " + i
360                   + " in original: " + annot_or.annotations[i].toString()
361                   + "\nElement " + i + " in new: "
362                   + annot_new.annotations[i].toString());
363           return false;
364         }
365       }
366       else if (annot_or.annotations[i] == null
367               && annot_new.annotations[i] == null)
368       {
369         continue;
370       }
371       else
372       {
373         System.err.println("Annotation Element Mismatch\nElement "
374                 + i
375                 + " in original: "
376                 + (annot_or.annotations[i] == null ? "is null"
377                         : annot_or.annotations[i].toString())
378                 + "\nElement "
379                 + i
380                 + " in new: "
381                 + (annot_new.annotations[i] == null ? "is null"
382                         : annot_new.annotations[i].toString()));
383         return false;
384       }
385     }
386     return true;
387   }
388 }