JAL-1645 Version-Rel Version 2.9 Year-Rel 2015 Licensing glob
[jalview.git] / test / jalview / io / StockholmFileTest.java
1 /*
2  * Jalview - A Sequence Alignment Editor and Viewer (Version 2.9)
3  * Copyright (C) 2015 The Jalview Authors
4  * 
5  * This file is part of Jalview.
6  * 
7  * Jalview is free software: you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License 
9  * as published by the Free Software Foundation, either version 3
10  * of the License, or (at your option) any later version.
11  *  
12  * Jalview is distributed in the hope that it will be useful, but 
13  * WITHOUT ANY WARRANTY; without even the implied warranty 
14  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR 
15  * PURPOSE.  See the GNU General Public License for more details.
16  * 
17  * You should have received a copy of the GNU General Public License
18  * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
19  * The Jalview Authors are detailed in the 'AUTHORS' file.
20  */
21 package jalview.io;
22
23 import static org.testng.AssertJUnit.assertEquals;
24 import static org.testng.AssertJUnit.assertNotNull;
25 import static org.testng.AssertJUnit.assertTrue;
26
27 import jalview.datamodel.AlignmentAnnotation;
28 import jalview.datamodel.AlignmentI;
29 import jalview.datamodel.Annotation;
30 import jalview.datamodel.SequenceFeature;
31 import jalview.datamodel.SequenceI;
32
33 import java.io.File;
34 import java.util.BitSet;
35 import java.util.HashMap;
36 import java.util.Map;
37
38 import org.testng.annotations.Test;
39
40 public class StockholmFileTest
41 {
42
43   static String PfamFile = "examples/PF00111_seed.stk",
44           RfamFile = "examples/RF00031_folded.stk";
45
46   @Test(groups = { "Functional" })
47   public void pfamFileIO() throws Exception
48   {
49     testFileIOwithFormat(new File(PfamFile), "STH", -1, 0);
50   }
51
52   @Test(groups = { "Functional" })
53   public void pfamFileDataExtraction() throws Exception
54   {
55     AppletFormatAdapter af = new AppletFormatAdapter();
56     AlignmentI al = af.readFile(PfamFile, af.FILE,
57             new IdentifyFile().Identify(PfamFile, af.FILE));
58     int numpdb = 0;
59     for (SequenceI sq : al.getSequences())
60     {
61       if (sq.getAllPDBEntries() != null)
62       {
63         numpdb += sq.getAllPDBEntries().size();
64       }
65     }
66     assertTrue(
67             "PF00111 seed alignment has at least 1 PDB file, but the reader found none.",
68             numpdb > 0);
69   }
70
71   @Test(groups = { "Functional" })
72   public void rfamFileIO() throws Exception
73   {
74     testFileIOwithFormat(new File(RfamFile), "STH", 2, 1);
75   }
76
77   /**
78    * test alignment data in given file can be imported, exported and reimported
79    * with no dataloss
80    * 
81    * @param f
82    *          - source datafile (IdentifyFile.identify() should work with it)
83    * @param ioformat
84    *          - label for IO class used to write and read back in the data from
85    *          f
86    */
87
88   public static void testFileIOwithFormat(File f, String ioformat,
89           int naliannot, int nminseqann)
90   {
91     System.out.println("Reading file: " + f);
92     String ff = f.getPath();
93     try
94     {
95       AppletFormatAdapter rf = new AppletFormatAdapter();
96
97       AlignmentI al = rf.readFile(ff, AppletFormatAdapter.FILE,
98               new IdentifyFile().Identify(ff, AppletFormatAdapter.FILE));
99
100       assertNotNull("Couldn't read supplied alignment data.", al);
101
102       // make sure dataset is initialised ? not sure about this
103       for (int i = 0; i < al.getSequencesArray().length; ++i)
104       {
105         al.getSequenceAt(i).setDatasetSequence(al.getSequenceAt(i));
106       }
107       String outputfile = rf.formatSequences(ioformat, al, true);
108       System.out.println("Output file in '" + ioformat + "':\n"
109               + outputfile + "\n<<EOF\n");
110       // test for consistency in io
111       AlignmentI al_input = new AppletFormatAdapter().readFile(outputfile,
112               AppletFormatAdapter.PASTE, ioformat);
113       assertNotNull("Couldn't parse reimported alignment data.", al_input);
114
115       String identifyoutput = new IdentifyFile().Identify(outputfile,
116               AppletFormatAdapter.PASTE);
117       assertNotNull("Identify routine failed for outputformat " + ioformat,
118               identifyoutput);
119       assertTrue(
120               "Identify routine could not recognise output generated by '"
121                       + ioformat + "' writer",
122               ioformat.equals(identifyoutput));
123       testAlignmentEquivalence(al, al_input, false);
124       int numaliannot = 0, numsqswithali = 0;
125       for (AlignmentAnnotation ala : al_input.getAlignmentAnnotation())
126       {
127         if (ala.sequenceRef == null)
128         {
129           numaliannot++;
130         }
131         else
132         {
133           numsqswithali++;
134         }
135       }
136       if (naliannot > -1)
137       {
138         assertEquals("Number of alignment annotations", naliannot,
139                 numaliannot);
140       }
141
142       assertTrue(
143               "Number of sequence associated annotations wasn't at least "
144                       + nminseqann, numsqswithali >= nminseqann);
145
146     } catch (Exception e)
147     {
148       e.printStackTrace();
149       assertTrue("Couln't format the alignment for output file.", false);
150     }
151   }
152
153   /**
154    * assert alignment equivalence
155    * 
156    * @param al
157    *          'original'
158    * @param al_input
159    *          'secondary' or generated alignment from some datapreserving
160    *          transformation
161    * @param ignoreFeatures
162    *          when true, differences in seuqence feature annotation are ignored.
163    */
164   public static void testAlignmentEquivalence(AlignmentI al,
165           AlignmentI al_input, boolean ignoreFeatures)
166   {
167     assertNotNull("Original alignment was null", al);
168     assertNotNull("Generated alignment was null", al_input);
169
170     assertTrue(
171             "Alignment dimension mismatch: originl contains "
172                     + al.getHeight() + " and generated has "
173                     + al_input.getHeight() + " sequences; original has "
174                     + al.getWidth() + " and generated has "
175                     + al_input.getWidth() + " columns.",
176             al.getHeight() == al_input.getHeight()
177                     && al.getWidth() == al_input.getWidth());
178
179     // check Alignment annotation
180     AlignmentAnnotation[] aa_new = al_input.getAlignmentAnnotation();
181     AlignmentAnnotation[] aa_original = al.getAlignmentAnnotation();
182
183     // note - at moment we do not distinguish between alignment without any
184     // annotation rows and alignment with no annotation row vector
185     // we might want to revise this in future
186     int aa_new_size = (aa_new == null ? 0 : aa_new.length), aa_original_size = (aa_original == null ? 0
187             : aa_original.length);
188     Map<Integer, java.util.BitSet> orig_groups = new HashMap<Integer, java.util.BitSet>(), new_groups = new HashMap<Integer, java.util.BitSet>();
189
190     if (aa_new != null && aa_original != null)
191     {
192       for (int i = 0; i < aa_original.length; i++)
193       {
194         if (aa_new.length > i)
195         {
196           assertTrue("Different alignment annotation at position " + i,
197                   equalss(aa_original[i], aa_new[i]));
198           // compare graphGroup or graph properties - needed to verify JAL-1299
199           assertTrue("Graph type not identical.",
200                   aa_original[i].graph == aa_new[i].graph);
201           assertTrue("Visibility not identical.",
202                   aa_original[i].visible == aa_new[i].visible);
203           assertTrue(
204                   "Threshold line not identical.",
205                   aa_original[i].threshold == null ? aa_new[i].threshold == null
206                           : aa_original[i].threshold
207                                   .equals(aa_new[i].threshold));
208           // graphGroup may differ, but pattern should be the same
209           Integer o_ggrp = new Integer(aa_original[i].graphGroup + 2), n_ggrp = new Integer(
210                   aa_new[i].graphGroup + 2);
211           BitSet orig_g = orig_groups.get(o_ggrp), new_g = new_groups
212                   .get(n_ggrp);
213           if (orig_g == null)
214           {
215             orig_groups.put(o_ggrp, orig_g = new BitSet());
216           }
217           if (new_g == null)
218           {
219             new_groups.put(n_ggrp, new_g = new BitSet());
220           }
221           assertTrue("Graph Group pattern differs at annotation " + i,
222                   orig_g.equals(new_g));
223           orig_g.set(i);
224           new_g.set(i);
225         }
226         else
227         {
228           System.err.println("No matching annotation row for "
229                   + aa_original[i].toString());
230         }
231       }
232     }
233     assertTrue(
234             "Generated and imported alignment have different annotation sets ("
235                     + aa_new_size + " != " + aa_original_size + ")",
236             aa_new_size == aa_original_size);
237
238     // check sequences, annotation and features
239     SequenceI[] seq_original = new SequenceI[al.getSequencesArray().length];
240     seq_original = al.getSequencesArray();
241     SequenceI[] seq_new = new SequenceI[al_input.getSequencesArray().length];
242     seq_new = al_input.getSequencesArray();
243     SequenceFeature[] sequenceFeatures_original, sequenceFeatures_new;
244     AlignmentAnnotation annot_original, annot_new;
245     //
246     for (int i = 0; i < al.getSequencesArray().length; i++)
247     {
248       String name = seq_original[i].getName();
249       int start = seq_original[i].getStart();
250       int end = seq_original[i].getEnd();
251       System.out.println("Check sequence: " + name + "/" + start + "-"
252               + end);
253
254       // search equal sequence
255       for (int in = 0; in < al_input.getSequencesArray().length; in++)
256       {
257         if (name.equals(seq_new[in].getName())
258                 && start == seq_new[in].getStart()
259                 && end == seq_new[in].getEnd())
260         {
261           String ss_original = seq_original[i].getSequenceAsString();
262           String ss_new = seq_new[in].getSequenceAsString();
263           assertTrue("The sequences " + name + "/" + start + "-" + end
264                   + " are not equal", ss_original.equals(ss_new));
265
266           assertTrue(
267                   "Sequence Features were not equivalent"
268                           + (ignoreFeatures ? " ignoring." : ""),
269                   ignoreFeatures
270                           || (seq_original[i].getSequenceFeatures() == null && seq_new[in]
271                                   .getSequenceFeatures() == null)
272                           || (seq_original[i].getSequenceFeatures() != null && seq_new[in]
273                                   .getSequenceFeatures() != null));
274           // compare sequence features
275           if (seq_original[i].getSequenceFeatures() != null
276                   && seq_new[in].getSequenceFeatures() != null)
277           {
278             System.out.println("There are feature!!!");
279             sequenceFeatures_original = new SequenceFeature[seq_original[i]
280                     .getSequenceFeatures().length];
281             sequenceFeatures_original = seq_original[i]
282                     .getSequenceFeatures();
283             sequenceFeatures_new = new SequenceFeature[seq_new[in]
284                     .getSequenceFeatures().length];
285             sequenceFeatures_new = seq_new[in].getSequenceFeatures();
286
287             assertTrue("different number of features", seq_original[i]
288                     .getSequenceFeatures().length == seq_new[in]
289                     .getSequenceFeatures().length);
290
291             for (int feat = 0; feat < seq_original[i].getSequenceFeatures().length; feat++)
292             {
293               assertTrue("Different features",
294                       sequenceFeatures_original[feat]
295                               .equals(sequenceFeatures_new[feat]));
296             }
297           }
298           // compare alignment annotation
299           if (al.getSequenceAt(i).getAnnotation() != null
300                   && al_input.getSequenceAt(in).getAnnotation() != null)
301           {
302             for (int j = 0; j < al.getSequenceAt(i).getAnnotation().length; j++)
303             {
304               if (al.getSequenceAt(i).getAnnotation()[j] != null
305                       && al_input.getSequenceAt(in).getAnnotation()[j] != null)
306               {
307                 annot_original = al.getSequenceAt(i).getAnnotation()[j];
308                 annot_new = al_input.getSequenceAt(in).getAnnotation()[j];
309                 assertTrue("Different annotation elements",
310                         equalss(annot_original, annot_new));
311               }
312             }
313           }
314           else if (al.getSequenceAt(i).getAnnotation() == null
315                   && al_input.getSequenceAt(in).getAnnotation() == null)
316           {
317             System.out.println("No annotations");
318           }
319           else if (al.getSequenceAt(i).getAnnotation() != null
320                   && al_input.getSequenceAt(in).getAnnotation() == null)
321           {
322             assertTrue("Annotations differed between sequences ("
323                     + al.getSequenceAt(i).getName() + ") and ("
324                     + al_input.getSequenceAt(i).getName() + ")", false);
325           }
326           break;
327         }
328       }
329     }
330   }
331
332   /*
333    * compare annotations
334    */
335   private static boolean equalss(AlignmentAnnotation annot_or,
336           AlignmentAnnotation annot_new)
337   {
338     if (annot_or.annotations.length != annot_new.annotations.length)
339     {
340       System.err.println("Different lengths for annotation row elements: "
341               + annot_or.annotations.length + "!="
342               + annot_new.annotations.length);
343       return false;
344     }
345     for (int i = 0; i < annot_or.annotations.length; i++)
346     {
347       Annotation an_or = annot_or.annotations[i], an_new = annot_new.annotations[i];
348       if (an_or != null && an_new != null)
349       {
350         if (!an_or.displayCharacter.trim().equals(
351                 an_new.displayCharacter.trim())
352                 || !("" + an_or.secondaryStructure).trim().equals(
353                         ("" + an_new.secondaryStructure).trim())
354                 || (an_or.description != an_new.description && !((an_or.description == null && an_new.description
355                         .trim().length() == 0)
356                         || (an_new.description == null && an_or.description
357                                 .trim().length() == 0) || an_or.description
358                         .trim().equals(an_new.description.trim()))))
359         {
360           System.err.println("Annotation Element Mismatch\nElement " + i
361                   + " in original: " + annot_or.annotations[i].toString()
362                   + "\nElement " + i + " in new: "
363                   + annot_new.annotations[i].toString());
364           return false;
365         }
366       }
367       else if (annot_or.annotations[i] == null
368               && annot_new.annotations[i] == null)
369       {
370         continue;
371       }
372       else
373       {
374         System.err.println("Annotation Element Mismatch\nElement "
375                 + i
376                 + " in original: "
377                 + (annot_or.annotations[i] == null ? "is null"
378                         : annot_or.annotations[i].toString())
379                 + "\nElement "
380                 + i
381                 + " in new: "
382                 + (annot_new.annotations[i] == null ? "is null"
383                         : annot_new.annotations[i].toString()));
384         return false;
385       }
386     }
387     return true;
388   }
389 }