JAL-1780 JAL-653 Format/AppletFormat import and export pipeline regularised, uses...
[jalview.git] / test / jalview / io / StockholmFileTest.java
1 /*
2  * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3  * Copyright (C) $$Year-Rel$$ The Jalview Authors
4  * 
5  * This file is part of Jalview.
6  * 
7  * Jalview is free software: you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License 
9  * as published by the Free Software Foundation, either version 3
10  * of the License, or (at your option) any later version.
11  *  
12  * Jalview is distributed in the hope that it will be useful, but 
13  * WITHOUT ANY WARRANTY; without even the implied warranty 
14  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR 
15  * PURPOSE.  See the GNU General Public License for more details.
16  * 
17  * You should have received a copy of the GNU General Public License
18  * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
19  * The Jalview Authors are detailed in the 'AUTHORS' file.
20  */
21 package jalview.io;
22
23 import static org.junit.Assert.assertEquals;
24 import static org.junit.Assert.assertNotNull;
25 import static org.junit.Assert.assertTrue;
26 import jalview.datamodel.AlignmentAnnotation;
27 import jalview.datamodel.AlignmentI;
28 import jalview.datamodel.Annotation;
29 import jalview.datamodel.SequenceFeature;
30 import jalview.datamodel.SequenceI;
31
32 import java.io.File;
33 import java.util.BitSet;
34 import java.util.HashMap;
35 import java.util.Map;
36
37 import org.junit.Test;
38
39 public class StockholmFileTest
40 {
41
42   static String PfamFile = "examples/PF00111_seed.stk",
43           RfamFile = "examples/RF00031_folded.stk";
44
45   @Test
46   public void pfamFileIO() throws Exception
47   {
48     testFileIOwithFormat(new File(PfamFile), "STH", -1, 0);
49   }
50
51   @Test
52   public void pfamFileDataExtraction() throws Exception
53   {
54     AppletFormatAdapter af = new AppletFormatAdapter();
55     AlignmentI al = af.readFile(PfamFile, af.FILE,
56             new IdentifyFile().Identify(PfamFile, af.FILE));
57     int numpdb = 0;
58     for (SequenceI sq : al.getSequences())
59     {
60       if (sq.getPDBId() != null)
61       {
62         numpdb += sq.getPDBId().size();
63       }
64     }
65     assertTrue(
66             "PF00111 seed alignment has at least 1 PDB file, but the reader found none.",
67             numpdb > 0);
68   }
69
70   @Test
71   public void rfamFileIO() throws Exception
72   {
73     testFileIOwithFormat(new File(RfamFile), "STH", 2, 1);
74   }
75
76   /**
77    * test alignment data in given file can be imported, exported and reimported
78    * with no dataloss
79    * 
80    * @param f
81    *          - source datafile (IdentifyFile.identify() should work with it)
82    * @param ioformat
83    *          - label for IO class used to write and read back in the data from
84    *          f
85    */
86   public static void testFileIOwithFormat(File f, String ioformat,
87           int naliannot, int nminseqann)
88   {
89     System.out.println("Reading file: " + f);
90     String ff = f.getPath();
91     try
92     {
93       AppletFormatAdapter rf = new AppletFormatAdapter();
94
95       AlignmentI al = rf.readFile(ff, AppletFormatAdapter.FILE,
96               new IdentifyFile().Identify(ff, AppletFormatAdapter.FILE));
97
98       assertNotNull("Couldn't read supplied alignment data.", al);
99
100       // make sure dataset is initialised ? not sure about this
101       for (int i = 0; i < al.getSequencesArray().length; ++i)
102       {
103         al.getSequenceAt(i).setDatasetSequence(al.getSequenceAt(i));
104       }
105       String outputfile = rf.formatSequences(ioformat, al, true);
106       System.out.println("Output file in '" + ioformat + "':\n"
107               + outputfile + "\n<<EOF\n");
108       // test for consistency in io
109       AlignmentI al_input = new AppletFormatAdapter().readFile(outputfile,
110               AppletFormatAdapter.PASTE, ioformat);
111       assertNotNull("Couldn't parse reimported alignment data.", al_input);
112
113       String identifyoutput = new IdentifyFile().Identify(outputfile,
114               AppletFormatAdapter.PASTE);
115       assertNotNull("Identify routine failed for outputformat " + ioformat,
116               identifyoutput);
117       assertTrue(
118               "Identify routine could not recognise output generated by '"
119                       + ioformat + "' writer",
120               ioformat.equals(identifyoutput));
121       testAlignmentEquivalence(al, al_input, false);
122       int numaliannot = 0, numsqswithali = 0;
123       for (AlignmentAnnotation ala : al_input.getAlignmentAnnotation())
124       {
125         if (ala.sequenceRef == null)
126         {
127           numaliannot++;
128         }
129         else
130         {
131           numsqswithali++;
132         }
133       }
134       if (naliannot > -1)
135       {
136         assertEquals("Number of alignment annotations", naliannot,
137               numaliannot);
138       }
139
140       assertTrue(
141               "Number of sequence associated annotations wasn't at least "
142                       + nminseqann, numsqswithali >= nminseqann);
143
144     } catch (Exception e)
145     {
146       e.printStackTrace();
147       assertTrue("Couln't format the alignment for output file.", false);
148     }
149   }
150
151   /**
152    * assert alignment equivalence
153    * 
154    * @param al
155    *          'original'
156    * @param al_input
157    *          'secondary' or generated alignment from some datapreserving
158    *          transformation
159    * @param ignoreFeatures
160    *          when true, differences in seuqence feature annotation are ignored.
161    */
162   public static void testAlignmentEquivalence(AlignmentI al,
163           AlignmentI al_input, boolean ignoreFeatures)
164   {
165     assertNotNull("Original alignment was null", al);
166     assertNotNull("Generated alignment was null", al_input);
167
168     assertTrue(
169             "Alignment dimension mismatch: originl contains "
170                     + al.getHeight() + " and generated has "
171                     + al_input.getHeight() + " sequences; original has "
172                     + al.getWidth() + " and generated has "
173                     + al_input.getWidth() + " columns.",
174             al.getHeight() == al_input.getHeight()
175                     && al.getWidth() == al_input.getWidth());
176
177     // check Alignment annotation
178     AlignmentAnnotation[] aa_new = al_input.getAlignmentAnnotation();
179     AlignmentAnnotation[] aa_original = al.getAlignmentAnnotation();
180
181     // note - at moment we do not distinguish between alignment without any
182     // annotation rows and alignment with no annotation row vector
183     // we might want to revise this in future
184     int aa_new_size = (aa_new == null ? 0 : aa_new.length), aa_original_size = (aa_original == null ? 0
185             : aa_original.length);
186     Map<Integer, java.util.BitSet> orig_groups = new HashMap<Integer, java.util.BitSet>(), new_groups = new HashMap<Integer, java.util.BitSet>();
187
188     if (aa_new != null && aa_original != null)
189     {
190       for (int i = 0; i < aa_original.length; i++)
191       {
192         if (aa_new.length > i)
193         {
194           assertTrue("Different alignment annotation at position " + i,
195                   equalss(aa_original[i], aa_new[i]));
196           // compare graphGroup or graph properties - needed to verify JAL-1299
197           assertTrue("Graph type not identical.",
198                   aa_original[i].graph == aa_new[i].graph);
199           assertTrue("Visibility not identical.",
200                   aa_original[i].visible == aa_new[i].visible);
201           assertTrue(
202                   "Threshold line not identical.",
203                   aa_original[i].threshold == null ? aa_new[i].threshold == null
204                           : aa_original[i].threshold
205                                   .equals(aa_new[i].threshold));
206           // graphGroup may differ, but pattern should be the same
207           Integer o_ggrp = new Integer(aa_original[i].graphGroup + 2), n_ggrp = new Integer(
208                   aa_new[i].graphGroup + 2);
209           BitSet orig_g = orig_groups.get(o_ggrp), new_g = new_groups
210                   .get(n_ggrp);
211           if (orig_g == null)
212           {
213             orig_groups.put(o_ggrp, orig_g = new BitSet());
214           }
215           if (new_g == null)
216           {
217             new_groups.put(n_ggrp, new_g = new BitSet());
218           }
219           assertTrue("Graph Group pattern differs at annotation " + i,
220                   orig_g.equals(new_g));
221           orig_g.set(i);
222           new_g.set(i);
223         }
224         else
225         {
226           System.err.println("No matching annotation row for "
227                   + aa_original[i].toString());
228         }
229       }
230     }
231     assertTrue(
232             "Generated and imported alignment have different annotation sets ("
233                     + aa_new_size + " != " + aa_original_size + ")",
234             aa_new_size == aa_original_size);
235
236     // check sequences, annotation and features
237     SequenceI[] seq_original = new SequenceI[al.getSequencesArray().length];
238     seq_original = al.getSequencesArray();
239     SequenceI[] seq_new = new SequenceI[al_input.getSequencesArray().length];
240     seq_new = al_input.getSequencesArray();
241     SequenceFeature[] sequenceFeatures_original, sequenceFeatures_new;
242     AlignmentAnnotation annot_original, annot_new;
243     //
244     for (int i = 0; i < al.getSequencesArray().length; i++)
245     {
246       String name = seq_original[i].getName();
247       int start = seq_original[i].getStart();
248       int end = seq_original[i].getEnd();
249       System.out.println("Check sequence: " + name + "/" + start + "-"
250               + end);
251
252       // search equal sequence
253       for (int in = 0; in < al_input.getSequencesArray().length; in++)
254       {
255         if (name.equals(seq_new[in].getName())
256                 && start == seq_new[in].getStart()
257                 && end == seq_new[in].getEnd())
258         {
259           String ss_original = seq_original[i].getSequenceAsString();
260           String ss_new = seq_new[in].getSequenceAsString();
261           assertTrue("The sequences " + name + "/" + start + "-" + end
262                   + " are not equal", ss_original.equals(ss_new));
263
264           assertTrue(
265                   "Sequence Features were not equivalent"
266                           + (ignoreFeatures ? " ignoring." : ""),
267                   ignoreFeatures
268                           || (seq_original[i].getSequenceFeatures() == null && seq_new[in]
269                           .getSequenceFeatures() == null)
270                           || (seq_original[i].getSequenceFeatures() != null && seq_new[in]
271                                   .getSequenceFeatures() != null));
272           // compare sequence features
273           if (seq_original[i].getSequenceFeatures() != null
274                   && seq_new[in].getSequenceFeatures() != null)
275           {
276             System.out.println("There are feature!!!");
277             sequenceFeatures_original = new SequenceFeature[seq_original[i]
278                     .getSequenceFeatures().length];
279             sequenceFeatures_original = seq_original[i]
280                     .getSequenceFeatures();
281             sequenceFeatures_new = new SequenceFeature[seq_new[in]
282                     .getSequenceFeatures().length];
283             sequenceFeatures_new = seq_new[in].getSequenceFeatures();
284
285             assertTrue("different number of features", seq_original[i]
286                     .getSequenceFeatures().length == seq_new[in]
287                     .getSequenceFeatures().length);
288
289             for (int feat = 0; feat < seq_original[i].getSequenceFeatures().length; feat++)
290             {
291               assertTrue("Different features",
292                       sequenceFeatures_original[feat]
293                               .equals(sequenceFeatures_new[feat]));
294             }
295           }
296           // compare alignment annotation
297           if (al.getSequenceAt(i).getAnnotation() != null
298                   && al_input.getSequenceAt(in).getAnnotation() != null)
299           {
300             for (int j = 0; j < al.getSequenceAt(i).getAnnotation().length; j++)
301             {
302               if (al.getSequenceAt(i).getAnnotation()[j] != null
303                       && al_input.getSequenceAt(in).getAnnotation()[j] != null)
304               {
305                 annot_original = al.getSequenceAt(i).getAnnotation()[j];
306                 annot_new = al_input.getSequenceAt(in).getAnnotation()[j];
307                 assertTrue("Different annotation elements",
308                         equalss(annot_original, annot_new));
309               }
310             }
311           }
312           else if (al.getSequenceAt(i).getAnnotation() == null
313                   && al_input.getSequenceAt(in).getAnnotation() == null)
314           {
315             System.out.println("No annotations");
316           }
317           else if (al.getSequenceAt(i).getAnnotation() != null
318                   && al_input.getSequenceAt(in).getAnnotation() == null)
319           {
320             assertTrue("Annotations differed between sequences ("
321                     + al.getSequenceAt(i).getName() + ") and ("
322                     + al_input.getSequenceAt(i).getName() + ")", false);
323           }
324           break;
325         }
326       }
327     }
328   }
329
330   /*
331    * compare annotations
332    */
333   private static boolean equalss(AlignmentAnnotation annot_or,
334           AlignmentAnnotation annot_new)
335   {
336     if (annot_or.annotations.length != annot_new.annotations.length)
337     {
338       System.err.println("Different lengths for annotation row elements: "
339               + annot_or.annotations.length + "!="
340               + annot_new.annotations.length);
341       return false;
342     }
343     for (int i = 0; i < annot_or.annotations.length; i++)
344     {
345       Annotation an_or = annot_or.annotations[i], an_new = annot_new.annotations[i];
346       if (an_or != null && an_new != null)
347       {
348         if (!an_or.displayCharacter.trim().equals(
349                 an_new.displayCharacter.trim())
350                 || !("" + an_or.secondaryStructure).trim().equals(
351                         ("" + an_new.secondaryStructure).trim())
352                 || (an_or.description != an_new.description && (an_or.description == null
353                         || an_new.description == null || !an_or.description
354                           .equals(an_new.description))))
355         {
356           System.err.println("Annotation Element Mismatch\nElement " + i
357                   + " in original: " + annot_or.annotations[i].toString()
358                   + "\nElement " + i + " in new: "
359                   + annot_new.annotations[i].toString());
360           return false;
361         }
362       }
363       else if (annot_or.annotations[i] == null
364               && annot_new.annotations[i] == null)
365       {
366         continue;
367       }
368       else
369       {
370         System.err.println("Annotation Element Mismatch\nElement "
371                 + i
372                 + " in original: "
373                 + (annot_or.annotations[i] == null ? "is null"
374                         : annot_or.annotations[i].toString())
375                 + "\nElement "
376                 + i
377                 + " in new: "
378                 + (annot_new.annotations[i] == null ? "is null"
379                         : annot_new.annotations[i].toString()));
380         return false;
381       }
382     }
383     return true;
384   }
385 }