JAL-1199 JAL-1272 refactored Natasha's test to a generic alignment data consistency...
[jalview.git] / test / jalview / io / StockholmFileTest.java
1 package jalview.io;
2
3 import static org.junit.Assert.*;
4 import jalview.datamodel.Alignment;
5 import jalview.datamodel.AlignmentAnnotation;
6 import jalview.datamodel.AlignmentI;
7 import jalview.datamodel.SequenceFeature;
8 import jalview.datamodel.SequenceI;
9
10 import java.io.File;
11 import java.io.IOException;
12 import java.io.InputStream;
13
14 import org.junit.Test;
15
16 public class StockholmFileTest
17 {
18
19   public static void main(String argv[])
20   {
21     try {
22       new StockholmFileTest().pfamFileIO();
23     
24     } catch (Exception x)
25     {
26       x.printStackTrace();
27     }
28   }
29   static String PfamFile = "examples/PF00111_seed.stk", RfamFile="examples/RF00031_folded.stk";
30
31   @Test
32   public void pfamFileIO() throws Exception
33   {
34       test(new File(PfamFile));
35       AppletFormatAdapter af = new AppletFormatAdapter();
36       AlignmentI al = af.readFile(PfamFile, af.FILE, new IdentifyFile().Identify(PfamFile, af.FILE));
37       int numpdb=0;
38       for (SequenceI sq:al.getSequences())
39       {
40         if (sq.getPDBId()!=null)
41         {
42           numpdb+=sq.getPDBId().size();
43         }
44       }
45       assertTrue("PF00111 seed alignment has at least 1 PDB file, but the reader found none.",numpdb>0);
46   }
47   @Test
48   public void rfamFileIO() throws Exception
49   {
50     testFileIOwithFormat(new File(RfamFile), "STH");
51   }
52
53   /**
54    * test alignment data in given file can be imported, exported and reimported
55    * with no dataloss
56    * 
57    * @param f
58    *          - source datafile (IdentifyFile.identify() should work with it)
59    * @param ioformat
60    *          - label for IO class used to write and read back in the data from
61    *          f
62    */
63   public static void testFileIOwithFormat(File f, String ioformat)
64   {
65     System.out.println("Reading file: " + f);
66     String ff = f.getPath();
67     try
68     {
69       AppletFormatAdapter rf = new AppletFormatAdapter();
70
71       Alignment al = rf.readFile(ff, AppletFormatAdapter.FILE,
72               new IdentifyFile().Identify(ff, AppletFormatAdapter.FILE));
73
74       assertNotNull("Couldn't read supplied alignment data.", al);
75
76       // make sure dataset is initialised ? not sure about this
77       for (int i = 0; i < al.getSequencesArray().length; ++i)
78       {
79         al.getSequenceAt(i).setDatasetSequence(al.getSequenceAt(i));
80       }
81       String outputfile = rf.formatSequences(ioformat, al, true);
82       System.out.println("Output file in '"+ioformat+"':\n"+outputfile+"\n<<EOF\n");
83       // test for consistency in io
84       Alignment al_input = new AppletFormatAdapter().readFile(outputfile,
85               AppletFormatAdapter.PASTE, ioformat);
86       assertNotNull("Couldn't parse reimported alignment data.", al_input);
87
88       String identifyoutput = new IdentifyFile().Identify(outputfile,
89               AppletFormatAdapter.PASTE);
90       assertNotNull("Identify routine failed for outputformat " + ioformat,
91               identifyoutput);
92       assertTrue(
93               "Identify routine could not recognise output generated by '"
94                       + ioformat + "' writer",
95               ioformat.equals(identifyoutput));
96       testAlignmentEquivalence(al, al_input);
97     } catch (Exception e)
98     {
99       e.printStackTrace();
100       assertTrue("Couln't format the alignment for output file.", false);
101     }
102   }
103
104   /**
105    * assert alignment equivalence
106    * 
107    * @param al
108    *          'original'
109    * @param al_input
110    *          'secondary' or generated alignment from some datapreserving
111    *          transformation
112    */
113   private static void testAlignmentEquivalence(AlignmentI al,
114           AlignmentI al_input)
115   {
116     assertNotNull("Original alignment was null", al);
117     assertNotNull("Generated alignment was null", al_input);
118
119     assertTrue(
120             "Alignment dimension mismatch: originl contains "
121                     + al.getHeight() + " and generated has "
122                     + al_input.getHeight() + " sequences; original has "
123                     + al.getWidth() + " and generated has "
124                     + al_input.getWidth() + " columns.",
125             al.getHeight() == al_input.getHeight()
126                     && al.getWidth() == al_input.getWidth());
127
128     // check Alignment annotation
129     AlignmentAnnotation[] aa_new = al_input.getAlignmentAnnotation();
130     AlignmentAnnotation[] aa_original = al.getAlignmentAnnotation();
131
132     // note - at moment we do not distinguish between alignment without any
133     // annotation rows and alignment with no annotation row vector
134     // we might want to revise this in future
135     int aa_new_size = (aa_new == null ? 0 : aa_new.length), aa_original_size = (aa_original == null ? 0
136             : aa_original.length);
137
138     if (aa_new != null && aa_original != null)
139     {
140       for (int i = 0; i < aa_original.length; i++)
141       {
142         if (aa_new.length>i) {
143           assertTrue("Different alignment annotation ordering",
144                 equalss(aa_original[i], aa_new[i]));
145         } else {
146           System.err.println("No matching annotation row for "+aa_original[i].toString());
147         }
148       }
149     }
150     assertTrue(
151             "Generated and imported alignment have different annotation sets ("
152                     + aa_new_size + " != " + aa_original_size + ")",
153             aa_new_size == aa_original_size);
154
155     // check sequences, annotation and features
156     SequenceI[] seq_original = new SequenceI[al.getSequencesArray().length];
157     seq_original = al.getSequencesArray();
158     SequenceI[] seq_new = new SequenceI[al_input.getSequencesArray().length];
159     seq_new = al_input.getSequencesArray();
160     SequenceFeature[] sequenceFeatures_original, sequenceFeatures_new;
161     AlignmentAnnotation annot_original, annot_new;
162     //
163     for (int i = 0; i < al.getSequencesArray().length; i++)
164     {
165       String name = seq_original[i].getName();
166       int start = seq_original[i].getStart();
167       int end = seq_original[i].getEnd();
168       System.out.println("Check sequence: " + name + "/" + start + "-"
169               + end);
170
171       // search equal sequence
172       for (int in = 0; in < al_input.getSequencesArray().length; in++)
173       {
174         if (name.equals(seq_new[in].getName())
175                 && start == seq_new[in].getStart()
176                 && end == seq_new[in].getEnd())
177         {
178           String ss_original = seq_original[i].getSequenceAsString();
179           String ss_new = seq_new[in].getSequenceAsString();
180           assertTrue("The sequences " + name + "/" + start + "-" + end
181                   + " are not equal", ss_original.equals(ss_new));
182
183           assertTrue(
184                   "Sequence Features were not equivalent",
185                   (seq_original[i].getSequenceFeatures() == null && seq_new[in]
186                           .getSequenceFeatures() == null)
187                           || (seq_original[i].getSequenceFeatures() != null && seq_new[in]
188                                   .getSequenceFeatures() != null));
189           // compare sequence features
190           if (seq_original[i].getSequenceFeatures() != null
191                   && seq_new[in].getSequenceFeatures() != null)
192           {
193             System.out.println("There are feature!!!");
194             sequenceFeatures_original = new SequenceFeature[seq_original[i]
195                     .getSequenceFeatures().length];
196             sequenceFeatures_original = seq_original[i]
197                     .getSequenceFeatures();
198             sequenceFeatures_new = new SequenceFeature[seq_new[in]
199                     .getSequenceFeatures().length];
200             sequenceFeatures_new = seq_new[in].getSequenceFeatures();
201
202             assertTrue("different number of features", seq_original[i]
203                     .getSequenceFeatures().length == seq_new[in]
204                     .getSequenceFeatures().length);
205
206             for (int feat = 0; feat < seq_original[i].getSequenceFeatures().length; feat++)
207             {
208               assertTrue("Different features",
209                       sequenceFeatures_original[feat]
210                               .equals(sequenceFeatures_new[feat]));
211             }
212           }
213
214           // compare alignment annotation
215           if (al.getSequenceAt(i).getAnnotation() != null
216                   && al_input.getSequenceAt(in).getAnnotation() != null)
217           {
218             for (int j = 0; j < al.getSequenceAt(i).getAnnotation().length; j++)
219             {
220               if (al.getSequenceAt(i).getAnnotation()[j] != null
221                       && al_input.getSequenceAt(in).getAnnotation()[j] != null)
222               {
223                 annot_original = al.getSequenceAt(i).getAnnotation()[j];
224                 annot_new = al_input.getSequenceAt(in).getAnnotation()[j];
225                 assertTrue("Different annotation",
226                         equalss(annot_original, annot_new));
227               }
228             }
229           }
230           else if (al.getSequenceAt(i).getAnnotation() == null
231                   && al_input.getSequenceAt(in).getAnnotation() == null)
232           {
233             System.out.println("No annotations");
234           }
235           else if (al.getSequenceAt(i).getAnnotation() != null
236                   && al_input.getSequenceAt(in).getAnnotation() == null)
237           {
238             assertTrue("Annotations differed between sequences ("
239                     + al.getSequenceAt(i).getName() + ") and ("
240                     + al_input.getSequenceAt(i).getName() + ")", false);
241           }
242           break;
243         }
244       }
245     }
246   }
247
248   /*
249    * compare annotations
250    */
251   private static boolean equalss(AlignmentAnnotation annot_or,
252           AlignmentAnnotation annot_new)
253   {
254     if (annot_or.annotations.length != annot_new.annotations.length)
255     {
256       System.err.println("Different lengths for annotation row elements: "+annot_or.annotations.length +"!="+ annot_new.annotations.length);
257       return false;
258     }
259     for (int i = 0; i < annot_or.annotations.length; i++)
260     {
261       if (annot_or.annotations[i] != null
262               && annot_new.annotations[i] != null)
263       {
264         // Jim's comment - shouldn't the conditional here be using || not && for
265         // all these clauses ?
266         if (!annot_or.annotations[i].displayCharacter
267                 .equals(annot_new.annotations[i].displayCharacter)
268                 && annot_or.annotations[i].secondaryStructure != annot_new.annotations[i].secondaryStructure
269                 && !annot_or.annotations[i].description
270                         .equals(annot_new.annotations[i].description))
271         {
272           System.err.println("Annotation Element Mismatch\nElement "+i+" in original: "+annot_or.annotations[i].toString()+"\nElement "+i+" in new: "+annot_new.annotations[i].toString());
273           return false;
274         }
275       }
276       else if (annot_or.annotations[i] == null
277               && annot_new.annotations[i] == null)
278       {
279         continue;
280       }
281       else
282       {
283         System.err.println("Annotation Element Mismatch\nElement "+i+" in original: "+(annot_or.annotations[i]==null ? "is null" : annot_or.annotations[i].toString())+"\nElement "+i+" in new: "+(annot_new.annotations[i] == null ? "is null" : annot_new.annotations[i].toString()));
284         return false;
285       }
286     }
287     return true;
288   }
289 }