split test for general Stockholm IO consistency (JAL-1199) from test for IO of sequen...
[jalview.git] / test / jalview / io / StockholmFileTest.java
1 package jalview.io;
2
3 import static org.junit.Assert.*;
4 import jalview.datamodel.Alignment;
5 import jalview.datamodel.AlignmentAnnotation;
6 import jalview.datamodel.AlignmentI;
7 import jalview.datamodel.SequenceFeature;
8 import jalview.datamodel.SequenceI;
9
10 import java.io.File;
11 import java.io.IOException;
12 import java.io.InputStream;
13
14 import org.junit.Test;
15
16 public class StockholmFileTest
17 {
18
19   static String PfamFile = "examples/PF00111_seed.stk",
20           RfamFile = "examples/RF00031_folded.stk";
21
22   @Test
23   public void pfamFileIO() throws Exception
24   {
25     testFileIOwithFormat(new File(PfamFile), "STH");
26   }
27   @Test
28   public void pfamFileDataExtraction() throws Exception
29   {  AppletFormatAdapter af = new AppletFormatAdapter();
30     AlignmentI al = af.readFile(PfamFile, af.FILE,
31             new IdentifyFile().Identify(PfamFile, af.FILE));
32     int numpdb = 0;
33     for (SequenceI sq : al.getSequences())
34     {
35       if (sq.getPDBId() != null)
36       {
37         numpdb += sq.getPDBId().size();
38       }
39     }
40     assertTrue(
41             "PF00111 seed alignment has at least 1 PDB file, but the reader found none.",
42             numpdb > 0);
43   }
44
45   @Test
46   public void rfamFileIO() throws Exception
47   {
48     testFileIOwithFormat(new File(RfamFile), "STH");
49   }
50
51   /**
52    * test alignment data in given file can be imported, exported and reimported
53    * with no dataloss
54    * 
55    * @param f
56    *          - source datafile (IdentifyFile.identify() should work with it)
57    * @param ioformat
58    *          - label for IO class used to write and read back in the data from
59    *          f
60    */
61   public static void testFileIOwithFormat(File f, String ioformat)
62   {
63     System.out.println("Reading file: " + f);
64     String ff = f.getPath();
65     try
66     {
67       AppletFormatAdapter rf = new AppletFormatAdapter();
68
69       Alignment al = rf.readFile(ff, AppletFormatAdapter.FILE,
70               new IdentifyFile().Identify(ff, AppletFormatAdapter.FILE));
71
72       assertNotNull("Couldn't read supplied alignment data.", al);
73
74       // make sure dataset is initialised ? not sure about this
75       for (int i = 0; i < al.getSequencesArray().length; ++i)
76       {
77         al.getSequenceAt(i).setDatasetSequence(al.getSequenceAt(i));
78       }
79       String outputfile = rf.formatSequences(ioformat, al, true);
80       System.out.println("Output file in '"+ioformat+"':\n"+outputfile+"\n<<EOF\n");
81       // test for consistency in io
82       Alignment al_input = new AppletFormatAdapter().readFile(outputfile,
83               AppletFormatAdapter.PASTE, ioformat);
84       assertNotNull("Couldn't parse reimported alignment data.", al_input);
85
86       String identifyoutput = new IdentifyFile().Identify(outputfile,
87               AppletFormatAdapter.PASTE);
88       assertNotNull("Identify routine failed for outputformat " + ioformat,
89               identifyoutput);
90       assertTrue(
91               "Identify routine could not recognise output generated by '"
92                       + ioformat + "' writer",
93               ioformat.equals(identifyoutput));
94       testAlignmentEquivalence(al, al_input);
95     } catch (Exception e)
96     {
97       e.printStackTrace();
98       assertTrue("Couln't format the alignment for output file.", false);
99     }
100   }
101
102   /**
103    * assert alignment equivalence
104    * 
105    * @param al
106    *          'original'
107    * @param al_input
108    *          'secondary' or generated alignment from some datapreserving
109    *          transformation
110    */
111   private static void testAlignmentEquivalence(AlignmentI al,
112           AlignmentI al_input)
113   {
114     assertNotNull("Original alignment was null", al);
115     assertNotNull("Generated alignment was null", al_input);
116
117     assertTrue(
118             "Alignment dimension mismatch: originl contains "
119                     + al.getHeight() + " and generated has "
120                     + al_input.getHeight() + " sequences; original has "
121                     + al.getWidth() + " and generated has "
122                     + al_input.getWidth() + " columns.",
123             al.getHeight() == al_input.getHeight()
124                     && al.getWidth() == al_input.getWidth());
125
126     // check Alignment annotation
127     AlignmentAnnotation[] aa_new = al_input.getAlignmentAnnotation();
128     AlignmentAnnotation[] aa_original = al.getAlignmentAnnotation();
129
130     // note - at moment we do not distinguish between alignment without any
131     // annotation rows and alignment with no annotation row vector
132     // we might want to revise this in future
133     int aa_new_size = (aa_new == null ? 0 : aa_new.length), aa_original_size = (aa_original == null ? 0
134             : aa_original.length);
135
136     if (aa_new != null && aa_original != null)
137     {
138       for (int i = 0; i < aa_original.length; i++)
139       {
140         if (aa_new.length>i) {
141           assertTrue("Different alignment annotation ordering",
142                 equalss(aa_original[i], aa_new[i]));
143         } else {
144           System.err.println("No matching annotation row for "+aa_original[i].toString());
145         }
146       }
147     }
148     assertTrue(
149             "Generated and imported alignment have different annotation sets ("
150                     + aa_new_size + " != " + aa_original_size + ")",
151             aa_new_size == aa_original_size);
152
153     // check sequences, annotation and features
154     SequenceI[] seq_original = new SequenceI[al.getSequencesArray().length];
155     seq_original = al.getSequencesArray();
156     SequenceI[] seq_new = new SequenceI[al_input.getSequencesArray().length];
157     seq_new = al_input.getSequencesArray();
158     SequenceFeature[] sequenceFeatures_original, sequenceFeatures_new;
159     AlignmentAnnotation annot_original, annot_new;
160     //
161     for (int i = 0; i < al.getSequencesArray().length; i++)
162     {
163       String name = seq_original[i].getName();
164       int start = seq_original[i].getStart();
165       int end = seq_original[i].getEnd();
166       System.out.println("Check sequence: " + name + "/" + start + "-"
167               + end);
168
169       // search equal sequence
170       for (int in = 0; in < al_input.getSequencesArray().length; in++)
171       {
172         if (name.equals(seq_new[in].getName())
173                 && start == seq_new[in].getStart()
174                 && end == seq_new[in].getEnd())
175         {
176           String ss_original = seq_original[i].getSequenceAsString();
177           String ss_new = seq_new[in].getSequenceAsString();
178           assertTrue("The sequences " + name + "/" + start + "-" + end
179                   + " are not equal", ss_original.equals(ss_new));
180
181           assertTrue(
182                   "Sequence Features were not equivalent",
183                   (seq_original[i].getSequenceFeatures() == null && seq_new[in]
184                           .getSequenceFeatures() == null)
185                           || (seq_original[i].getSequenceFeatures() != null && seq_new[in]
186                                   .getSequenceFeatures() != null));
187           // compare sequence features
188           if (seq_original[i].getSequenceFeatures() != null
189                   && seq_new[in].getSequenceFeatures() != null)
190           {
191             System.out.println("There are feature!!!");
192             sequenceFeatures_original = new SequenceFeature[seq_original[i]
193                     .getSequenceFeatures().length];
194             sequenceFeatures_original = seq_original[i]
195                     .getSequenceFeatures();
196             sequenceFeatures_new = new SequenceFeature[seq_new[in]
197                     .getSequenceFeatures().length];
198             sequenceFeatures_new = seq_new[in].getSequenceFeatures();
199
200             assertTrue("different number of features", seq_original[i]
201                     .getSequenceFeatures().length == seq_new[in]
202                     .getSequenceFeatures().length);
203
204             for (int feat = 0; feat < seq_original[i].getSequenceFeatures().length; feat++)
205             {
206               assertTrue("Different features",
207                       sequenceFeatures_original[feat]
208                               .equals(sequenceFeatures_new[feat]));
209             }
210           }
211
212           // compare alignment annotation
213           if (al.getSequenceAt(i).getAnnotation() != null
214                   && al_input.getSequenceAt(in).getAnnotation() != null)
215           {
216             for (int j = 0; j < al.getSequenceAt(i).getAnnotation().length; j++)
217             {
218               if (al.getSequenceAt(i).getAnnotation()[j] != null
219                       && al_input.getSequenceAt(in).getAnnotation()[j] != null)
220               {
221                 annot_original = al.getSequenceAt(i).getAnnotation()[j];
222                 annot_new = al_input.getSequenceAt(in).getAnnotation()[j];
223                 assertTrue("Different annotation",
224                         equalss(annot_original, annot_new));
225               }
226             }
227           }
228           else if (al.getSequenceAt(i).getAnnotation() == null
229                   && al_input.getSequenceAt(in).getAnnotation() == null)
230           {
231             System.out.println("No annotations");
232           }
233           else if (al.getSequenceAt(i).getAnnotation() != null
234                   && al_input.getSequenceAt(in).getAnnotation() == null)
235           {
236             assertTrue("Annotations differed between sequences ("
237                     + al.getSequenceAt(i).getName() + ") and ("
238                     + al_input.getSequenceAt(i).getName() + ")", false);
239           }
240           break;
241         }
242       }
243     }
244   }
245
246   /*
247    * compare annotations
248    */
249   private static boolean equalss(AlignmentAnnotation annot_or,
250           AlignmentAnnotation annot_new)
251   {
252     if (annot_or.annotations.length != annot_new.annotations.length)
253     {
254       System.err.println("Different lengths for annotation row elements: "+annot_or.annotations.length +"!="+ annot_new.annotations.length);
255       return false;
256     }
257     for (int i = 0; i < annot_or.annotations.length; i++)
258     {
259       if (annot_or.annotations[i] != null
260               && annot_new.annotations[i] != null)
261       {
262         // Jim's comment - shouldn't the conditional here be using || not && for
263         // all these clauses ?
264         if (!annot_or.annotations[i].displayCharacter
265                 .equals(annot_new.annotations[i].displayCharacter)
266                 && annot_or.annotations[i].secondaryStructure != annot_new.annotations[i].secondaryStructure
267                 && !annot_or.annotations[i].description
268                         .equals(annot_new.annotations[i].description))
269         {
270           System.err.println("Annotation Element Mismatch\nElement "+i+" in original: "+annot_or.annotations[i].toString()+"\nElement "+i+" in new: "+annot_new.annotations[i].toString());
271           return false;
272         }
273       }
274       else if (annot_or.annotations[i] == null
275               && annot_new.annotations[i] == null)
276       {
277         continue;
278       }
279       else
280       {
281         System.err.println("Annotation Element Mismatch\nElement "+i+" in original: "+(annot_or.annotations[i]==null ? "is null" : annot_or.annotations[i].toString())+"\nElement "+i+" in new: "+(annot_new.annotations[i] == null ? "is null" : annot_new.annotations[i].toString()));
282         return false;
283       }
284     }
285     return true;
286   }
287 }