4505f169e8c5737944e22847ab612088b3f1f934
[jalview.git] / test / jalview / io / StockholmFileTest.java
1 package jalview.io;
2
3 import static org.junit.Assert.assertNotNull;
4 import static org.junit.Assert.assertTrue;
5 import jalview.datamodel.Alignment;
6 import jalview.datamodel.AlignmentAnnotation;
7 import jalview.datamodel.AlignmentI;
8 import jalview.datamodel.Annotation;
9 import jalview.datamodel.SequenceFeature;
10 import jalview.datamodel.SequenceI;
11
12 import java.io.File;
13 import java.util.BitSet;
14 import java.util.HashMap;
15 import java.util.Map;
16
17 import org.junit.Test;
18
19 public class StockholmFileTest
20 {
21
22   static String PfamFile = "examples/PF00111_seed.stk",
23           RfamFile = "examples/RF00031_folded.stk";
24
25   @Test
26   public void pfamFileIO() throws Exception
27   {
28     testFileIOwithFormat(new File(PfamFile), "STH");
29   }
30   @Test
31   public void pfamFileDataExtraction() throws Exception
32   {  AppletFormatAdapter af = new AppletFormatAdapter();
33     AlignmentI al = af.readFile(PfamFile, af.FILE,
34             new IdentifyFile().Identify(PfamFile, af.FILE));
35     int numpdb = 0;
36     for (SequenceI sq : al.getSequences())
37     {
38       if (sq.getPDBId() != null)
39       {
40         numpdb += sq.getPDBId().size();
41       }
42     }
43     assertTrue(
44             "PF00111 seed alignment has at least 1 PDB file, but the reader found none.",
45             numpdb > 0);
46   }
47
48   @Test
49   public void rfamFileIO() throws Exception
50   {
51     testFileIOwithFormat(new File(RfamFile), "STH");
52   }
53
54   /**
55    * test alignment data in given file can be imported, exported and reimported
56    * with no dataloss
57    * 
58    * @param f
59    *          - source datafile (IdentifyFile.identify() should work with it)
60    * @param ioformat
61    *          - label for IO class used to write and read back in the data from
62    *          f
63    */
64   public static void testFileIOwithFormat(File f, String ioformat)
65   {
66     System.out.println("Reading file: " + f);
67     String ff = f.getPath();
68     try
69     {
70       AppletFormatAdapter rf = new AppletFormatAdapter();
71
72       Alignment al = rf.readFile(ff, AppletFormatAdapter.FILE,
73               new IdentifyFile().Identify(ff, AppletFormatAdapter.FILE));
74
75       assertNotNull("Couldn't read supplied alignment data.", al);
76
77       // make sure dataset is initialised ? not sure about this
78       for (int i = 0; i < al.getSequencesArray().length; ++i)
79       {
80         al.getSequenceAt(i).setDatasetSequence(al.getSequenceAt(i));
81       }
82       String outputfile = rf.formatSequences(ioformat, al, true);
83       System.out.println("Output file in '"+ioformat+"':\n"+outputfile+"\n<<EOF\n");
84       // test for consistency in io
85       Alignment al_input = new AppletFormatAdapter().readFile(outputfile,
86               AppletFormatAdapter.PASTE, ioformat);
87       assertNotNull("Couldn't parse reimported alignment data.", al_input);
88
89       String identifyoutput = new IdentifyFile().Identify(outputfile,
90               AppletFormatAdapter.PASTE);
91       assertNotNull("Identify routine failed for outputformat " + ioformat,
92               identifyoutput);
93       assertTrue(
94               "Identify routine could not recognise output generated by '"
95                       + ioformat + "' writer",
96               ioformat.equals(identifyoutput));
97       testAlignmentEquivalence(al, al_input);
98     } catch (Exception e)
99     {
100       e.printStackTrace();
101       assertTrue("Couln't format the alignment for output file.", false);
102     }
103   }
104
105   /**
106    * assert alignment equivalence
107    * 
108    * @param al
109    *          'original'
110    * @param al_input
111    *          'secondary' or generated alignment from some datapreserving
112    *          transformation
113    */
114   public static void testAlignmentEquivalence(AlignmentI al,
115           AlignmentI al_input)
116   {
117     assertNotNull("Original alignment was null", al);
118     assertNotNull("Generated alignment was null", al_input);
119
120     assertTrue(
121             "Alignment dimension mismatch: originl contains "
122                     + al.getHeight() + " and generated has "
123                     + al_input.getHeight() + " sequences; original has "
124                     + al.getWidth() + " and generated has "
125                     + al_input.getWidth() + " columns.",
126             al.getHeight() == al_input.getHeight()
127                     && al.getWidth() == al_input.getWidth());
128
129     // check Alignment annotation
130     AlignmentAnnotation[] aa_new = al_input.getAlignmentAnnotation();
131     AlignmentAnnotation[] aa_original = al.getAlignmentAnnotation();
132
133     // note - at moment we do not distinguish between alignment without any
134     // annotation rows and alignment with no annotation row vector
135     // we might want to revise this in future
136     int aa_new_size = (aa_new == null ? 0 : aa_new.length), aa_original_size = (aa_original == null ? 0
137             : aa_original.length);
138     Map<Integer,java.util.BitSet> orig_groups=new HashMap<Integer,java.util.BitSet>(),new_groups=new HashMap<Integer,java.util.BitSet>();
139
140     if (aa_new != null && aa_original != null)
141     {
142       for (int i = 0; i < aa_original.length; i++)
143       {
144         if (aa_new.length>i) {
145           assertTrue("Different alignment annotation at position "+i,
146                 equalss(aa_original[i], aa_new[i]));
147           // compare graphGroup or graph properties - needed to verify JAL-1299
148           assertTrue("Graph type not identical.",aa_original[i].graph==aa_new[i].graph);
149           assertTrue("Visibility not identical.", aa_original[i].visible==aa_new[i].visible);
150           assertTrue(
151                   "Threshold line not identical.",
152                   aa_original[i].threshold == null ? aa_new[i].threshold == null
153                           : aa_original[i].threshold
154                                   .equals(aa_new[i].threshold));
155           // graphGroup may differ, but pattern should be the same
156           Integer o_ggrp=new Integer(aa_original[i].graphGroup+2),n_ggrp=new Integer(aa_new[i].graphGroup+2);
157           BitSet orig_g=orig_groups.get(o_ggrp),new_g=new_groups.get(n_ggrp);
158           if (orig_g==null) {
159             orig_groups.put(o_ggrp,orig_g= new BitSet());
160           }
161           if (new_g==null) {
162             new_groups.put(n_ggrp, new_g=new BitSet());
163           }
164           assertTrue("Graph Group pattern differs at annotation "+i, orig_g.equals(new_g));
165           orig_g.set(i); new_g.set(i);
166         } else {
167           System.err.println("No matching annotation row for "+aa_original[i].toString());
168         }
169       }
170     }
171     assertTrue(
172             "Generated and imported alignment have different annotation sets ("
173                     + aa_new_size + " != " + aa_original_size + ")",
174             aa_new_size == aa_original_size);
175
176     // check sequences, annotation and features
177     SequenceI[] seq_original = new SequenceI[al.getSequencesArray().length];
178     seq_original = al.getSequencesArray();
179     SequenceI[] seq_new = new SequenceI[al_input.getSequencesArray().length];
180     seq_new = al_input.getSequencesArray();
181     SequenceFeature[] sequenceFeatures_original, sequenceFeatures_new;
182     AlignmentAnnotation annot_original, annot_new;
183     //
184     for (int i = 0; i < al.getSequencesArray().length; i++)
185     {
186       String name = seq_original[i].getName();
187       int start = seq_original[i].getStart();
188       int end = seq_original[i].getEnd();
189       System.out.println("Check sequence: " + name + "/" + start + "-"
190               + end);
191
192       // search equal sequence
193       for (int in = 0; in < al_input.getSequencesArray().length; in++)
194       {
195         if (name.equals(seq_new[in].getName())
196                 && start == seq_new[in].getStart()
197                 && end == seq_new[in].getEnd())
198         {
199           String ss_original = seq_original[i].getSequenceAsString();
200           String ss_new = seq_new[in].getSequenceAsString();
201           assertTrue("The sequences " + name + "/" + start + "-" + end
202                   + " are not equal", ss_original.equals(ss_new));
203
204           assertTrue(
205                   "Sequence Features were not equivalent",
206                   (seq_original[i].getSequenceFeatures() == null && seq_new[in]
207                           .getSequenceFeatures() == null)
208                           || (seq_original[i].getSequenceFeatures() != null && seq_new[in]
209                                   .getSequenceFeatures() != null));
210           // compare sequence features
211           if (seq_original[i].getSequenceFeatures() != null
212                   && seq_new[in].getSequenceFeatures() != null)
213           {
214             System.out.println("There are feature!!!");
215             sequenceFeatures_original = new SequenceFeature[seq_original[i]
216                     .getSequenceFeatures().length];
217             sequenceFeatures_original = seq_original[i]
218                     .getSequenceFeatures();
219             sequenceFeatures_new = new SequenceFeature[seq_new[in]
220                     .getSequenceFeatures().length];
221             sequenceFeatures_new = seq_new[in].getSequenceFeatures();
222
223             assertTrue("different number of features", seq_original[i]
224                     .getSequenceFeatures().length == seq_new[in]
225                     .getSequenceFeatures().length);
226
227             for (int feat = 0; feat < seq_original[i].getSequenceFeatures().length; feat++)
228             {
229               assertTrue("Different features",
230                       sequenceFeatures_original[feat]
231                               .equals(sequenceFeatures_new[feat]));
232             }
233           }
234           // compare alignment annotation
235           if (al.getSequenceAt(i).getAnnotation() != null
236                   && al_input.getSequenceAt(in).getAnnotation() != null)
237           {
238             for (int j = 0; j < al.getSequenceAt(i).getAnnotation().length; j++)
239             {
240               if (al.getSequenceAt(i).getAnnotation()[j] != null
241                       && al_input.getSequenceAt(in).getAnnotation()[j] != null)
242               {
243                 annot_original = al.getSequenceAt(i).getAnnotation()[j];
244                 annot_new = al_input.getSequenceAt(in).getAnnotation()[j];
245                 assertTrue("Different annotation elements",
246                         equalss(annot_original, annot_new));
247               }
248             }
249           }
250           else if (al.getSequenceAt(i).getAnnotation() == null
251                   && al_input.getSequenceAt(in).getAnnotation() == null)
252           {
253             System.out.println("No annotations");
254           }
255           else if (al.getSequenceAt(i).getAnnotation() != null
256                   && al_input.getSequenceAt(in).getAnnotation() == null)
257           {
258             assertTrue("Annotations differed between sequences ("
259                     + al.getSequenceAt(i).getName() + ") and ("
260                     + al_input.getSequenceAt(i).getName() + ")", false);
261           }
262           break;
263         }
264       }
265     }
266   }
267
268   /*
269    * compare annotations
270    */
271   private static boolean equalss(AlignmentAnnotation annot_or,
272           AlignmentAnnotation annot_new)
273   {
274     if (annot_or.annotations.length != annot_new.annotations.length)
275     {
276       System.err.println("Different lengths for annotation row elements: "+annot_or.annotations.length +"!="+ annot_new.annotations.length);
277       return false;
278     }
279     for (int i = 0; i < annot_or.annotations.length; i++)
280     {
281       Annotation an_or=annot_or.annotations[i],an_new=annot_new.annotations[i];
282       if (an_or != null
283               && an_new!= null)
284       {
285         if (!an_or.displayCharacter.trim()
286                 .equals(an_new.displayCharacter.trim())
287                 || !(""+an_or.secondaryStructure).trim().equals((""+an_new.secondaryStructure).trim())
288                 || (an_or.description != an_new.description && (an_or.description == null
289                         || an_new.description == null || !an_or.description
290                           .equals(an_new.description))))
291         {
292           System.err.println("Annotation Element Mismatch\nElement "+i+" in original: "+annot_or.annotations[i].toString()+"\nElement "+i+" in new: "+annot_new.annotations[i].toString());
293           return false;
294         }
295       }
296       else if (annot_or.annotations[i] == null
297               && annot_new.annotations[i] == null)
298       {
299         continue;
300       }
301       else
302       {
303         System.err.println("Annotation Element Mismatch\nElement "+i+" in original: "+(annot_or.annotations[i]==null ? "is null" : annot_or.annotations[i].toString())+"\nElement "+i+" in new: "+(annot_new.annotations[i] == null ? "is null" : annot_new.annotations[i].toString()));
304         return false;
305       }
306     }
307     return true;
308   }
309 }