5c7bf151fd7caa820572b50b2fa011f4421ef400
[jalview.git] / test / jalview / io / FeaturesFileTest.java
1 /*
2  * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3  * Copyright (C) $$Year-Rel$$ The Jalview Authors
4  * 
5  * This file is part of Jalview.
6  * 
7  * Jalview is free software: you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License 
9  * as published by the Free Software Foundation, either version 3
10  * of the License, or (at your option) any later version.
11  *  
12  * Jalview is distributed in the hope that it will be useful, but 
13  * WITHOUT ANY WARRANTY; without even the implied warranty 
14  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR 
15  * PURPOSE.  See the GNU General Public License for more details.
16  * 
17  * You should have received a copy of the GNU General Public License
18  * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
19  * The Jalview Authors are detailed in the 'AUTHORS' file.
20  */
21 package jalview.io;
22
23 import static org.testng.AssertJUnit.assertEquals;
24 import static org.testng.AssertJUnit.assertFalse;
25 import static org.testng.AssertJUnit.assertNotNull;
26 import static org.testng.AssertJUnit.assertNull;
27 import static org.testng.AssertJUnit.assertSame;
28 import static org.testng.AssertJUnit.assertTrue;
29 import static org.testng.internal.junit.ArrayAsserts.assertArrayEquals;
30
31 import jalview.api.FeatureColourI;
32 import jalview.api.FeatureRenderer;
33 import jalview.datamodel.Alignment;
34 import jalview.datamodel.AlignmentI;
35 import jalview.datamodel.SequenceDummy;
36 import jalview.datamodel.SequenceFeature;
37 import jalview.datamodel.SequenceI;
38 import jalview.datamodel.features.FeatureMatcher;
39 import jalview.datamodel.features.FeatureMatcherI;
40 import jalview.datamodel.features.FeatureMatcherSet;
41 import jalview.datamodel.features.FeatureMatcherSetI;
42 import jalview.datamodel.features.SequenceFeatures;
43 import jalview.gui.AlignFrame;
44 import jalview.gui.Desktop;
45 import jalview.gui.JvOptionPane;
46 import jalview.schemes.FeatureColour;
47 import jalview.structure.StructureSelectionManager;
48 import jalview.util.matcher.Condition;
49 import jalview.viewmodel.seqfeatures.FeatureRendererModel;
50 import jalview.viewmodel.seqfeatures.FeatureRendererModel.FeatureSettingsBean;
51
52 import java.awt.Color;
53 import java.io.File;
54 import java.io.IOException;
55 import java.util.HashMap;
56 import java.util.Iterator;
57 import java.util.List;
58 import java.util.Map;
59
60 import org.testng.annotations.AfterClass;
61 import org.testng.annotations.BeforeClass;
62 import org.testng.annotations.Test;
63
64 public class FeaturesFileTest
65 {
66   private static final String TAB = "\t";
67   private static String simpleGffFile = "examples/testdata/simpleGff3.gff";
68
69   @AfterClass(alwaysRun = true)
70   public void tearDownAfterClass()
71   {
72     /*
73      * remove any sequence mappings created so they don't pollute other tests
74      */
75     StructureSelectionManager ssm = StructureSelectionManager
76             .getStructureSelectionManager(Desktop.instance);
77     ssm.resetAll();
78   }
79
80   @BeforeClass(alwaysRun = true)
81   public void setUpJvOptionPane()
82   {
83     JvOptionPane.setInteractiveMode(false);
84     JvOptionPane.setMockResponse(JvOptionPane.CANCEL_OPTION);
85   }
86
87   @Test(groups = { "Functional" })
88   public void testParse() throws Exception
89   {
90     File f = new File("examples/uniref50.fa");
91     AlignmentI al = readAlignmentFile(f);
92     AlignFrame af = new AlignFrame(al, 500, 500);
93     Map<String, FeatureColourI> colours = af.getFeatureRenderer()
94             .getFeatureColours();
95     FeaturesFile featuresFile = new FeaturesFile(
96             "examples/exampleFeatures.txt", DataSourceType.FILE);
97     assertTrue("Test " + "Features file test"
98             + "\nFailed to parse features file.",
99             featuresFile.parse(al.getDataset(), colours, true));
100
101     /*
102      * Refetch the colour map from the FeatureRenderer (to confirm it has been
103      * updated - JAL-1904), and verify (some) feature group colours
104      */
105     colours = af.getFeatureRenderer().getFeatureColours();
106     assertEquals("27 feature group colours not found", 27, colours.size());
107     assertEquals(colours.get("Cath").getColour(), new Color(0x93b1d1));
108     assertEquals(colours.get("ASX-MOTIF").getColour(), new Color(0x6addbb));
109     FeatureColourI kdColour = colours.get("kdHydrophobicity");
110     assertTrue(kdColour.isGraduatedColour());
111     assertTrue(kdColour.isAboveThreshold());
112     assertEquals(-2f, kdColour.getThreshold());
113
114     /*
115      * verify (some) features on sequences
116      */
117     List<SequenceFeature> sfs = al.getSequenceAt(0).getDatasetSequence()
118             .getSequenceFeatures(); // FER_CAPAA
119     SequenceFeatures.sortFeatures(sfs, true);
120     assertEquals(8, sfs.size());
121
122     /*
123      * verify (in ascending start position order)
124      */
125     SequenceFeature sf = sfs.get(0);
126     assertEquals("Pfam family%LINK%", sf.description);
127     assertEquals(0, sf.begin);
128     assertEquals(0, sf.end);
129     assertEquals("uniprot", sf.featureGroup);
130     assertEquals("Pfam", sf.type);
131     assertEquals(1, sf.links.size());
132     assertEquals("Pfam family|http://pfam.xfam.org/family/PF00111",
133             sf.links.get(0));
134
135     sf = sfs.get(1);
136     assertEquals("Ferredoxin_fold Status: True Positive ", sf.description);
137     assertEquals(3, sf.begin);
138     assertEquals(93, sf.end);
139     assertEquals("uniprot", sf.featureGroup);
140     assertEquals("Cath", sf.type);
141
142     sf = sfs.get(2);
143     assertEquals("Fer2 Status: True Positive Pfam 8_8%LINK%",
144             sf.description);
145     assertEquals("Pfam 8_8|http://pfam.xfam.org/family/PF00111",
146             sf.links.get(0));
147     assertEquals(8, sf.begin);
148     assertEquals(83, sf.end);
149     assertEquals("uniprot", sf.featureGroup);
150     assertEquals("Pfam", sf.type);
151
152     sf = sfs.get(3);
153     assertEquals("Iron-sulfur (2Fe-2S)", sf.description);
154     assertEquals(39, sf.begin);
155     assertEquals(39, sf.end);
156     assertEquals("uniprot", sf.featureGroup);
157     assertEquals("METAL", sf.type);
158
159     sf = sfs.get(4);
160     assertEquals("Iron-sulfur (2Fe-2S)", sf.description);
161     assertEquals(44, sf.begin);
162     assertEquals(44, sf.end);
163     assertEquals("uniprot", sf.featureGroup);
164     assertEquals("METAL", sf.type);
165
166     sf = sfs.get(5);
167     assertEquals("Iron-sulfur (2Fe-2S)", sf.description);
168     assertEquals(47, sf.begin);
169     assertEquals(47, sf.end);
170     assertEquals("uniprot", sf.featureGroup);
171     assertEquals("METAL", sf.type);
172
173     sf = sfs.get(6);
174     assertEquals("Iron-sulfur (2Fe-2S)", sf.description);
175     assertEquals(77, sf.begin);
176     assertEquals(77, sf.end);
177     assertEquals("uniprot", sf.featureGroup);
178     assertEquals("METAL", sf.type);
179
180     sf = sfs.get(7);
181     assertEquals(
182             "High confidence server. Only hits with scores over 0.8 are reported. PHOSPHORYLATION (T) 89_8%LINK%",
183             sf.description);
184     assertEquals(
185             "PHOSPHORYLATION (T) 89_8|http://www.cbs.dtu.dk/cgi-bin/proview/webface-link?seqid=P83527&amp;service=NetPhos-2.0",
186             sf.links.get(0));
187     assertEquals(89, sf.begin);
188     assertEquals(89, sf.end);
189     assertEquals("netphos", sf.featureGroup);
190     assertEquals("PHOSPHORYLATION (T)", sf.type);
191   }
192
193   /**
194    * Test parsing a features file with a mix of Jalview and GFF formatted
195    * content
196    * 
197    * @throws Exception
198    */
199   @Test(groups = { "Functional" })
200   public void testParse_mixedJalviewGff() throws Exception
201   {
202     File f = new File("examples/uniref50.fa");
203     AlignmentI al = readAlignmentFile(f);
204     AlignFrame af = new AlignFrame(al, 500, 500);
205     Map<String, FeatureColourI> colours = af.getFeatureRenderer()
206             .getFeatureColours();
207     // GFF2 uses space as name/value separator in column 9
208     String gffData = "METAL\tcc9900\n"
209             + "GFF\n"
210             + "FER_CAPAA\tuniprot\tMETAL\t44\t45\t4.0\t.\t.\tNote Iron-sulfur; Note 2Fe-2S\n"
211             + "FER1_SOLLC\tuniprot\tPfam\t55\t130\t2.0\t.\t.";
212     FeaturesFile featuresFile = new FeaturesFile(gffData,
213             DataSourceType.PASTE);
214     assertTrue("Failed to parse features file",
215             featuresFile.parse(al.getDataset(), colours, true));
216
217     // verify colours read or synthesized
218     colours = af.getFeatureRenderer().getFeatureColours();
219     assertEquals("1 feature group colours not found", 1, colours.size());
220     assertEquals(colours.get("METAL").getColour(), new Color(0xcc9900));
221
222     // verify feature on FER_CAPAA
223     List<SequenceFeature> sfs = al.getSequenceAt(0).getDatasetSequence()
224             .getSequenceFeatures();
225     assertEquals(1, sfs.size());
226     SequenceFeature sf = sfs.get(0);
227     assertEquals("Iron-sulfur,2Fe-2S", sf.description);
228     assertEquals(44, sf.begin);
229     assertEquals(45, sf.end);
230     assertEquals("uniprot", sf.featureGroup);
231     assertEquals("METAL", sf.type);
232     assertEquals(4f, sf.getScore(), 0.001f);
233
234     // verify feature on FER1_SOLLC
235     sfs = al.getSequenceAt(2).getDatasetSequence().getSequenceFeatures();
236     assertEquals(1, sfs.size());
237     sf = sfs.get(0);
238     assertEquals("uniprot", sf.description);
239     assertEquals(55, sf.begin);
240     assertEquals(130, sf.end);
241     assertEquals("uniprot", sf.featureGroup);
242     assertEquals("Pfam", sf.type);
243     assertEquals(2f, sf.getScore(), 0.001f);
244   }
245
246   public static AlignmentI readAlignmentFile(File f) throws IOException
247   {
248     System.out.println("Reading file: " + f);
249     String ff = f.getPath();
250     FormatAdapter rf = new FormatAdapter();
251
252     AlignmentI al = rf.readFile(ff, DataSourceType.FILE,
253             new IdentifyFile().identify(ff, DataSourceType.FILE));
254
255     al.setDataset(null); // creates dataset sequences
256     assertNotNull("Couldn't read supplied alignment data.", al);
257     return al;
258   }
259
260   /**
261    * Test parsing a features file with GFF formatted content only
262    * 
263    * @throws Exception
264    */
265   @Test(groups = { "Functional" })
266   public void testParse_pureGff3() throws Exception
267   {
268     File f = new File("examples/uniref50.fa");
269     AlignmentI al = readAlignmentFile(f);
270     AlignFrame af = new AlignFrame(al, 500, 500);
271     Map<String, FeatureColourI> colours = af.getFeatureRenderer()
272             .getFeatureColours();
273     // GFF3 uses '=' separator for name/value pairs in colum 9
274     String gffData = "##gff-version 3\n"
275             + "FER_CAPAA\tuniprot\tMETAL\t39\t39\t0.0\t.\t.\t"
276             + "Note=Iron-sulfur (2Fe-2S);Note=another note;evidence=ECO:0000255|PROSITE-ProRule:PRU00465\n"
277             + "FER1_SOLLC\tuniprot\tPfam\t55\t130\t3.0\t.\t.\tID=$23";
278     FeaturesFile featuresFile = new FeaturesFile(gffData,
279             DataSourceType.PASTE);
280     assertTrue("Failed to parse features file",
281             featuresFile.parse(al.getDataset(), colours, true));
282
283     // verify feature on FER_CAPAA
284     List<SequenceFeature> sfs = al.getSequenceAt(0).getDatasetSequence()
285             .getSequenceFeatures();
286     assertEquals(1, sfs.size());
287     SequenceFeature sf = sfs.get(0);
288     // description parsed from Note attribute
289     assertEquals("Iron-sulfur (2Fe-2S),another note", sf.description);
290     assertEquals(39, sf.begin);
291     assertEquals(39, sf.end);
292     assertEquals("uniprot", sf.featureGroup);
293     assertEquals("METAL", sf.type);
294     assertEquals(
295             "Note=Iron-sulfur (2Fe-2S);Note=another note;evidence=ECO:0000255|PROSITE-ProRule:PRU00465",
296             sf.getValue("ATTRIBUTES"));
297
298     // verify feature on FER1_SOLLC1
299     sfs = al.getSequenceAt(2).getDatasetSequence().getSequenceFeatures();
300     assertEquals(1, sfs.size());
301     sf = sfs.get(0);
302     // ID used for description if available
303     assertEquals("$23", sf.description);
304     assertEquals(55, sf.begin);
305     assertEquals(130, sf.end);
306     assertEquals("uniprot", sf.featureGroup);
307     assertEquals("Pfam", sf.type);
308     assertEquals(3f, sf.getScore(), 0.001f);
309   }
310
311   /**
312    * Test parsing a features file with Jalview format features (but no colour
313    * descriptors or startgroup to give the hint not to parse as GFF)
314    * 
315    * @throws Exception
316    */
317   @Test(groups = { "Functional" })
318   public void testParse_jalviewFeaturesOnly() throws Exception
319   {
320     File f = new File("examples/uniref50.fa");
321     AlignmentI al = readAlignmentFile(f);
322     AlignFrame af = new AlignFrame(al, 500, 500);
323     Map<String, FeatureColourI> colours = af.getFeatureRenderer()
324             .getFeatureColours();
325
326     /*
327      * one feature on FER_CAPAA and one on sequence 3 (index 2) FER1_SOLLC
328      */
329     String featureData = "Iron-sulfur (2Fe-2S)\tFER_CAPAA\t-1\t39\t39\tMETAL\n"
330             + "Iron-phosphorus (2Fe-P)\tID_NOT_SPECIFIED\t2\t86\t87\tMETALLIC\n";
331     FeaturesFile featuresFile = new FeaturesFile(featureData,
332             DataSourceType.PASTE);
333     assertTrue("Failed to parse features file",
334             featuresFile.parse(al.getDataset(), colours, true));
335
336     // verify FER_CAPAA feature
337     List<SequenceFeature> sfs = al.getSequenceAt(0).getDatasetSequence()
338             .getSequenceFeatures();
339     assertEquals(1, sfs.size());
340     SequenceFeature sf = sfs.get(0);
341     assertEquals("Iron-sulfur (2Fe-2S)", sf.description);
342     assertEquals(39, sf.begin);
343     assertEquals(39, sf.end);
344     assertEquals("METAL", sf.type);
345
346     // verify FER1_SOLLC feature
347     sfs = al.getSequenceAt(2).getDatasetSequence().getSequenceFeatures();
348     assertEquals(1, sfs.size());
349     sf = sfs.get(0);
350     assertEquals("Iron-phosphorus (2Fe-P)", sf.description);
351     assertEquals(86, sf.begin);
352     assertEquals(87, sf.end);
353     assertEquals("METALLIC", sf.type);
354   }
355
356   private void checkDatasetfromSimpleGff3(AlignmentI dataset)
357   {
358     assertEquals("no sequences extracted from GFF3 file", 2,
359             dataset.getHeight());
360
361     SequenceI seq1 = dataset.findName("seq1");
362     SequenceI seq2 = dataset.findName("seq2");
363     assertNotNull(seq1);
364     assertNotNull(seq2);
365     assertFalse(
366             "Failed to replace dummy seq1 with real sequence",
367             seq1 instanceof SequenceDummy
368                     && ((SequenceDummy) seq1).isDummy());
369     assertFalse(
370             "Failed to replace dummy seq2 with real sequence",
371             seq2 instanceof SequenceDummy
372                     && ((SequenceDummy) seq2).isDummy());
373     String placeholderseq = new SequenceDummy("foo").getSequenceAsString();
374     assertFalse("dummy replacement buggy for seq1",
375             placeholderseq.equals(seq1.getSequenceAsString()));
376     assertFalse("dummy replacement buggy for seq2",
377             placeholderseq.equals(seq2.getSequenceAsString()));
378     assertNotNull("No features added to seq1", seq1.getSequenceFeatures());
379     assertEquals("Wrong number of features", 3, seq1.getSequenceFeatures()
380             .size());
381     assertTrue(seq2.getSequenceFeatures().isEmpty());
382     assertEquals(
383             "Wrong number of features",
384             0,
385             seq2.getSequenceFeatures() == null ? 0 : seq2
386                     .getSequenceFeatures().size());
387     assertTrue(
388             "Expected at least one CDNA/Protein mapping for seq1",
389             dataset.getCodonFrame(seq1) != null
390                     && dataset.getCodonFrame(seq1).size() > 0);
391
392   }
393
394   @Test(groups = { "Functional" })
395   public void readGff3File() throws IOException
396   {
397     FeaturesFile gffreader = new FeaturesFile(true, simpleGffFile,
398             DataSourceType.FILE);
399     Alignment dataset = new Alignment(gffreader.getSeqsAsArray());
400     gffreader.addProperties(dataset);
401     checkDatasetfromSimpleGff3(dataset);
402   }
403
404   @Test(groups = { "Functional" })
405   public void simpleGff3FileClass() throws IOException
406   {
407     AlignmentI dataset = new Alignment(new SequenceI[] {});
408     FeaturesFile ffile = new FeaturesFile(simpleGffFile,
409             DataSourceType.FILE);
410   
411     boolean parseResult = ffile.parse(dataset, null, false, false);
412     assertTrue("return result should be true", parseResult);
413     checkDatasetfromSimpleGff3(dataset);
414   }
415
416   @Test(groups = { "Functional" })
417   public void simpleGff3FileLoader() throws IOException
418   {
419     AlignFrame af = new FileLoader(false).LoadFileWaitTillLoaded(
420             simpleGffFile, DataSourceType.FILE);
421     assertTrue(
422             "Didn't read the alignment into an alignframe from Gff3 File",
423             af != null);
424     checkDatasetfromSimpleGff3(af.getViewport().getAlignment());
425   }
426
427   @Test(groups = { "Functional" })
428   public void simpleGff3RelaxedIdMatching() throws IOException
429   {
430     AlignmentI dataset = new Alignment(new SequenceI[] {});
431     FeaturesFile ffile = new FeaturesFile(simpleGffFile,
432             DataSourceType.FILE);
433   
434     boolean parseResult = ffile.parse(dataset, null, false, true);
435     assertTrue("return result (relaxedID matching) should be true",
436             parseResult);
437     checkDatasetfromSimpleGff3(dataset);
438   }
439
440   @Test(groups = { "Functional" })
441   public void testPrintJalviewFormat() throws Exception
442   {
443     File f = new File("examples/uniref50.fa");
444     AlignmentI al = readAlignmentFile(f);
445     AlignFrame af = new AlignFrame(al, 500, 500);
446     Map<String, FeatureColourI> colours = af.getFeatureRenderer()
447             .getFeatureColours();
448     String features = "METAL\tcc9900\n"
449             + "GAMMA-TURN\tred|0,255,255|20.0|95.0|below|66.0\n"
450             + "Pfam\tred\n"
451             + "STARTGROUP\tuniprot\n"
452             + "Cath\tFER_CAPAA\t-1\t0\t0\tDomain\n" // non-positional feature
453             + "Iron\tFER_CAPAA\t-1\t39\t39\tMETAL\n"
454             + "Turn\tFER_CAPAA\t-1\t36\t38\tGAMMA-TURN\n"
455             + "<html>Pfam domain<a href=\"http://pfam.xfam.org/family/PF00111\">Pfam_3_4</a></html>\tFER_CAPAA\t-1\t20\t20\tPfam\n"
456             + "ENDGROUP\tuniprot\n";
457     FeaturesFile featuresFile = new FeaturesFile(features,
458             DataSourceType.PASTE);
459     featuresFile.parse(al.getDataset(), colours, false);
460
461     /*
462      * add positional and non-positional features with null and
463      * empty feature group to check handled correctly
464      */
465     SequenceI seq = al.getSequenceAt(1); // FER_CAPAN
466     seq.addSequenceFeature(new SequenceFeature("Pfam", "desc1", 0, 0, 1.3f,
467             null));
468     seq.addSequenceFeature(new SequenceFeature("Pfam", "desc2", 4, 9,
469             Float.NaN, null));
470     seq = al.getSequenceAt(2); // FER1_SOLLC
471     seq.addSequenceFeature(new SequenceFeature("Pfam", "desc3", 0, 0,
472             Float.NaN, ""));
473     seq.addSequenceFeature(new SequenceFeature("Pfam", "desc4", 5, 8,
474             -2.6f, ""));
475
476     /*
477      * first with no features displayed, exclude non-positional features
478      */
479     FeatureRenderer fr = af.alignPanel.getFeatureRenderer();
480     String exported = featuresFile
481             .printJalviewFormat(al.getSequencesArray(), fr, false, false);
482     String expected = "No Features Visible";
483     assertEquals(expected, exported);
484
485     /*
486      * include non-positional features, but still no positional features
487      */
488     fr.setGroupVisibility("uniprot", true);
489     exported = featuresFile.printJalviewFormat(al.getSequencesArray(), fr,
490             true, false);
491     expected = "\nSTARTGROUP\tuniprot\n"
492             + "Cath\tFER_CAPAA\t-1\t0\t0\tDomain\t0.0\n"
493             + "ENDGROUP\tuniprot\n\n"
494             + "desc1\tFER_CAPAN\t-1\t0\t0\tPfam\t1.3\n\n"
495             + "desc3\tFER1_SOLLC\t-1\t0\t0\tPfam\n"; // NaN is not output
496     assertEquals(expected, exported);
497
498     /*
499      * set METAL (in uniprot group) and GAMMA-TURN visible, but not Pfam
500      */
501     fr.setVisible("METAL");
502     fr.setVisible("GAMMA-TURN");
503     exported = featuresFile.printJalviewFormat(al.getSequencesArray(), fr,
504             false, false);
505     expected = "METAL\tcc9900\n"
506             + "GAMMA-TURN\tscore|ff0000|00ffff|noValueMin|20.0|95.0|below|66.0\n"
507             + "\nSTARTGROUP\tuniprot\n"
508             + "Iron\tFER_CAPAA\t-1\t39\t39\tMETAL\t0.0\n"
509             + "Turn\tFER_CAPAA\t-1\t36\t38\tGAMMA-TURN\t0.0\n"
510             + "ENDGROUP\tuniprot\n";
511     assertEquals(expected, exported);
512
513     /*
514      * now set Pfam visible
515      */
516     fr.setVisible("Pfam");
517     exported = featuresFile.printJalviewFormat(al.getSequencesArray(), fr,
518             false, false);
519     /*
520      * features are output within group, ordered by sequence and type
521      */
522     expected = "METAL\tcc9900\n"
523             + "Pfam\tff0000\n"
524             + "GAMMA-TURN\tscore|ff0000|00ffff|noValueMin|20.0|95.0|below|66.0\n"
525             + "\nSTARTGROUP\tuniprot\n"
526             + "Iron\tFER_CAPAA\t-1\t39\t39\tMETAL\t0.0\n"
527             + "<html>Pfam domain<a href=\"http://pfam.xfam.org/family/PF00111\">Pfam_3_4</a></html>\tFER_CAPAA\t-1\t20\t20\tPfam\t0.0\n"
528             + "Turn\tFER_CAPAA\t-1\t36\t38\tGAMMA-TURN\t0.0\n"
529             + "ENDGROUP\tuniprot\n"
530             // null / empty group features are output after named groups
531             + "\ndesc2\tFER_CAPAN\t-1\t4\t9\tPfam\n"
532             + "\ndesc4\tFER1_SOLLC\t-1\t5\t8\tPfam\t-2.6\n";
533     assertEquals(expected, exported);
534
535     /*
536      * hide uniprot group
537      */
538     fr.setGroupVisibility("uniprot", false);
539     expected = "METAL\tcc9900\n" + "Pfam\tff0000\n"
540             + "GAMMA-TURN\tscore|ff0000|00ffff|noValueMin|20.0|95.0|below|66.0\n"
541             + "\ndesc2\tFER_CAPAN\t-1\t4\t9\tPfam\n"
542             + "\ndesc4\tFER1_SOLLC\t-1\t5\t8\tPfam\t-2.6\n";
543     exported = featuresFile.printJalviewFormat(al.getSequencesArray(), fr,
544             false, false);
545     assertEquals(expected, exported);
546
547     /*
548      * include non-positional (overrides group not shown)
549      */
550     exported = featuresFile.printJalviewFormat(al.getSequencesArray(), fr,
551             true, false);
552     expected = "METAL\tcc9900\n" + "Pfam\tff0000\n"
553             + "GAMMA-TURN\tscore|ff0000|00ffff|noValueMin|20.0|95.0|below|66.0\n"
554             + "\nSTARTGROUP\tuniprot\n"
555             + "Cath\tFER_CAPAA\t-1\t0\t0\tDomain\t0.0\n"
556             + "ENDGROUP\tuniprot\n"
557             + "\ndesc1\tFER_CAPAN\t-1\t0\t0\tPfam\t1.3\n"
558             + "desc2\tFER_CAPAN\t-1\t4\t9\tPfam\n"
559             + "\ndesc3\tFER1_SOLLC\t-1\t0\t0\tPfam\n"
560             + "desc4\tFER1_SOLLC\t-1\t5\t8\tPfam\t-2.6\n";
561     assertEquals(expected, exported);
562   }
563
564   @Test(groups = { "Functional" })
565   public void testPrintGffFormat() throws Exception
566   {
567     File f = new File("examples/uniref50.fa");
568     AlignmentI al = readAlignmentFile(f);
569     AlignFrame af = new AlignFrame(al, 500, 500);
570
571     /*
572      * no features
573      */
574     FeaturesFile featuresFile = new FeaturesFile();
575     FeatureRendererModel fr = (FeatureRendererModel) af.alignPanel
576             .getFeatureRenderer();
577     String exported = featuresFile.printGffFormat(al.getSequencesArray(),
578             fr, false, false);
579     String gffHeader = "##gff-version 2\n";
580     assertEquals(gffHeader, exported);
581     exported = featuresFile.printGffFormat(al.getSequencesArray(), fr,
582             true, false);
583     assertEquals(gffHeader, exported);
584
585     /*
586      * add some features
587      */
588     al.getSequenceAt(0).addSequenceFeature(
589             new SequenceFeature("Domain", "Cath", 0, 0, 0f, "Uniprot"));
590     al.getSequenceAt(0).addSequenceFeature(
591             new SequenceFeature("METAL", "Cath", 39, 39, 1.2f, null));
592     al.getSequenceAt(1)
593             .addSequenceFeature(
594                     new SequenceFeature("GAMMA-TURN", "Turn", 36, 38, 2.1f,
595                             "s3dm"));
596     SequenceFeature sf = new SequenceFeature("Pfam", "", 20, 20, 0f,
597             "Uniprot");
598     sf.setAttributes("x=y;black=white");
599     sf.setStrand("+");
600     sf.setPhase("2");
601     al.getSequenceAt(1).addSequenceFeature(sf);
602
603     /*
604      * 'discover' features then hide all feature types
605      */
606     fr.findAllFeatures(true);
607     FeatureSettingsBean[] data = new FeatureSettingsBean[4];
608     FeatureColourI fc = new FeatureColour(Color.PINK);
609     data[0] = new FeatureSettingsBean("Domain", fc, null, false);
610     data[1] = new FeatureSettingsBean("METAL", fc, null, false);
611     data[2] = new FeatureSettingsBean("GAMMA-TURN", fc, null, false);
612     data[3] = new FeatureSettingsBean("Pfam", fc, null, false);
613     fr.setFeaturePriority(data);
614
615     /*
616      * with no features displayed, exclude non-positional features
617      */
618     exported = featuresFile.printGffFormat(al.getSequencesArray(), fr,
619             false, false);
620     assertEquals(gffHeader, exported);
621
622     /*
623      * include non-positional features
624      */
625     fr.setGroupVisibility("Uniprot", true);
626     fr.setGroupVisibility("s3dm", false);
627     exported = featuresFile.printGffFormat(al.getSequencesArray(), fr,
628             true, false);
629     String expected = gffHeader
630             + "FER_CAPAA\tUniprot\tDomain\t0\t0\t0.0\t.\t.\n";
631     assertEquals(expected, exported);
632
633     /*
634      * set METAL (in uniprot group) and GAMMA-TURN visible, but not Pfam
635      * only Uniprot group visible here...
636      */
637     fr.setVisible("METAL");
638     fr.setVisible("GAMMA-TURN");
639     exported = featuresFile.printGffFormat(al.getSequencesArray(), fr,
640             false, false);
641     // METAL feature has null group: description used for column 2
642     expected = gffHeader + "FER_CAPAA\tCath\tMETAL\t39\t39\t1.2\t.\t.\n";
643     assertEquals(expected, exported);
644
645     /*
646      * set s3dm group visible
647      */
648     fr.setGroupVisibility("s3dm", true);
649     exported = featuresFile.printGffFormat(al.getSequencesArray(), fr,
650             false, false);
651     // METAL feature has null group: description used for column 2
652     expected = gffHeader + "FER_CAPAA\tCath\tMETAL\t39\t39\t1.2\t.\t.\n"
653             + "FER_CAPAN\ts3dm\tGAMMA-TURN\t36\t38\t2.1\t.\t.\n";
654     assertEquals(expected, exported);
655
656     /*
657      * now set Pfam visible
658      */
659     fr.setVisible("Pfam");
660     exported = featuresFile.printGffFormat(al.getSequencesArray(), fr,
661             false, false);
662     // Pfam feature columns include strand(+), phase(2), attributes
663     expected = gffHeader
664             + "FER_CAPAA\tCath\tMETAL\t39\t39\t1.2\t.\t.\n"
665             + "FER_CAPAN\tUniprot\tPfam\t20\t20\t0.0\t+\t2\tx=y;black=white\n"
666             + "FER_CAPAN\ts3dm\tGAMMA-TURN\t36\t38\t2.1\t.\t.\n";
667     assertEquals(expected, exported);
668   }
669
670   /**
671    * Test for parsing of feature filters as represented in a Jalview features
672    * file
673    * 
674    * @throws Exception
675    */
676   @Test(groups = { "Functional" })
677   public void testParseFilters() throws Exception
678   {
679     Map<String, FeatureMatcherSetI> filters = new HashMap<>();
680     String text = "sequence_variant\tCSQ:PolyPhen NotContains 'damaging'\n"
681             + "missense_variant\t(label contains foobar) and (Score lt 1.3)";
682     FeaturesFile featuresFile = new FeaturesFile(text,
683             DataSourceType.PASTE);
684     featuresFile.parseFilters(filters);
685     assertEquals(filters.size(), 2);
686
687     FeatureMatcherSetI fm = filters.get("sequence_variant");
688     assertNotNull(fm);
689     Iterator<FeatureMatcherI> matchers = fm.getMatchers().iterator();
690     FeatureMatcherI matcher = matchers.next();
691     assertFalse(matchers.hasNext());
692     String[] attributes = matcher.getAttribute();
693     assertArrayEquals(attributes, new String[] { "CSQ", "PolyPhen" });
694     assertSame(matcher.getMatcher().getCondition(), Condition.NotContains);
695     assertEquals(matcher.getMatcher().getPattern(), "damaging");
696
697     fm = filters.get("missense_variant");
698     assertNotNull(fm);
699     matchers = fm.getMatchers().iterator();
700     matcher = matchers.next();
701     assertTrue(matcher.isByLabel());
702     assertSame(matcher.getMatcher().getCondition(), Condition.Contains);
703     assertEquals(matcher.getMatcher().getPattern(), "foobar");
704     matcher = matchers.next();
705     assertTrue(matcher.isByScore());
706     assertSame(matcher.getMatcher().getCondition(), Condition.LT);
707     assertEquals(matcher.getMatcher().getPattern(), "1.3");
708     assertEquals(matcher.getMatcher().getFloatValue(), 1.3f);
709
710     assertFalse(matchers.hasNext());
711   }
712
713   @Test(groups = { "Functional" })
714   public void testOutputFeatureFilters()
715   {
716     FeaturesFile ff = new FeaturesFile();
717     StringBuilder sb = new StringBuilder();
718     Map<String, FeatureColourI> visible = new HashMap<>();
719     visible.put("pfam", new FeatureColour(Color.red));
720     Map<String, FeatureMatcherSetI> featureFilters = new HashMap<>();
721
722     // with no filters, nothing is output
723     ff.outputFeatureFilters(sb, visible, featureFilters);
724     assertEquals("", sb.toString());
725
726     // with filter for not visible features only, nothing is output
727     FeatureMatcherSet filter = new FeatureMatcherSet();
728     filter.and(FeatureMatcher.byLabel(Condition.Present, null));
729     featureFilters.put("foobar", filter);
730     ff.outputFeatureFilters(sb, visible, featureFilters);
731     assertEquals("", sb.toString());
732
733     // with filters for visible feature types
734     FeatureMatcherSet filter2 = new FeatureMatcherSet();
735     filter2.and(FeatureMatcher.byAttribute(Condition.Present, null, "CSQ",
736             "PolyPhen"));
737     filter2.and(FeatureMatcher.byScore(Condition.LE, "-2.4"));
738     featureFilters.put("pfam", filter2);
739     visible.put("foobar", new FeatureColour(Color.blue));
740     ff.outputFeatureFilters(sb, visible, featureFilters);
741     String expected = "\nSTARTFILTERS\nfoobar\tLabel Present\npfam\t(CSQ:PolyPhen Present) AND (Score LE -2.4)\nENDFILTERS\n";
742     assertEquals(expected, sb.toString());
743   }
744
745   /**
746    * Output as GFF should not include features which are not visible due to
747    * colour threshold or feature filter settings
748    * 
749    * @throws Exception
750    */
751   @Test(groups = { "Functional" })
752   public void testPrintGffFormat_withFilters() throws Exception
753   {
754     File f = new File("examples/uniref50.fa");
755     AlignmentI al = readAlignmentFile(f);
756     AlignFrame af = new AlignFrame(al, 500, 500);
757     SequenceFeature sf1 = new SequenceFeature("METAL", "Cath", 39, 39, 1.2f,
758             null);
759     sf1.setValue("clin_sig", "Likely Pathogenic");
760     sf1.setValue("AF", "24");
761     al.getSequenceAt(0).addSequenceFeature(sf1);
762     SequenceFeature sf2 = new SequenceFeature("METAL", "Cath", 41, 41, 0.6f,
763             null);
764     sf2.setValue("clin_sig", "Benign");
765     sf2.setValue("AF", "46");
766     al.getSequenceAt(0).addSequenceFeature(sf2);
767   
768     FeaturesFile featuresFile = new FeaturesFile();
769     FeatureRenderer fr = af.alignPanel.getFeatureRenderer();
770     final String gffHeader = "##gff-version 2\n";
771
772     fr.setVisible("METAL");
773     fr.setColour("METAL", new FeatureColour(Color.PINK));
774     String exported = featuresFile.printGffFormat(al.getSequencesArray(),
775             fr, false, false);
776     String expected = gffHeader
777             + "FER_CAPAA\tCath\tMETAL\t39\t39\t1.2\t.\t.\n"
778             + "FER_CAPAA\tCath\tMETAL\t41\t41\t0.6\t.\t.\n";
779     assertEquals(expected, exported);
780
781     /*
782      * now threshold to Score > 1.1 - should exclude sf2
783      */
784     FeatureColourI fc = new FeatureColour(null, Color.white, Color.BLACK,
785             Color.white, 0f, 2f);
786     fc.setAboveThreshold(true);
787     fc.setThreshold(1.1f);
788     fr.setColour("METAL", fc);
789     exported = featuresFile.printGffFormat(al.getSequencesArray(), fr,
790             false, false);
791     expected = gffHeader + "FER_CAPAA\tCath\tMETAL\t39\t39\t1.2\t.\t.\n";
792     assertEquals(expected, exported);
793
794     /*
795      * remove threshold and check sf2 is exported
796      */
797     fc.setAboveThreshold(false);
798     exported = featuresFile.printGffFormat(al.getSequencesArray(), fr,
799             false, false);
800     expected = gffHeader + "FER_CAPAA\tCath\tMETAL\t39\t39\t1.2\t.\t.\n"
801             + "FER_CAPAA\tCath\tMETAL\t41\t41\t0.6\t.\t.\n";
802     assertEquals(expected, exported);
803
804     /*
805      * filter on (clin_sig contains Benign) - should include sf2 and exclude sf1
806      */
807     FeatureMatcherSetI filter = new FeatureMatcherSet();
808     filter.and(FeatureMatcher.byAttribute(Condition.Contains, "benign",
809             "clin_sig"));
810     fr.setFeatureFilter("METAL", filter);
811     exported = featuresFile.printGffFormat(al.getSequencesArray(), fr,
812             false, false);
813     expected = gffHeader + "FER_CAPAA\tCath\tMETAL\t41\t41\t0.6\t.\t.\n";
814     assertEquals(expected, exported);
815   }
816
817   /**
818    * Output as Jalview should not include features which are not visible due to
819    * colour threshold or feature filter settings
820    * 
821    * @throws Exception
822    */
823   @Test(groups = { "Functional" })
824   public void testPrintJalviewFormat_withFilters() throws Exception
825   {
826     File f = new File("examples/uniref50.fa");
827     AlignmentI al = readAlignmentFile(f);
828     AlignFrame af = new AlignFrame(al, 500, 500);
829     SequenceFeature sf1 = new SequenceFeature("METAL", "Cath", 39, 39, 1.2f,
830             "grp1");
831     sf1.setValue("clin_sig", "Likely Pathogenic");
832     sf1.setValue("AF", "24");
833     al.getSequenceAt(0).addSequenceFeature(sf1);
834     SequenceFeature sf2 = new SequenceFeature("METAL", "Cath", 41, 41, 0.6f,
835             "grp2");
836     sf2.setValue("clin_sig", "Benign");
837     sf2.setValue("AF", "46");
838     al.getSequenceAt(0).addSequenceFeature(sf2);
839   
840     FeaturesFile featuresFile = new FeaturesFile();
841     FeatureRenderer fr = af.alignPanel.getFeatureRenderer();
842     fr.findAllFeatures(true);
843   
844     fr.setVisible("METAL");
845     fr.setColour("METAL", new FeatureColour(Color.PINK));
846     String exported = featuresFile.printJalviewFormat(
847             al.getSequencesArray(),
848             fr, false, false);
849     String expected = "METAL\tffafaf\n\nSTARTGROUP\tgrp1\n"
850             + "Cath\tFER_CAPAA\t-1\t39\t39\tMETAL\t1.2\n"
851             + "ENDGROUP\tgrp1\n\nSTARTGROUP\tgrp2\n"
852             + "Cath\tFER_CAPAA\t-1\t41\t41\tMETAL\t0.6\n"
853             + "ENDGROUP\tgrp2\n";
854     assertEquals(expected, exported);
855   
856     /*
857      * now threshold to Score > 1.1 - should exclude sf2
858      * (and there should be no empty STARTGROUP/ENDGROUP output)
859      */
860     FeatureColourI fc = new FeatureColour(null, Color.white, Color.BLACK,
861             Color.white, 0f, 2f);
862     fc.setAboveThreshold(true);
863     fc.setThreshold(1.1f);
864     fr.setColour("METAL", fc);
865     exported = featuresFile.printJalviewFormat(al.getSequencesArray(), fr,
866             false, false);
867     expected = "METAL\tscore|ffffff|000000|noValueMin|abso|0.0|2.0|above|1.1\n\n"
868             + "STARTGROUP\tgrp1\n"
869             + "Cath\tFER_CAPAA\t-1\t39\t39\tMETAL\t1.2\n"
870             + "ENDGROUP\tgrp1\n";
871     assertEquals(expected, exported);
872   
873     /*
874      * remove threshold and check sf2 is exported
875      */
876     fc.setAboveThreshold(false);
877     exported = featuresFile.printJalviewFormat(al.getSequencesArray(), fr,
878             false, false);
879     expected = "METAL\tscore|ffffff|000000|noValueMin|abso|0.0|2.0|none\n\n"
880             + "STARTGROUP\tgrp1\n"
881             + "Cath\tFER_CAPAA\t-1\t39\t39\tMETAL\t1.2\n"
882             + "ENDGROUP\tgrp1\n\nSTARTGROUP\tgrp2\n"
883             + "Cath\tFER_CAPAA\t-1\t41\t41\tMETAL\t0.6\n"
884             + "ENDGROUP\tgrp2\n";
885     assertEquals(expected, exported);
886   
887     /*
888      * filter on (clin_sig contains Benign) - should include sf2 and exclude sf1
889      */
890     FeatureMatcherSetI filter = new FeatureMatcherSet();
891     filter.and(FeatureMatcher.byAttribute(Condition.Contains, "benign",
892             "clin_sig"));
893     fr.setFeatureFilter("METAL", filter);
894     exported = featuresFile.printJalviewFormat(al.getSequencesArray(), fr,
895             false, false);
896     expected = "FER_CAPAA\tCath\tMETAL\t41\t41\t0.6\t.\t.\n";
897     expected = "METAL\tscore|ffffff|000000|noValueMin|abso|0.0|2.0|none\n\n"
898             + "STARTFILTERS\nMETAL\tclin_sig Contains benign\nENDFILTERS\n\n"
899             + "STARTGROUP\tgrp2\n"
900             + "Cath\tFER_CAPAA\t-1\t41\t41\tMETAL\t0.6\n"
901             + "ENDGROUP\tgrp2\n";
902     assertEquals(expected, exported);
903   }
904
905   /**
906    * Test parsing a features file with Jalview format features, including
907    * STARTMOTIFS/ENDMOTIFS lines with motifs to be matched to create features
908    * 
909    * @throws Exception
910    */
911   @Test(groups = { "Functional" })
912   public void testParse_jalviewFeaturesWithMotifs() throws IOException
913   {
914     File f = new File("examples/uniref50.fa");
915     AlignmentI al = readAlignmentFile(f);
916     AlignFrame af = new AlignFrame(al, 500, 500);
917     Map<String, FeatureColourI> colours = af.getFeatureRenderer()
918             .getFeatureColours();
919
920     /*
921      * hide columns with YKV motif; these should not get
922      * matched by the Finder
923      */
924     al.getHiddenColumns().hideColumns(62, 64);
925
926     // @formatter:off
927     String featureData = 
928             /*
929              * features in the null grup
930              */
931             "HELIX" + TAB + "blue\n" +
932             "MOTIF1" + TAB + "green\n" +
933             "MOTIF2" + TAB + "250,200,150|100,50,0|-3.9|4.5|above|-2.0\n" +
934             "adescription" + TAB + "FER_CAPAN" + TAB + "-1" + TAB + "42" + TAB + "45" + TAB + "HELIX\n" +
935             "STARTMOTIFS\n" +
936             "FLP" + TAB + "MOTIF1" + TAB + "flxMotifP\n" +
937             "F[LR]N" + TAB + "MOTIF1" + TAB + "flxMotifN\n" +
938             "fld" + TAB + "MOTIF1" + TAB + "flxMotifD\n" +
939             "YKV" + TAB + "MOTIF1" + TAB + "ykvMotif\n" +
940             "ENDMOTIFS\n" +
941             /*
942              * features in group uniprot
943              */
944             "STARTGROUP" + TAB + "uniprot\n" +
945             "bdescription" + TAB + "FER_CAPAN" + TAB + "-1" + TAB + "47" + TAB + "48" + TAB + "HELIX\n" +
946             "STARTMOTIFS\n" +
947             "FLG" + TAB + "MOTIF1" + TAB + "flxMotifG\n" +
948             "VTT" + TAB + "MOTIF2" + TAB + "vxtMotifT" + TAB + "-3.21\n" +
949             "VRT" + TAB + "MOTIF2" + TAB + "vxtMotifR\n" +
950             "ENDMOTIFS\n" +
951             "ENDGROUP"; 
952             // @formatter:on
953     FeaturesFile featuresFile = new FeaturesFile(featureData,
954             DataSourceType.PASTE);
955     assertTrue("Failed to parse features file",
956             featuresFile.parse(al, colours, true));
957
958     // verify HELIX features were parsed as normal
959     List<SequenceFeature> sfs = al.getSequenceAt(1).findFeatures(0, 999,
960             "HELIX");
961     assertEquals(2, sfs.size());
962     SequenceFeature sf = sfs.get(0);
963     assertNull(sf.getFeatureGroup());
964     assertEquals(42, sf.getBegin());
965     assertEquals(45, sf.getEnd());
966     assertEquals("adescription", sf.getDescription());
967     sf = sfs.get(1);
968     assertEquals("uniprot", sf.getFeatureGroup());
969     assertEquals(47, sf.getBegin());
970     assertEquals(48, sf.getEnd());
971     assertEquals("bdescription", sf.getDescription());
972
973     /*
974      * feature type MOTIF1
975      * FLP motif should match FER1_SOLLC/13-15 and Q93XJ9_SOLTU/13-15
976      * F[LR]N should match O80429_MAIZE/107-109
977      * fld should match nothing (as case sensitive)
978      * feature group should be null for the above
979      * FLG should match FER1_PEA/36-38, feature group uniprot
980      * YKV should match nothing as entirely within hidden columns
981      */
982     for (SequenceI seq : al.getSequences())
983     {
984       List<SequenceFeature> features = seq.findFeatures(0, 9999, "MOTIF1");
985       String name = seq.getName();
986       if (name.equals("FER1_SOLLC") || name.equals("Q93XJ9_SOLTU"))
987       {
988         assertEquals(1, features.size());
989         sf = features.get(0);
990         assertNull(sf.getFeatureGroup());
991         assertEquals(13, sf.getBegin());
992         assertEquals(15, sf.getEnd());
993         assertEquals("flxMotifP", sf.getDescription());
994       }
995       else if (name.equals("O80429_MAIZE"))
996       {
997         assertEquals(1, features.size());
998         sf = features.get(0);
999         assertNull(sf.getFeatureGroup());
1000         assertEquals(107, sf.getBegin());
1001         assertEquals(109, sf.getEnd());
1002         assertEquals("flxMotifN", sf.getDescription());
1003       }
1004       else if (name.equals("FER1_PEA"))
1005       {
1006         assertEquals(1, features.size());
1007         sf = features.get(0);
1008         assertEquals("uniprot", sf.getFeatureGroup());
1009         assertEquals(36, sf.getBegin());
1010         assertEquals(38, sf.getEnd());
1011         assertEquals("flxMotifG", sf.getDescription());
1012       }
1013       else
1014       {
1015         assertTrue("MOTIF1 features found for " + name, features.isEmpty());
1016       }
1017     }
1018
1019     /*
1020      * feature type MOTIF2
1021      * VTT motif should match FER1_PEA/26-28
1022      * VRT should match nothing
1023      */
1024     for (SequenceI seq : al.getSequences())
1025     {
1026       List<SequenceFeature> features = seq.findFeatures(0, 9999, "MOTIF2");
1027       String name = seq.getName();
1028       if (name.equals("FER1_PEA"))
1029       {
1030         assertEquals(1, features.size());
1031         sf = features.get(0);
1032         assertEquals("uniprot", sf.getFeatureGroup());
1033         assertEquals(26, sf.getBegin());
1034         assertEquals(28, sf.getEnd());
1035         assertEquals("vxtMotifT", sf.getDescription());
1036         assertEquals(-3.21f, sf.getScore());
1037       }
1038       else
1039       {
1040         assertTrue("MOTIF2 features found for " + name, features.isEmpty());
1041         assertTrue(features.isEmpty());
1042       }
1043     }
1044   }
1045 }