JAL-3121 'attributes map' in GFF3 without special 'jvmap_' token
[jalview.git] / test / jalview / io / FeaturesFileTest.java
1 /*
2  * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3  * Copyright (C) $$Year-Rel$$ The Jalview Authors
4  * 
5  * This file is part of Jalview.
6  * 
7  * Jalview is free software: you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License 
9  * as published by the Free Software Foundation, either version 3
10  * of the License, or (at your option) any later version.
11  *  
12  * Jalview is distributed in the hope that it will be useful, but 
13  * WITHOUT ANY WARRANTY; without even the implied warranty 
14  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR 
15  * PURPOSE.  See the GNU General Public License for more details.
16  * 
17  * You should have received a copy of the GNU General Public License
18  * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
19  * The Jalview Authors are detailed in the 'AUTHORS' file.
20  */
21 package jalview.io;
22
23 import static org.testng.AssertJUnit.assertEquals;
24 import static org.testng.AssertJUnit.assertFalse;
25 import static org.testng.AssertJUnit.assertNotNull;
26 import static org.testng.AssertJUnit.assertSame;
27 import static org.testng.AssertJUnit.assertTrue;
28 import static org.testng.internal.junit.ArrayAsserts.assertArrayEquals;
29
30 import jalview.api.FeatureColourI;
31 import jalview.api.FeatureRenderer;
32 import jalview.datamodel.Alignment;
33 import jalview.datamodel.AlignmentI;
34 import jalview.datamodel.SequenceDummy;
35 import jalview.datamodel.SequenceFeature;
36 import jalview.datamodel.SequenceI;
37 import jalview.datamodel.features.FeatureMatcher;
38 import jalview.datamodel.features.FeatureMatcherI;
39 import jalview.datamodel.features.FeatureMatcherSet;
40 import jalview.datamodel.features.FeatureMatcherSetI;
41 import jalview.datamodel.features.SequenceFeatures;
42 import jalview.gui.AlignFrame;
43 import jalview.gui.Desktop;
44 import jalview.gui.JvOptionPane;
45 import jalview.schemes.FeatureColour;
46 import jalview.structure.StructureSelectionManager;
47 import jalview.util.matcher.Condition;
48 import jalview.viewmodel.seqfeatures.FeatureRendererModel;
49 import jalview.viewmodel.seqfeatures.FeatureRendererModel.FeatureSettingsBean;
50
51 import java.awt.Color;
52 import java.io.File;
53 import java.io.IOException;
54 import java.util.HashMap;
55 import java.util.Iterator;
56 import java.util.List;
57 import java.util.Map;
58
59 import org.testng.annotations.AfterClass;
60 import org.testng.annotations.BeforeClass;
61 import org.testng.annotations.Test;
62
63 public class FeaturesFileTest
64 {
  /*
   * relative path of the GFF3 test file read by readGff3File and the
   * simpleGff3* tests in this class
   */
  private static String simpleGffFile = "examples/testdata/simpleGff3.gff";
66
67   @AfterClass(alwaysRun = true)
68   public void tearDownAfterClass()
69   {
70     /*
71      * remove any sequence mappings created so they don't pollute other tests
72      */
73     StructureSelectionManager ssm = StructureSelectionManager
74             .getStructureSelectionManager(Desktop.instance);
75     ssm.resetAll();
76   }
77
78   @BeforeClass(alwaysRun = true)
79   public void setUpJvOptionPane()
80   {
81     JvOptionPane.setInteractiveMode(false);
82     JvOptionPane.setMockResponse(JvOptionPane.CANCEL_OPTION);
83   }
84
85   @Test(groups = { "Functional" })
86   public void testParse() throws Exception
87   {
88     File f = new File("examples/uniref50.fa");
89     AlignmentI al = readAlignmentFile(f);
90     AlignFrame af = new AlignFrame(al, 500, 500);
91     Map<String, FeatureColourI> colours = af.getFeatureRenderer()
92             .getFeatureColours();
93     FeaturesFile featuresFile = new FeaturesFile(
94             "examples/exampleFeatures.txt", DataSourceType.FILE);
95     assertTrue("Test " + "Features file test"
96             + "\nFailed to parse features file.",
97             featuresFile.parse(al.getDataset(), colours, true));
98
99     /*
100      * Refetch the colour map from the FeatureRenderer (to confirm it has been
101      * updated - JAL-1904), and verify (some) feature group colours
102      */
103     colours = af.getFeatureRenderer().getFeatureColours();
104     assertEquals("27 feature group colours not found", 27, colours.size());
105     assertEquals(colours.get("Cath").getColour(), new Color(0x93b1d1));
106     assertEquals(colours.get("ASX-MOTIF").getColour(), new Color(0x6addbb));
107     FeatureColourI kdColour = colours.get("kdHydrophobicity");
108     assertTrue(kdColour.isGraduatedColour());
109     assertTrue(kdColour.isAboveThreshold());
110     assertEquals(-2f, kdColour.getThreshold());
111
112     /*
113      * verify (some) features on sequences
114      */
115     List<SequenceFeature> sfs = al.getSequenceAt(0).getDatasetSequence()
116             .getSequenceFeatures(); // FER_CAPAA
117     SequenceFeatures.sortFeatures(sfs, true);
118     assertEquals(8, sfs.size());
119
120     /*
121      * verify (in ascending start position order)
122      */
123     SequenceFeature sf = sfs.get(0);
124     assertEquals("Pfam family%LINK%", sf.description);
125     assertEquals(0, sf.begin);
126     assertEquals(0, sf.end);
127     assertEquals("uniprot", sf.featureGroup);
128     assertEquals("Pfam", sf.type);
129     assertEquals(1, sf.links.size());
130     assertEquals("Pfam family|http://pfam.xfam.org/family/PF00111",
131             sf.links.get(0));
132
133     sf = sfs.get(1);
134     assertEquals("Ferredoxin_fold Status: True Positive ", sf.description);
135     assertEquals(3, sf.begin);
136     assertEquals(93, sf.end);
137     assertEquals("uniprot", sf.featureGroup);
138     assertEquals("Cath", sf.type);
139
140     sf = sfs.get(2);
141     assertEquals("Fer2 Status: True Positive Pfam 8_8%LINK%",
142             sf.description);
143     assertEquals("Pfam 8_8|http://pfam.xfam.org/family/PF00111",
144             sf.links.get(0));
145     assertEquals(8, sf.begin);
146     assertEquals(83, sf.end);
147     assertEquals("uniprot", sf.featureGroup);
148     assertEquals("Pfam", sf.type);
149
150     sf = sfs.get(3);
151     assertEquals("Iron-sulfur (2Fe-2S)", sf.description);
152     assertEquals(39, sf.begin);
153     assertEquals(39, sf.end);
154     assertEquals("uniprot", sf.featureGroup);
155     assertEquals("METAL", sf.type);
156
157     sf = sfs.get(4);
158     assertEquals("Iron-sulfur (2Fe-2S)", sf.description);
159     assertEquals(44, sf.begin);
160     assertEquals(44, sf.end);
161     assertEquals("uniprot", sf.featureGroup);
162     assertEquals("METAL", sf.type);
163
164     sf = sfs.get(5);
165     assertEquals("Iron-sulfur (2Fe-2S)", sf.description);
166     assertEquals(47, sf.begin);
167     assertEquals(47, sf.end);
168     assertEquals("uniprot", sf.featureGroup);
169     assertEquals("METAL", sf.type);
170
171     sf = sfs.get(6);
172     assertEquals("Iron-sulfur (2Fe-2S)", sf.description);
173     assertEquals(77, sf.begin);
174     assertEquals(77, sf.end);
175     assertEquals("uniprot", sf.featureGroup);
176     assertEquals("METAL", sf.type);
177
178     sf = sfs.get(7);
179     assertEquals(
180             "High confidence server. Only hits with scores over 0.8 are reported. PHOSPHORYLATION (T) 89_8%LINK%",
181             sf.description);
182     assertEquals(
183             "PHOSPHORYLATION (T) 89_8|http://www.cbs.dtu.dk/cgi-bin/proview/webface-link?seqid=P83527&amp;service=NetPhos-2.0",
184             sf.links.get(0));
185     assertEquals(89, sf.begin);
186     assertEquals(89, sf.end);
187     assertEquals("netphos", sf.featureGroup);
188     assertEquals("PHOSPHORYLATION (T)", sf.type);
189   }
190
191   /**
192    * Test parsing a features file with a mix of Jalview and GFF formatted
193    * content
194    * 
195    * @throws Exception
196    */
197   @Test(groups = { "Functional" })
198   public void testParse_mixedJalviewGff() throws Exception
199   {
200     File f = new File("examples/uniref50.fa");
201     AlignmentI al = readAlignmentFile(f);
202     AlignFrame af = new AlignFrame(al, 500, 500);
203     Map<String, FeatureColourI> colours = af.getFeatureRenderer()
204             .getFeatureColours();
205     // GFF2 uses space as name/value separator in column 9
206     String gffData = "METAL\tcc9900\n"
207             + "GFF\n"
208             + "FER_CAPAA\tuniprot\tMETAL\t44\t45\t4.0\t.\t.\tNote Iron-sulfur; Note 2Fe-2S\n"
209             + "FER1_SOLLC\tuniprot\tPfam\t55\t130\t2.0\t.\t.";
210     FeaturesFile featuresFile = new FeaturesFile(gffData,
211             DataSourceType.PASTE);
212     assertTrue("Failed to parse features file",
213             featuresFile.parse(al.getDataset(), colours, true));
214
215     // verify colours read or synthesized
216     colours = af.getFeatureRenderer().getFeatureColours();
217     assertEquals("1 feature group colours not found", 1, colours.size());
218     assertEquals(colours.get("METAL").getColour(), new Color(0xcc9900));
219
220     // verify feature on FER_CAPAA
221     List<SequenceFeature> sfs = al.getSequenceAt(0).getDatasetSequence()
222             .getSequenceFeatures();
223     assertEquals(1, sfs.size());
224     SequenceFeature sf = sfs.get(0);
225     assertEquals("Iron-sulfur,2Fe-2S", sf.description);
226     assertEquals(44, sf.begin);
227     assertEquals(45, sf.end);
228     assertEquals("uniprot", sf.featureGroup);
229     assertEquals("METAL", sf.type);
230     assertEquals(4f, sf.getScore(), 0.001f);
231
232     // verify feature on FER1_SOLLC
233     sfs = al.getSequenceAt(2).getDatasetSequence().getSequenceFeatures();
234     assertEquals(1, sfs.size());
235     sf = sfs.get(0);
236     assertEquals("uniprot", sf.description);
237     assertEquals(55, sf.begin);
238     assertEquals(130, sf.end);
239     assertEquals("uniprot", sf.featureGroup);
240     assertEquals("Pfam", sf.type);
241     assertEquals(2f, sf.getScore(), 0.001f);
242   }
243
244   public static AlignmentI readAlignmentFile(File f) throws IOException
245   {
246     System.out.println("Reading file: " + f);
247     String ff = f.getPath();
248     FormatAdapter rf = new FormatAdapter();
249
250     AlignmentI al = rf.readFile(ff, DataSourceType.FILE,
251             new IdentifyFile().identify(ff, DataSourceType.FILE));
252
253     al.setDataset(null); // creates dataset sequences
254     assertNotNull("Couldn't read supplied alignment data.", al);
255     return al;
256   }
257
258   /**
259    * Test parsing a features file with GFF formatted content only
260    * 
261    * @throws Exception
262    */
263   @Test(groups = { "Functional" })
264   public void testParse_pureGff3() throws Exception
265   {
266     File f = new File("examples/uniref50.fa");
267     AlignmentI al = readAlignmentFile(f);
268     AlignFrame af = new AlignFrame(al, 500, 500);
269     Map<String, FeatureColourI> colours = af.getFeatureRenderer()
270             .getFeatureColours();
271     // GFF3 uses '=' separator for name/value pairs in column 9
272     // comma (%2C) equals (%3D) or semi-colon (%3B) should be url-escaped in values
273     String gffData = "##gff-version 3\n"
274             + "FER_CAPAA\tuniprot\tMETAL\t39\t39\t0.0\t.\t.\t"
275             + "Note=Iron-sulfur (2Fe-2S);Note=another note,and another;evidence=ECO%3B0000255%2CPROSITE%3DProRule:PRU00465;"
276             + "CSQ=AF=21,POLYPHEN=benign,possibly_damaging,clin_sig=Benign%3Dgood\n"
277             + "FER1_SOLLC\tuniprot\tPfam\t55\t130\t3.0\t.\t.\tID=$23";
278     FeaturesFile featuresFile = new FeaturesFile(gffData,
279             DataSourceType.PASTE);
280     assertTrue("Failed to parse features file",
281             featuresFile.parse(al.getDataset(), colours, true));
282
283     // verify feature on FER_CAPAA
284     List<SequenceFeature> sfs = al.getSequenceAt(0).getDatasetSequence()
285             .getSequenceFeatures();
286     assertEquals(1, sfs.size());
287     SequenceFeature sf = sfs.get(0);
288     // description parsed from Note attribute
289     assertEquals("Iron-sulfur (2Fe-2S),another note,and another",
290             sf.description);
291     assertEquals(39, sf.begin);
292     assertEquals(39, sf.end);
293     assertEquals("uniprot", sf.featureGroup);
294     assertEquals("METAL", sf.type);
295     assertEquals(5, sf.otherDetails.size());
296     assertEquals("ECO;0000255,PROSITE=ProRule:PRU00465", // url decoded
297             sf.getValue("evidence"));
298     assertEquals("Iron-sulfur (2Fe-2S),another note,and another",
299             sf.getValue("Note"));
300     assertEquals("21", sf.getValueAsString("CSQ", "AF"));
301     assertEquals("benign,possibly_damaging",
302             sf.getValueAsString("CSQ", "POLYPHEN"));
303     assertEquals("Benign=good", sf.getValueAsString("CSQ", "clin_sig")); // url decoded
304     // todo change STRAND and !Phase into fields of SequenceFeature instead
305     assertEquals(".", sf.otherDetails.get("STRAND"));
306     assertEquals(0, sf.getStrand());
307     assertEquals(".", sf.getPhase());
308
309     // verify feature on FER1_SOLLC1
310     sfs = al.getSequenceAt(2).getDatasetSequence().getSequenceFeatures();
311     assertEquals(1, sfs.size());
312     sf = sfs.get(0);
313     // ID used for description if available
314     assertEquals("$23", sf.description);
315     assertEquals(55, sf.begin);
316     assertEquals(130, sf.end);
317     assertEquals("uniprot", sf.featureGroup);
318     assertEquals("Pfam", sf.type);
319     assertEquals(3f, sf.getScore(), 0.001f);
320   }
321
322   /**
323    * Test parsing a features file with Jalview format features (but no colour
324    * descriptors or startgroup to give the hint not to parse as GFF)
325    * 
326    * @throws Exception
327    */
328   @Test(groups = { "Functional" })
329   public void testParse_jalviewFeaturesOnly() throws Exception
330   {
331     File f = new File("examples/uniref50.fa");
332     AlignmentI al = readAlignmentFile(f);
333     AlignFrame af = new AlignFrame(al, 500, 500);
334     Map<String, FeatureColourI> colours = af.getFeatureRenderer()
335             .getFeatureColours();
336
337     /*
338      * one feature on FER_CAPAA and one on sequence 3 (index 2) FER1_SOLLC
339      */
340     String featureData = "Iron-sulfur (2Fe-2S)\tFER_CAPAA\t-1\t39\t39\tMETAL\n"
341             + "Iron-phosphorus (2Fe-P)\tID_NOT_SPECIFIED\t2\t86\t87\tMETALLIC\n";
342     FeaturesFile featuresFile = new FeaturesFile(featureData,
343             DataSourceType.PASTE);
344     assertTrue("Failed to parse features file",
345             featuresFile.parse(al.getDataset(), colours, true));
346
347     // verify FER_CAPAA feature
348     List<SequenceFeature> sfs = al.getSequenceAt(0).getDatasetSequence()
349             .getSequenceFeatures();
350     assertEquals(1, sfs.size());
351     SequenceFeature sf = sfs.get(0);
352     assertEquals("Iron-sulfur (2Fe-2S)", sf.description);
353     assertEquals(39, sf.begin);
354     assertEquals(39, sf.end);
355     assertEquals("METAL", sf.type);
356
357     // verify FER1_SOLLC feature
358     sfs = al.getSequenceAt(2).getDatasetSequence().getSequenceFeatures();
359     assertEquals(1, sfs.size());
360     sf = sfs.get(0);
361     assertEquals("Iron-phosphorus (2Fe-P)", sf.description);
362     assertEquals(86, sf.begin);
363     assertEquals(87, sf.end);
364     assertEquals("METALLIC", sf.type);
365   }
366
367   private void checkDatasetfromSimpleGff3(AlignmentI dataset)
368   {
369     assertEquals("no sequences extracted from GFF3 file", 2,
370             dataset.getHeight());
371
372     SequenceI seq1 = dataset.findName("seq1");
373     SequenceI seq2 = dataset.findName("seq2");
374     assertNotNull(seq1);
375     assertNotNull(seq2);
376     assertFalse(
377             "Failed to replace dummy seq1 with real sequence",
378             seq1 instanceof SequenceDummy
379                     && ((SequenceDummy) seq1).isDummy());
380     assertFalse(
381             "Failed to replace dummy seq2 with real sequence",
382             seq2 instanceof SequenceDummy
383                     && ((SequenceDummy) seq2).isDummy());
384     String placeholderseq = new SequenceDummy("foo").getSequenceAsString();
385     assertFalse("dummy replacement buggy for seq1",
386             placeholderseq.equals(seq1.getSequenceAsString()));
387     assertFalse("dummy replacement buggy for seq2",
388             placeholderseq.equals(seq2.getSequenceAsString()));
389     assertNotNull("No features added to seq1", seq1.getSequenceFeatures());
390     assertEquals("Wrong number of features", 3, seq1.getSequenceFeatures()
391             .size());
392     assertTrue(seq2.getSequenceFeatures().isEmpty());
393     assertEquals(
394             "Wrong number of features",
395             0,
396             seq2.getSequenceFeatures() == null ? 0 : seq2
397                     .getSequenceFeatures().size());
398     assertTrue(
399             "Expected at least one CDNA/Protein mapping for seq1",
400             dataset.getCodonFrame(seq1) != null
401                     && dataset.getCodonFrame(seq1).size() > 0);
402
403   }
404
405   @Test(groups = { "Functional" })
406   public void readGff3File() throws IOException
407   {
408     FeaturesFile gffreader = new FeaturesFile(true, simpleGffFile,
409             DataSourceType.FILE);
410     Alignment dataset = new Alignment(gffreader.getSeqsAsArray());
411     gffreader.addProperties(dataset);
412     checkDatasetfromSimpleGff3(dataset);
413   }
414
415   @Test(groups = { "Functional" })
416   public void simpleGff3FileClass() throws IOException
417   {
418     AlignmentI dataset = new Alignment(new SequenceI[] {});
419     FeaturesFile ffile = new FeaturesFile(simpleGffFile,
420             DataSourceType.FILE);
421   
422     boolean parseResult = ffile.parse(dataset, null, false, false);
423     assertTrue("return result should be true", parseResult);
424     checkDatasetfromSimpleGff3(dataset);
425   }
426
427   @Test(groups = { "Functional" })
428   public void simpleGff3FileLoader() throws IOException
429   {
430     AlignFrame af = new FileLoader(false).LoadFileWaitTillLoaded(
431             simpleGffFile, DataSourceType.FILE);
432     assertTrue(
433             "Didn't read the alignment into an alignframe from Gff3 File",
434             af != null);
435     checkDatasetfromSimpleGff3(af.getViewport().getAlignment());
436   }
437
438   @Test(groups = { "Functional" })
439   public void simpleGff3RelaxedIdMatching() throws IOException
440   {
441     AlignmentI dataset = new Alignment(new SequenceI[] {});
442     FeaturesFile ffile = new FeaturesFile(simpleGffFile,
443             DataSourceType.FILE);
444   
445     boolean parseResult = ffile.parse(dataset, null, false, true);
446     assertTrue("return result (relaxedID matching) should be true",
447             parseResult);
448     checkDatasetfromSimpleGff3(dataset);
449   }
450
451   @Test(groups = { "Functional" })
452   public void testPrintJalviewFormat() throws Exception
453   {
454     File f = new File("examples/uniref50.fa");
455     AlignmentI al = readAlignmentFile(f);
456     AlignFrame af = new AlignFrame(al, 500, 500);
457     Map<String, FeatureColourI> colours = af.getFeatureRenderer()
458             .getFeatureColours();
459     String features = "METAL\tcc9900\n"
460             + "GAMMA-TURN\tred|0,255,255|20.0|95.0|below|66.0\n"
461             + "Pfam\tred\n"
462             + "STARTGROUP\tuniprot\n"
463             + "Cath\tFER_CAPAA\t-1\t0\t0\tDomain\n" // non-positional feature
464             + "Iron\tFER_CAPAA\t-1\t39\t39\tMETAL\n"
465             + "Turn\tFER_CAPAA\t-1\t36\t38\tGAMMA-TURN\n"
466             + "<html>Pfam domain<a href=\"http://pfam.xfam.org/family/PF00111\">Pfam_3_4</a></html>\tFER_CAPAA\t-1\t20\t20\tPfam\n"
467             + "ENDGROUP\tuniprot\n";
468     FeaturesFile featuresFile = new FeaturesFile(features,
469             DataSourceType.PASTE);
470     featuresFile.parse(al.getDataset(), colours, false);
471
472     /*
473      * add positional and non-positional features with null and
474      * empty feature group to check handled correctly
475      */
476     SequenceI seq = al.getSequenceAt(1); // FER_CAPAN
477     seq.addSequenceFeature(new SequenceFeature("Pfam", "desc1", 0, 0, 1.3f,
478             null));
479     seq.addSequenceFeature(new SequenceFeature("Pfam", "desc2", 4, 9,
480             Float.NaN, null));
481     seq = al.getSequenceAt(2); // FER1_SOLLC
482     seq.addSequenceFeature(new SequenceFeature("Pfam", "desc3", 0, 0,
483             Float.NaN, ""));
484     seq.addSequenceFeature(new SequenceFeature("Pfam", "desc4", 5, 8,
485             -2.6f, ""));
486
487     /*
488      * first with no features displayed, exclude non-positional features
489      */
490     FeatureRenderer fr = af.alignPanel.getFeatureRenderer();
491     String exported = featuresFile
492             .printJalviewFormat(al.getSequencesArray(), fr, false, false);
493     String expected = "No Features Visible";
494     assertEquals(expected, exported);
495
496     /*
497      * include non-positional features, but still no positional features
498      */
499     fr.setGroupVisibility("uniprot", true);
500     exported = featuresFile.printJalviewFormat(al.getSequencesArray(), fr,
501             true, false);
502     expected = "\nSTARTGROUP\tuniprot\n"
503             + "Cath\tFER_CAPAA\t-1\t0\t0\tDomain\t0.0\n"
504             + "ENDGROUP\tuniprot\n\n"
505             + "desc1\tFER_CAPAN\t-1\t0\t0\tPfam\t1.3\n\n"
506             + "desc3\tFER1_SOLLC\t-1\t0\t0\tPfam\n"; // NaN is not output
507     assertEquals(expected, exported);
508
509     /*
510      * set METAL (in uniprot group) and GAMMA-TURN visible, but not Pfam
511      */
512     fr.setVisible("METAL");
513     fr.setVisible("GAMMA-TURN");
514     exported = featuresFile.printJalviewFormat(al.getSequencesArray(), fr,
515             false, false);
516     expected = "METAL\tcc9900\n"
517             + "GAMMA-TURN\tscore|ff0000|00ffff|noValueMin|20.0|95.0|below|66.0\n"
518             + "\nSTARTGROUP\tuniprot\n"
519             + "Iron\tFER_CAPAA\t-1\t39\t39\tMETAL\t0.0\n"
520             + "Turn\tFER_CAPAA\t-1\t36\t38\tGAMMA-TURN\t0.0\n"
521             + "ENDGROUP\tuniprot\n";
522     assertEquals(expected, exported);
523
524     /*
525      * now set Pfam visible
526      */
527     fr.setVisible("Pfam");
528     exported = featuresFile.printJalviewFormat(al.getSequencesArray(), fr,
529             false, false);
530     /*
531      * features are output within group, ordered by sequence and type
532      */
533     expected = "METAL\tcc9900\n"
534             + "Pfam\tff0000\n"
535             + "GAMMA-TURN\tscore|ff0000|00ffff|noValueMin|20.0|95.0|below|66.0\n"
536             + "\nSTARTGROUP\tuniprot\n"
537             + "Iron\tFER_CAPAA\t-1\t39\t39\tMETAL\t0.0\n"
538             + "<html>Pfam domain<a href=\"http://pfam.xfam.org/family/PF00111\">Pfam_3_4</a></html>\tFER_CAPAA\t-1\t20\t20\tPfam\t0.0\n"
539             + "Turn\tFER_CAPAA\t-1\t36\t38\tGAMMA-TURN\t0.0\n"
540             + "ENDGROUP\tuniprot\n"
541             // null / empty group features are output after named groups
542             + "\ndesc2\tFER_CAPAN\t-1\t4\t9\tPfam\n"
543             + "\ndesc4\tFER1_SOLLC\t-1\t5\t8\tPfam\t-2.6\n";
544     assertEquals(expected, exported);
545
546     /*
547      * hide uniprot group
548      */
549     fr.setGroupVisibility("uniprot", false);
550     expected = "METAL\tcc9900\n" + "Pfam\tff0000\n"
551             + "GAMMA-TURN\tscore|ff0000|00ffff|noValueMin|20.0|95.0|below|66.0\n"
552             + "\ndesc2\tFER_CAPAN\t-1\t4\t9\tPfam\n"
553             + "\ndesc4\tFER1_SOLLC\t-1\t5\t8\tPfam\t-2.6\n";
554     exported = featuresFile.printJalviewFormat(al.getSequencesArray(), fr,
555             false, false);
556     assertEquals(expected, exported);
557
558     /*
559      * include non-positional (overrides group not shown)
560      */
561     exported = featuresFile.printJalviewFormat(al.getSequencesArray(), fr,
562             true, false);
563     expected = "METAL\tcc9900\n" + "Pfam\tff0000\n"
564             + "GAMMA-TURN\tscore|ff0000|00ffff|noValueMin|20.0|95.0|below|66.0\n"
565             + "\nSTARTGROUP\tuniprot\n"
566             + "Cath\tFER_CAPAA\t-1\t0\t0\tDomain\t0.0\n"
567             + "ENDGROUP\tuniprot\n"
568             + "\ndesc1\tFER_CAPAN\t-1\t0\t0\tPfam\t1.3\n"
569             + "desc2\tFER_CAPAN\t-1\t4\t9\tPfam\n"
570             + "\ndesc3\tFER1_SOLLC\t-1\t0\t0\tPfam\n"
571             + "desc4\tFER1_SOLLC\t-1\t5\t8\tPfam\t-2.6\n";
572     assertEquals(expected, exported);
573   }
574
575   @Test(groups = { "Functional" })
576   public void testPrintGffFormat() throws Exception
577   {
578     File f = new File("examples/uniref50.fa");
579     AlignmentI al = readAlignmentFile(f);
580     AlignFrame af = new AlignFrame(al, 500, 500);
581
582     /*
583      * no features
584      */
585     FeaturesFile featuresFile = new FeaturesFile();
586     FeatureRendererModel fr = (FeatureRendererModel) af.alignPanel
587             .getFeatureRenderer();
588     String exported = featuresFile.printGffFormat(al.getSequencesArray(),
589             fr, false, false);
590     String gffHeader = "##gff-version 2\n";
591     assertEquals(gffHeader, exported);
592     exported = featuresFile.printGffFormat(al.getSequencesArray(), fr,
593             true, false);
594     assertEquals(gffHeader, exported);
595
596     /*
597      * add some features
598      */
599     al.getSequenceAt(0).addSequenceFeature(
600             new SequenceFeature("Domain", "Cath", 0, 0, 0f, "Uniprot"));
601     al.getSequenceAt(0).addSequenceFeature(
602             new SequenceFeature("METAL", "Cath", 39, 39, 1.2f, null));
603     al.getSequenceAt(1)
604             .addSequenceFeature(
605                     new SequenceFeature("GAMMA-TURN", "Turn", 36, 38, 2.1f,
606                             "s3dm"));
607     SequenceFeature sf = new SequenceFeature("Pfam", "", 20, 20, 0f,
608             "Uniprot");
609     sf.setStrand("+");
610     sf.setPhase("2");
611     sf.setValue("x", "y");
612     sf.setValue("black", "white");
613     Map<String, String> csq = new HashMap<>();
614     csq.put("SIFT", "benign,mostly benign,cloudy, with meatballs");
615     csq.put("consequence", "missense_variant");
616     sf.setValue("CSQ", csq);
617     al.getSequenceAt(1).addSequenceFeature(sf);
618
619     /*
620      * 'discover' features then hide all feature types
621      */
622     fr.findAllFeatures(true);
623     FeatureSettingsBean[] data = new FeatureSettingsBean[4];
624     FeatureColourI fc = new FeatureColour(Color.PINK);
625     data[0] = new FeatureSettingsBean("Domain", fc, null, false);
626     data[1] = new FeatureSettingsBean("METAL", fc, null, false);
627     data[2] = new FeatureSettingsBean("GAMMA-TURN", fc, null, false);
628     data[3] = new FeatureSettingsBean("Pfam", fc, null, false);
629     fr.setFeaturePriority(data);
630
631     /*
632      * with no features displayed, exclude non-positional features
633      */
634     exported = featuresFile.printGffFormat(al.getSequencesArray(), fr,
635             false, false);
636     assertEquals(gffHeader, exported);
637
638     /*
639      * include non-positional features
640      */
641     fr.setGroupVisibility("Uniprot", true);
642     fr.setGroupVisibility("s3dm", false);
643     exported = featuresFile.printGffFormat(al.getSequencesArray(), fr,
644             true, false);
645     String expected = gffHeader
646             + "FER_CAPAA\tUniprot\tDomain\t0\t0\t0.0\t.\t.\n";
647     assertEquals(expected, exported);
648
649     /*
650      * set METAL (in uniprot group) and GAMMA-TURN visible, but not Pfam
651      * only Uniprot group visible here...
652      */
653     fr.setVisible("METAL");
654     fr.setVisible("GAMMA-TURN");
655     exported = featuresFile.printGffFormat(al.getSequencesArray(), fr,
656             false, false);
657     // METAL feature has null group: description used for column 2
658     expected = gffHeader + "FER_CAPAA\tCath\tMETAL\t39\t39\t1.2\t.\t.\n";
659     assertEquals(expected, exported);
660
661     /*
662      * set s3dm group visible
663      */
664     fr.setGroupVisibility("s3dm", true);
665     exported = featuresFile.printGffFormat(al.getSequencesArray(), fr,
666             false, false);
667     // METAL feature has null group: description used for column 2
668     expected = gffHeader + "FER_CAPAA\tCath\tMETAL\t39\t39\t1.2\t.\t.\n"
669             + "FER_CAPAN\ts3dm\tGAMMA-TURN\t36\t38\t2.1\t.\t.\n";
670     assertEquals(expected, exported);
671
672     /*
673      * now set Pfam visible
674      */
675     fr.setVisible("Pfam");
676     exported = featuresFile.printGffFormat(al.getSequencesArray(), fr,
677             false, false);
678     // Pfam feature columns include strand(+), phase(2), attributes
679     expected = gffHeader
680             + "FER_CAPAA\tCath\tMETAL\t39\t39\t1.2\t.\t.\n"
681             // CSQ output as CSQ=att1=value1,att2=value2
682             // note all commas are encoded here which is wrong - it should be
683             // SIFT=benign,mostly benign,cloudy%2C with meatballs
684             + "FER_CAPAN\tUniprot\tPfam\t20\t20\t0.0\t+\t2\tx=y;black=white;"
685             + "CSQ=SIFT=benign%2Cmostly benign%2Ccloudy%2C with meatballs,consequence=missense_variant\n"
686             + "FER_CAPAN\ts3dm\tGAMMA-TURN\t36\t38\t2.1\t.\t.\n";
687     assertEquals(expected, exported);
688   }
689
690   /**
691    * Test for parsing of feature filters as represented in a Jalview features
692    * file
693    * 
694    * @throws Exception
695    */
696   @Test(groups = { "Functional" })
697   public void testParseFilters() throws Exception
698   {
699     Map<String, FeatureMatcherSetI> filters = new HashMap<>();
700     String text = "sequence_variant\tCSQ:PolyPhen NotContains 'damaging'\n"
701             + "missense_variant\t(label contains foobar) and (Score lt 1.3)";
702     FeaturesFile featuresFile = new FeaturesFile(text,
703             DataSourceType.PASTE);
704     featuresFile.parseFilters(filters);
705     assertEquals(filters.size(), 2);
706
707     FeatureMatcherSetI fm = filters.get("sequence_variant");
708     assertNotNull(fm);
709     Iterator<FeatureMatcherI> matchers = fm.getMatchers().iterator();
710     FeatureMatcherI matcher = matchers.next();
711     assertFalse(matchers.hasNext());
712     String[] attributes = matcher.getAttribute();
713     assertArrayEquals(attributes, new String[] { "CSQ", "PolyPhen" });
714     assertSame(matcher.getMatcher().getCondition(), Condition.NotContains);
715     assertEquals(matcher.getMatcher().getPattern(), "damaging");
716
717     fm = filters.get("missense_variant");
718     assertNotNull(fm);
719     matchers = fm.getMatchers().iterator();
720     matcher = matchers.next();
721     assertTrue(matcher.isByLabel());
722     assertSame(matcher.getMatcher().getCondition(), Condition.Contains);
723     assertEquals(matcher.getMatcher().getPattern(), "foobar");
724     matcher = matchers.next();
725     assertTrue(matcher.isByScore());
726     assertSame(matcher.getMatcher().getCondition(), Condition.LT);
727     assertEquals(matcher.getMatcher().getPattern(), "1.3");
728     assertEquals(matcher.getMatcher().getFloatValue(), 1.3f);
729
730     assertFalse(matchers.hasNext());
731   }
732
733   @Test(groups = { "Functional" })
734   public void testOutputFeatureFilters()
735   {
736     FeaturesFile ff = new FeaturesFile();
737     StringBuilder sb = new StringBuilder();
738     Map<String, FeatureColourI> visible = new HashMap<>();
739     visible.put("pfam", new FeatureColour(Color.red));
740     Map<String, FeatureMatcherSetI> featureFilters = new HashMap<>();
741
742     // with no filters, nothing is output
743     ff.outputFeatureFilters(sb, visible, featureFilters);
744     assertEquals("", sb.toString());
745
746     // with filter for not visible features only, nothing is output
747     FeatureMatcherSet filter = new FeatureMatcherSet();
748     filter.and(FeatureMatcher.byLabel(Condition.Present, null));
749     featureFilters.put("foobar", filter);
750     ff.outputFeatureFilters(sb, visible, featureFilters);
751     assertEquals("", sb.toString());
752
753     // with filters for visible feature types
754     FeatureMatcherSet filter2 = new FeatureMatcherSet();
755     filter2.and(FeatureMatcher.byAttribute(Condition.Present, null, "CSQ",
756             "PolyPhen"));
757     filter2.and(FeatureMatcher.byScore(Condition.LE, "-2.4"));
758     featureFilters.put("pfam", filter2);
759     visible.put("foobar", new FeatureColour(Color.blue));
760     ff.outputFeatureFilters(sb, visible, featureFilters);
761     String expected = "\nSTARTFILTERS\nfoobar\tLabel Present\npfam\t(CSQ:PolyPhen Present) AND (Score LE -2.4)\nENDFILTERS\n";
762     assertEquals(expected, sb.toString());
763   }
764
765   /**
766    * Output as GFF should not include features which are not visible due to
767    * colour threshold or feature filter settings
768    * 
769    * @throws Exception
770    */
771   @Test(groups = { "Functional" })
772   public void testPrintGffFormat_withFilters() throws Exception
773   {
774     File f = new File("examples/uniref50.fa");
775     AlignmentI al = readAlignmentFile(f);
776     AlignFrame af = new AlignFrame(al, 500, 500);
777     SequenceFeature sf1 = new SequenceFeature("METAL", "Cath", 39, 39, 1.2f,
778             null);
779     sf1.setValue("clin_sig", "Likely Pathogenic");
780     sf1.setValue("AF", "24");
781     al.getSequenceAt(0).addSequenceFeature(sf1);
782     SequenceFeature sf2 = new SequenceFeature("METAL", "Cath", 41, 41, 0.6f,
783             null);
784     sf2.setValue("clin_sig", "Benign");
785     sf2.setValue("AF", "46");
786     al.getSequenceAt(0).addSequenceFeature(sf2);
787   
788     FeaturesFile featuresFile = new FeaturesFile();
789     FeatureRenderer fr = af.alignPanel.getFeatureRenderer();
790     final String gffHeader = "##gff-version 2\n";
791
792     fr.setVisible("METAL");
793     fr.setColour("METAL", new FeatureColour(Color.PINK));
794     String exported = featuresFile.printGffFormat(al.getSequencesArray(),
795             fr, false, false);
796     String expected = gffHeader
797             + "FER_CAPAA\tCath\tMETAL\t39\t39\t1.2\t.\t.\tclin_sig=Likely Pathogenic;AF=24\n"
798             + "FER_CAPAA\tCath\tMETAL\t41\t41\t0.6\t.\t.\tclin_sig=Benign;AF=46\n";
799     assertEquals(expected, exported);
800
801     /*
802      * now threshold to Score > 1.1 - should exclude sf2
803      */
804     FeatureColourI fc = new FeatureColour(null, Color.white, Color.BLACK,
805             Color.white, 0f, 2f);
806     fc.setAboveThreshold(true);
807     fc.setThreshold(1.1f);
808     fr.setColour("METAL", fc);
809     exported = featuresFile.printGffFormat(al.getSequencesArray(), fr,
810             false, false);
811     expected = gffHeader
812             + "FER_CAPAA\tCath\tMETAL\t39\t39\t1.2\t.\t.\tclin_sig=Likely Pathogenic;AF=24\n";
813     assertEquals(expected, exported);
814
815     /*
816      * remove threshold and check sf2 is exported
817      */
818     fc.setAboveThreshold(false);
819     exported = featuresFile.printGffFormat(al.getSequencesArray(), fr,
820             false, false);
821     expected = gffHeader
822             + "FER_CAPAA\tCath\tMETAL\t39\t39\t1.2\t.\t.\tclin_sig=Likely Pathogenic;AF=24\n"
823             + "FER_CAPAA\tCath\tMETAL\t41\t41\t0.6\t.\t.\tclin_sig=Benign;AF=46\n";
824     assertEquals(expected, exported);
825
826     /*
827      * filter on (clin_sig contains Benign) - should include sf2 and exclude sf1
828      */
829     FeatureMatcherSetI filter = new FeatureMatcherSet();
830     filter.and(FeatureMatcher.byAttribute(Condition.Contains, "benign",
831             "clin_sig"));
832     fr.setFeatureFilter("METAL", filter);
833     exported = featuresFile.printGffFormat(al.getSequencesArray(), fr,
834             false, false);
835     expected = gffHeader
836             + "FER_CAPAA\tCath\tMETAL\t41\t41\t0.6\t.\t.\tclin_sig=Benign;AF=46\n";
837     assertEquals(expected, exported);
838   }
839
840   /**
841    * Output as Jalview should not include features which are not visible due to
842    * colour threshold or feature filter settings
843    * 
844    * @throws Exception
845    */
846   @Test(groups = { "Functional" })
847   public void testPrintJalviewFormat_withFilters() throws Exception
848   {
849     File f = new File("examples/uniref50.fa");
850     AlignmentI al = readAlignmentFile(f);
851     AlignFrame af = new AlignFrame(al, 500, 500);
852     SequenceFeature sf1 = new SequenceFeature("METAL", "Cath", 39, 39, 1.2f,
853             "grp1");
854     sf1.setValue("clin_sig", "Likely Pathogenic");
855     sf1.setValue("AF", "24");
856     al.getSequenceAt(0).addSequenceFeature(sf1);
857     SequenceFeature sf2 = new SequenceFeature("METAL", "Cath", 41, 41, 0.6f,
858             "grp2");
859     sf2.setValue("clin_sig", "Benign");
860     sf2.setValue("AF", "46");
861     al.getSequenceAt(0).addSequenceFeature(sf2);
862   
863     FeaturesFile featuresFile = new FeaturesFile();
864     FeatureRenderer fr = af.alignPanel.getFeatureRenderer();
865     fr.findAllFeatures(true);
866   
867     fr.setVisible("METAL");
868     fr.setColour("METAL", new FeatureColour(Color.PINK));
869     String exported = featuresFile.printJalviewFormat(
870             al.getSequencesArray(),
871             fr, false, false);
872     String expected = "METAL\tffafaf\n\nSTARTGROUP\tgrp1\n"
873             + "Cath\tFER_CAPAA\t-1\t39\t39\tMETAL\t1.2\n"
874             + "ENDGROUP\tgrp1\n\nSTARTGROUP\tgrp2\n"
875             + "Cath\tFER_CAPAA\t-1\t41\t41\tMETAL\t0.6\n"
876             + "ENDGROUP\tgrp2\n";
877     assertEquals(expected, exported);
878   
879     /*
880      * now threshold to Score > 1.1 - should exclude sf2
881      * (and there should be no empty STARTGROUP/ENDGROUP output)
882      */
883     FeatureColourI fc = new FeatureColour(null, Color.white, Color.BLACK,
884             Color.white, 0f, 2f);
885     fc.setAboveThreshold(true);
886     fc.setThreshold(1.1f);
887     fr.setColour("METAL", fc);
888     exported = featuresFile.printJalviewFormat(al.getSequencesArray(), fr,
889             false, false);
890     expected = "METAL\tscore|ffffff|000000|noValueMin|abso|0.0|2.0|above|1.1\n\n"
891             + "STARTGROUP\tgrp1\n"
892             + "Cath\tFER_CAPAA\t-1\t39\t39\tMETAL\t1.2\n"
893             + "ENDGROUP\tgrp1\n";
894     assertEquals(expected, exported);
895   
896     /*
897      * remove threshold and check sf2 is exported
898      */
899     fc.setAboveThreshold(false);
900     exported = featuresFile.printJalviewFormat(al.getSequencesArray(), fr,
901             false, false);
902     expected = "METAL\tscore|ffffff|000000|noValueMin|abso|0.0|2.0|none\n\n"
903             + "STARTGROUP\tgrp1\n"
904             + "Cath\tFER_CAPAA\t-1\t39\t39\tMETAL\t1.2\n"
905             + "ENDGROUP\tgrp1\n\nSTARTGROUP\tgrp2\n"
906             + "Cath\tFER_CAPAA\t-1\t41\t41\tMETAL\t0.6\n"
907             + "ENDGROUP\tgrp2\n";
908     assertEquals(expected, exported);
909   
910     /*
911      * filter on (clin_sig contains Benign) - should include sf2 and exclude sf1
912      */
913     FeatureMatcherSetI filter = new FeatureMatcherSet();
914     filter.and(FeatureMatcher.byAttribute(Condition.Contains, "benign",
915             "clin_sig"));
916     fr.setFeatureFilter("METAL", filter);
917     exported = featuresFile.printJalviewFormat(al.getSequencesArray(), fr,
918             false, false);
919     expected = "FER_CAPAA\tCath\tMETAL\t41\t41\t0.6\t.\t.\n";
920     expected = "METAL\tscore|ffffff|000000|noValueMin|abso|0.0|2.0|none\n\n"
921             + "STARTFILTERS\nMETAL\tclin_sig Contains benign\nENDFILTERS\n\n"
922             + "STARTGROUP\tgrp2\n"
923             + "Cath\tFER_CAPAA\t-1\t41\t41\tMETAL\t0.6\n"
924             + "ENDGROUP\tgrp2\n";
925     assertEquals(expected, exported);
926   }
927 }