From 19b1c75919e4d2f255cbaf811dc15d26850490e6 Mon Sep 17 00:00:00 2001 From: gmungoc Date: Fri, 8 Dec 2017 15:21:04 +0000 Subject: [PATCH] JAL-2843 read/write feature filters from/to Jalview features file --- src/jalview/controller/AlignViewController.java | 18 +- src/jalview/io/FeaturesFile.java | 190 +++++++++++++++++--- .../datamodel/features/FeatureAttributesTest.java | 17 +- test/jalview/io/FeaturesFileTest.java | 63 ++++++- 4 files changed, 247 insertions(+), 41 deletions(-) diff --git a/src/jalview/controller/AlignViewController.java b/src/jalview/controller/AlignViewController.java index 5662d0c..d992e4e 100644 --- a/src/jalview/controller/AlignViewController.java +++ b/src/jalview/controller/AlignViewController.java @@ -352,25 +352,25 @@ public class AlignViewController implements AlignViewControllerI public boolean parseFeaturesFile(String file, DataSourceType protocol, boolean relaxedIdMatching) { - boolean featuresFile = false; + boolean featuresAdded = false; + FeatureRenderer fr = alignPanel.getFeatureRenderer(); try { - featuresFile = new FeaturesFile(false, file, protocol).parse( - viewport.getAlignment().getDataset(), - alignPanel.getFeatureRenderer().getFeatureColours(), false, - relaxedIdMatching); + featuresAdded = new FeaturesFile(false, file, protocol).parse( + viewport.getAlignment().getDataset(), fr.getFeatureColours(), + fr.getFeatureFilters(), false, relaxedIdMatching); } catch (Exception ex) { ex.printStackTrace(); } - if (featuresFile) + if (featuresAdded) { avcg.refreshFeatureUI(true); - if (alignPanel.getFeatureRenderer() != null) + if (fr != null) { // update the min/max ranges where necessary - alignPanel.getFeatureRenderer().findAllFeatures(true); + fr.findAllFeatures(true); } if (avcg.getFeatureSettingsUI() != null) { @@ -379,7 +379,7 @@ public class AlignViewController implements AlignViewControllerI alignPanel.paintAlignment(true, true); } - return featuresFile; + return featuresAdded; } diff --git 
a/src/jalview/io/FeaturesFile.java b/src/jalview/io/FeaturesFile.java index d2282b1..99663c8 100755 --- a/src/jalview/io/FeaturesFile.java +++ b/src/jalview/io/FeaturesFile.java @@ -31,6 +31,8 @@ import jalview.datamodel.AlignmentI; import jalview.datamodel.SequenceDummy; import jalview.datamodel.SequenceFeature; import jalview.datamodel.SequenceI; +import jalview.datamodel.features.FeatureMatcherSet; +import jalview.datamodel.features.FeatureMatcherSetI; import jalview.io.gff.GffHelperBase; import jalview.io.gff.GffHelperFactory; import jalview.io.gff.GffHelperI; @@ -68,6 +70,16 @@ import java.util.Map.Entry; */ public class FeaturesFile extends AlignFile implements FeaturesSourceI { + private static final String TAB_REGEX = "\\t"; + + private static final String STARTGROUP = "STARTGROUP"; + + private static final String ENDGROUP = "ENDGROUP"; + + private static final String STARTFILTERS = "STARTFILTERS"; + + private static final String ENDFILTERS = "ENDFILTERS"; + private static final String ID_NOT_SPECIFIED = "ID_NOT_SPECIFIED"; private static final String NOTE = "Note"; @@ -169,7 +181,7 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI * @param align * - alignment/dataset containing sequences that are to be annotated * @param colours - * - hashtable to store feature colour definitions + * - map to store feature colour definitions * @param removeHTML * - process html strings into plain text * @param relaxedIdmatching @@ -180,11 +192,34 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI Map colours, boolean removeHTML, boolean relaxedIdmatching) { - Map gffProps = new HashMap(); + return parse(align, colours, null, removeHTML, relaxedIdmatching); + } + + /** + * Parse GFF or Jalview format sequence features file + * + * @param align + * - alignment/dataset containing sequences that are to be annotated + * @param colours + * - map to store feature colour definitions + * @param filters + * - map to store feature filter 
definitions + * @param removeHTML + * - process html strings into plain text + * @param relaxedIdmatching + * - when true, ID matches to compound sequence IDs are allowed + * @return true if features were added + */ + public boolean parse(AlignmentI align, + Map colours, + Map filters, boolean removeHTML, + boolean relaxedIdmatching) + { + Map gffProps = new HashMap<>(); /* * keep track of any sequences we try to create from the data */ - List newseqs = new ArrayList(); + List newseqs = new ArrayList<>(); String line = null; try @@ -204,7 +239,7 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI continue; } - gffColumns = line.split("\\t"); // tab as regex + gffColumns = line.split(TAB_REGEX); if (gffColumns.length == 1) { if (line.trim().equalsIgnoreCase("GFF")) @@ -218,18 +253,23 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI } } - if (gffColumns.length > 1 && gffColumns.length < 4) + if (gffColumns.length > 0 && gffColumns.length < 4) { /* * if 2 or 3 tokens, we anticipate either 'startgroup', 'endgroup' or * a feature type colour specification */ String ft = gffColumns[0]; - if (ft.equalsIgnoreCase("startgroup")) + if (ft.equalsIgnoreCase(STARTFILTERS)) + { + parseFilters(filters); + continue; + } + if (ft.equalsIgnoreCase(STARTGROUP)) { featureGroup = gffColumns[1]; } - else if (ft.equalsIgnoreCase("endgroup")) + else if (ft.equalsIgnoreCase(ENDGROUP)) { // We should check whether this is the current group, // but at present there's no way of showing more than 1 group @@ -290,6 +330,43 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI } /** + * Reads input lines from STARTFILTERS to ENDFILTERS and adds a feature type + * filter to the map for each line parsed. After exit from this method, + * nextLine() should return the line after ENDFILTERS (or we are already at + * end of file if ENDFILTERS was missing). 
+ * + * @param filters map to which a filter is added per feature type; if null, lines are still consumed but nothing is stored + * @throws IOException if reading an input line fails + */ + protected void parseFilters(Map filters) + throws IOException + { + String line; + while ((line = nextLine()) != null) + { + if (line.toUpperCase().startsWith(ENDFILTERS)) + { + return; + } + String[] tokens = line.split(TAB_REGEX); + if (tokens.length != 2) + { + System.err.println(String.format("Invalid token count %d for %s", + tokens.length, line)); + } + else + { + String featureType = tokens[0]; + FeatureMatcherSetI fm = FeatureMatcherSet.fromString(tokens[1]); + if (fm != null && filters != null) + { + filters.put(featureType, fm); + } + } + } + } + + /** * Try to parse a Jalview format feature specification and add it as a * sequence feature to any matching sequences in the alignment. Returns true * if successful (a feature was added), or false if not. @@ -487,15 +564,16 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI } /** - * Returns contents of a Jalview format features file, for visible features, - * as filtered by type and group. Features with a null group are displayed if - * their feature type is visible. Non-positional features may optionally be - * included (with no check on type or group). + * Returns contents of a Jalview format features file, for visible features, as + * filtered by type and group. Features with a null group are displayed if their + * feature type is visible. Non-positional features may optionally be included + * (with no check on type or group).
* * @param sequences * source of features * @param visible * map of colour for each visible feature type + * @param featureFilters * @param visibleFeatureGroups * @param includeNonPositional * if true, include non-positional features (regardless of group or @@ -504,6 +582,7 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI */ public String printJalviewFormat(SequenceI[] sequences, Map visible, + Map featureFilters, List visibleFeatureGroups, boolean includeNonPositional) { if (!includeNonPositional && (visible == null || visible.isEmpty())) @@ -531,10 +610,15 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI .toArray(new String[visible.keySet().size()]); /* + * feature filters if any + */ + outputFeatureFilters(out, visible, featureFilters); + + /* * sort groups alphabetically, and ensure that features with a * null or empty group are output after those in named groups */ - List sortedGroups = new ArrayList(visibleFeatureGroups); + List sortedGroups = new ArrayList<>(visibleFeatureGroups); sortedGroups.remove(null); sortedGroups.remove(""); Collections.sort(sortedGroups); @@ -560,13 +644,76 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI } } - for (String group : sortedGroups) + /* + * positional features within groups + */ + foundSome |= outputFeaturesByGroup(out, sortedGroups, types, sequences); + + return foundSome ? 
out.toString() : "No Features Visible"; + } + + /** + * Outputs any feature filters defined for visible feature types, sandwiched by + * STARTFILTERS and ENDFILTERS lines + * + * @param out + * @param visible + * @param featureFilters + */ + void outputFeatureFilters(StringBuilder out, + Map visible, + Map featureFilters) + { + if (visible == null || featureFilters == null + || featureFilters.isEmpty()) + { + return; + } + + boolean first = true; + for (String featureType : visible.keySet()) + { + FeatureMatcherSetI filter = featureFilters.get(featureType); + if (filter != null) + { + if (first) + { + first = false; + out.append(newline).append(STARTFILTERS).append(newline); + } + out.append(featureType).append(TAB).append(filter.toStableString()) + .append(newline); + } + } + if (!first) + { + out.append(ENDFILTERS).append(newline).append(newline); + } + + } + + /** + * Appends output of sequence features within feature groups to the output + * buffer. Groups other than the null or empty group are sandwiched by + * STARTGROUP and ENDGROUP lines. 
+ * + * @param out + * @param groups + * @param featureTypes + * @param sequences + * @return + */ + private boolean outputFeaturesByGroup(StringBuilder out, + List groups, String[] featureTypes, SequenceI[] sequences) + { + boolean foundSome = false; + for (String group : groups) { boolean isNamedGroup = (group != null && !"".equals(group)); if (isNamedGroup) { out.append(newline); - out.append("STARTGROUP").append(TAB); + out.append(STARTGROUP).append(TAB); out.append(group); out.append(newline); } @@ -577,11 +724,11 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI for (int i = 0; i < sequences.length; i++) { String sequenceName = sequences[i].getName(); - List features = new ArrayList(); - if (types.length > 0) + List features = new ArrayList<>(); + if (featureTypes.length > 0) { features.addAll(sequences[i].getFeatures().getFeaturesForGroup( - true, group, types)); + true, group, featureTypes)); } for (SequenceFeature sequenceFeature : features) @@ -593,13 +740,12 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI if (isNamedGroup) { - out.append("ENDGROUP").append(TAB); + out.append(ENDGROUP).append(TAB); out.append(group); out.append(newline); } } - - return foundSome ? 
out.toString() : "No Features Visible"; + return foundSome; } /** @@ -688,7 +834,7 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI dataset = new Alignment(new SequenceI[] {}); } - Map featureColours = new HashMap(); + Map featureColours = new HashMap<>(); boolean parseResult = parse(dataset, featureColours, false, true); if (!parseResult) { @@ -748,7 +894,7 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI for (SequenceI seq : sequences) { - List features = new ArrayList(); + List features = new ArrayList<>(); if (includeNonPositionalFeatures) { features.addAll(seq.getFeatures().getNonPositionalFeatures()); diff --git a/test/jalview/datamodel/features/FeatureAttributesTest.java b/test/jalview/datamodel/features/FeatureAttributesTest.java index 4b7a435..e47c787 100644 --- a/test/jalview/datamodel/features/FeatureAttributesTest.java +++ b/test/jalview/datamodel/features/FeatureAttributesTest.java @@ -12,6 +12,7 @@ import java.util.HashMap; import java.util.Map; import org.testng.annotations.AfterMethod; +import org.testng.annotations.BeforeClass; import org.testng.annotations.Test; import junit.extensions.PA; @@ -20,6 +21,16 @@ public class FeatureAttributesTest { /** + * clear down attributes map before tests + */ + @BeforeClass + public void setUp() + { + FeatureAttributes fa = FeatureAttributes.getInstance(); + ((Map) PA.getValue(fa, "attributes")).clear(); + } + + /** * clear down attributes map after tests */ @AfterMethod @@ -56,7 +67,7 @@ public class FeatureAttributesTest "csq", "AF" }) < 0); } - @Test + @Test(groups = "Functional") public void testGetMinMax() { SequenceFeature sf = new SequenceFeature("Pfam", "desc", 10, 20, @@ -88,7 +99,7 @@ public class FeatureAttributesTest * Test the method that returns an attribute description, provided it is * recorded and unique */ - @Test + @Test(groups = "Functional") public void testGetDescription() { FeatureAttributes fa = FeatureAttributes.getInstance(); @@ -102,7 
+113,7 @@ public class FeatureAttributesTest assertNull(fa.getDescription("Pfam", "kd")); } - @Test + @Test(groups = "Functional") public void testDatatype() { FeatureAttributes fa = FeatureAttributes.getInstance(); diff --git a/test/jalview/io/FeaturesFileTest.java b/test/jalview/io/FeaturesFileTest.java index 152ab84..fd4cad7 100644 --- a/test/jalview/io/FeaturesFileTest.java +++ b/test/jalview/io/FeaturesFileTest.java @@ -23,7 +23,9 @@ package jalview.io; import static org.testng.AssertJUnit.assertEquals; import static org.testng.AssertJUnit.assertFalse; import static org.testng.AssertJUnit.assertNotNull; +import static org.testng.AssertJUnit.assertSame; import static org.testng.AssertJUnit.assertTrue; +import static org.testng.internal.junit.ArrayAsserts.assertArrayEquals; import jalview.api.FeatureColourI; import jalview.api.FeatureRenderer; @@ -32,11 +34,14 @@ import jalview.datamodel.AlignmentI; import jalview.datamodel.SequenceDummy; import jalview.datamodel.SequenceFeature; import jalview.datamodel.SequenceI; +import jalview.datamodel.features.FeatureMatcherI; +import jalview.datamodel.features.FeatureMatcherSetI; import jalview.datamodel.features.SequenceFeatures; import jalview.gui.AlignFrame; import jalview.gui.Desktop; import jalview.gui.JvOptionPane; import jalview.structure.StructureSelectionManager; +import jalview.util.matcher.Condition; import java.awt.Color; import java.io.File; @@ -44,6 +49,7 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; +import java.util.Iterator; import java.util.List; import java.util.Map; @@ -467,10 +473,10 @@ public class FeaturesFileTest */ FeatureRenderer fr = af.alignPanel.getFeatureRenderer(); Map visible = fr.getDisplayedFeatureCols(); - List visibleGroups = new ArrayList( + List visibleGroups = new ArrayList<>( Arrays.asList(new String[] {})); String exported = featuresFile.printJalviewFormat( - al.getSequencesArray(), visible, visibleGroups, false); + 
al.getSequencesArray(), visible, null, visibleGroups, false); String expected = "No Features Visible"; assertEquals(expected, exported); @@ -479,7 +485,7 @@ public class FeaturesFileTest */ visibleGroups.add("uniprot"); exported = featuresFile.printJalviewFormat(al.getSequencesArray(), - visible, visibleGroups, true); + visible, null, visibleGroups, true); expected = "Cath\tFER_CAPAA\t-1\t0\t0\tDomain\t0.0\n" + "desc1\tFER_CAPAN\t-1\t0\t0\tPfam\t1.3\n" + "desc3\tFER1_SOLLC\t-1\t0\t0\tPfam\n" // NaN is not output @@ -493,7 +499,7 @@ public class FeaturesFileTest fr.setVisible("GAMMA-TURN"); visible = fr.getDisplayedFeatureCols(); exported = featuresFile.printJalviewFormat(al.getSequencesArray(), - visible, visibleGroups, false); + visible, null, visibleGroups, false); expected = "METAL\tcc9900\n" + "GAMMA-TURN\tff0000|00ffff|20.0|95.0|below|66.0\n" + "\nSTARTGROUP\tuniprot\n" @@ -508,7 +514,7 @@ public class FeaturesFileTest fr.setVisible("Pfam"); visible = fr.getDisplayedFeatureCols(); exported = featuresFile.printJalviewFormat(al.getSequencesArray(), - visible, visibleGroups, false); + visible, null, visibleGroups, false); /* * features are output within group, ordered by sequence and by type */ @@ -539,8 +545,8 @@ public class FeaturesFileTest */ FeaturesFile featuresFile = new FeaturesFile(); FeatureRenderer fr = af.alignPanel.getFeatureRenderer(); - Map visible = new HashMap(); - List visibleGroups = new ArrayList( + Map visible = new HashMap<>(); + List visibleGroups = new ArrayList<>( Arrays.asList(new String[] {})); String exported = featuresFile.printGffFormat(al.getSequencesArray(), visible, visibleGroups, false); @@ -623,4 +629,47 @@ public class FeaturesFileTest + "FER_CAPAN\tUniprot\tPfam\t20\t20\t0.0\t+\t2\tx=y;black=white\n"; assertEquals(expected, exported); } + + /** + * Test for parsing of feature filters as represented in a Jalview features + * file + * + * @throws Exception + */ + @Test(groups = { "Functional" }) + public void testParseFilters() 
throws Exception + { + Map filters = new HashMap<>(); + String text = "sequence_variant\tCSQ:PolyPhen NotContains 'damaging'\n" + + "missense_variant\t(label contains foobar) and (Score lt 1.3)"; + FeaturesFile featuresFile = new FeaturesFile(text, + DataSourceType.PASTE); + featuresFile.parseFilters(filters); + assertEquals(filters.size(), 2); + + FeatureMatcherSetI fm = filters.get("sequence_variant"); + assertNotNull(fm); + Iterator matchers = fm.getMatchers().iterator(); + FeatureMatcherI matcher = matchers.next(); + assertFalse(matchers.hasNext()); + String[] attributes = matcher.getAttribute(); + assertArrayEquals(attributes, new String[] { "CSQ", "PolyPhen" }); + assertSame(matcher.getMatcher().getCondition(), Condition.NotContains); + assertEquals(matcher.getMatcher().getPattern(), "damaging"); + + fm = filters.get("missense_variant"); + assertNotNull(fm); + matchers = fm.getMatchers().iterator(); + matcher = matchers.next(); + assertTrue(matcher.isByLabel()); + assertSame(matcher.getMatcher().getCondition(), Condition.Contains); + assertEquals(matcher.getMatcher().getPattern(), "foobar"); + matcher = matchers.next(); + assertTrue(matcher.isByScore()); + assertSame(matcher.getMatcher().getCondition(), Condition.LT); + assertEquals(matcher.getMatcher().getPattern(), "1.3"); + assertEquals(matcher.getMatcher().getFloatValue(), 1.3f); + + assertFalse(matchers.hasNext()); + } } -- 1.7.10.2