JAL-2843 read/write feature filters from/to Jalview features file
author gmungoc <g.m.carstairs@dundee.ac.uk>
Fri, 8 Dec 2017 15:21:04 +0000 (15:21 +0000)
committer gmungoc <g.m.carstairs@dundee.ac.uk>
Fri, 8 Dec 2017 15:21:04 +0000 (15:21 +0000)
src/jalview/controller/AlignViewController.java
src/jalview/io/FeaturesFile.java
test/jalview/datamodel/features/FeatureAttributesTest.java
test/jalview/io/FeaturesFileTest.java

index 5662d0c..d992e4e 100644 (file)
@@ -352,25 +352,25 @@ public class AlignViewController implements AlignViewControllerI
   public boolean parseFeaturesFile(String file, DataSourceType protocol,
           boolean relaxedIdMatching)
   {
-    boolean featuresFile = false;
+    boolean featuresAdded = false;
+    FeatureRenderer fr = alignPanel.getFeatureRenderer();
     try
     {
-      featuresFile = new FeaturesFile(false, file, protocol).parse(
-              viewport.getAlignment().getDataset(),
-              alignPanel.getFeatureRenderer().getFeatureColours(), false,
-              relaxedIdMatching);
+      featuresAdded = new FeaturesFile(false, file, protocol).parse(
+              viewport.getAlignment().getDataset(), fr.getFeatureColours(),
+              fr.getFeatureFilters(), false, relaxedIdMatching);
     } catch (Exception ex)
     {
       ex.printStackTrace();
     }
 
-    if (featuresFile)
+    if (featuresAdded)
     {
       avcg.refreshFeatureUI(true);
-      if (alignPanel.getFeatureRenderer() != null)
+      if (fr != null)
       {
         // update the min/max ranges where necessary
-        alignPanel.getFeatureRenderer().findAllFeatures(true);
+        fr.findAllFeatures(true);
       }
       if (avcg.getFeatureSettingsUI() != null)
       {
@@ -379,7 +379,7 @@ public class AlignViewController implements AlignViewControllerI
       alignPanel.paintAlignment(true, true);
     }
 
-    return featuresFile;
+    return featuresAdded;
 
   }
 
index d2282b1..99663c8 100755 (executable)
@@ -31,6 +31,8 @@ import jalview.datamodel.AlignmentI;
 import jalview.datamodel.SequenceDummy;
 import jalview.datamodel.SequenceFeature;
 import jalview.datamodel.SequenceI;
+import jalview.datamodel.features.FeatureMatcherSet;
+import jalview.datamodel.features.FeatureMatcherSetI;
 import jalview.io.gff.GffHelperBase;
 import jalview.io.gff.GffHelperFactory;
 import jalview.io.gff.GffHelperI;
@@ -68,6 +70,16 @@ import java.util.Map.Entry;
  */
 public class FeaturesFile extends AlignFile implements FeaturesSourceI
 {
+  private static final String TAB_REGEX = "\\t";
+
+  private static final String STARTGROUP = "STARTGROUP";
+
+  private static final String ENDGROUP = "ENDGROUP";
+
+  private static final String STARTFILTERS = "STARTFILTERS";
+
+  private static final String ENDFILTERS = "ENDFILTERS";
+
   private static final String ID_NOT_SPECIFIED = "ID_NOT_SPECIFIED";
 
   private static final String NOTE = "Note";
@@ -169,7 +181,7 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI
    * @param align
    *          - alignment/dataset containing sequences that are to be annotated
    * @param colours
-   *          - hashtable to store feature colour definitions
+   *          - map to store feature colour definitions
    * @param removeHTML
    *          - process html strings into plain text
    * @param relaxedIdmatching
@@ -180,11 +192,34 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI
           Map<String, FeatureColourI> colours, boolean removeHTML,
           boolean relaxedIdmatching)
   {
-    Map<String, String> gffProps = new HashMap<String, String>();
+    return parse(align, colours, null, removeHTML, relaxedIdmatching);
+  }
+
+  /**
+   * Parse GFF or Jalview format sequence features file
+   * 
+   * @param align
+   *          - alignment/dataset containing sequences that are to be annotated
+   * @param colours
+   *          - map to store feature colour definitions
+   * @param filters
+   *          - map to store feature filter definitions
+   * @param removeHTML
+   *          - process html strings into plain text
+   * @param relaxedIdmatching
+   *          - when true, ID matches to compound sequence IDs are allowed
+   * @return true if features were added
+   */
+  public boolean parse(AlignmentI align,
+          Map<String, FeatureColourI> colours,
+          Map<String, FeatureMatcherSetI> filters, boolean removeHTML,
+          boolean relaxedIdmatching)
+  {
+    Map<String, String> gffProps = new HashMap<>();
     /*
      * keep track of any sequences we try to create from the data
      */
-    List<SequenceI> newseqs = new ArrayList<SequenceI>();
+    List<SequenceI> newseqs = new ArrayList<>();
 
     String line = null;
     try
@@ -204,7 +239,7 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI
           continue;
         }
 
-        gffColumns = line.split("\\t"); // tab as regex
+        gffColumns = line.split(TAB_REGEX);
         if (gffColumns.length == 1)
         {
           if (line.trim().equalsIgnoreCase("GFF"))
@@ -218,18 +253,23 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI
           }
         }
 
-        if (gffColumns.length > 1 && gffColumns.length < 4)
+        if (gffColumns.length > 0 && gffColumns.length < 4)
         {
           /*
            * if 2 or 3 tokens, we anticipate either 'startgroup', 'endgroup' or
            * a feature type colour specification
            */
           String ft = gffColumns[0];
-          if (ft.equalsIgnoreCase("startgroup"))
+          if (ft.equalsIgnoreCase(STARTFILTERS))
+          {
+            parseFilters(filters);
+            continue;
+          }
+          if (ft.equalsIgnoreCase(STARTGROUP))
           {
             featureGroup = gffColumns[1];
           }
-          else if (ft.equalsIgnoreCase("endgroup"))
+          else if (ft.equalsIgnoreCase(ENDGROUP))
           {
             // We should check whether this is the current group,
             // but at present there's no way of showing more than 1 group
@@ -290,6 +330,43 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI
   }
 
   /**
+   * Reads input lines from STARTFILTERS to ENDFILTERS and adds a feature type
+   * filter to the map for each line parsed. After exit from this method,
+   * nextLine() should return the line after ENDFILTERS (or we are already at
+   * end of file if ENDFILTERS was missing).
+   * 
+   * @param filters
+   * @throws IOException
+   */
+  protected void parseFilters(Map<String, FeatureMatcherSetI> filters)
+          throws IOException
+  {
+    String line;
+    while ((line = nextLine()) != null)
+    {
+      if (line.toUpperCase().startsWith(ENDFILTERS))
+      {
+        return;
+      }
+      String[] tokens = line.split(TAB_REGEX);
+      if (tokens.length != 2)
+      {
+        System.err.println(String.format("Invalid token count %d for %d",
+                tokens.length, line));
+      }
+      else
+      {
+        String featureType = tokens[0];
+        FeatureMatcherSetI fm = FeatureMatcherSet.fromString(tokens[1]);
+        if (fm != null && filters != null)
+        {
+          filters.put(featureType, fm);
+        }
+      }
+    }
+  }
+
+  /**
    * Try to parse a Jalview format feature specification and add it as a
    * sequence feature to any matching sequences in the alignment. Returns true
    * if successful (a feature was added), or false if not.
@@ -487,15 +564,16 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI
   }
 
   /**
-   * Returns contents of a Jalview format features file, for visible features,
-   * as filtered by type and group. Features with a null group are displayed if
-   * their feature type is visible. Non-positional features may optionally be
-   * included (with no check on type or group).
+   * Returns contents of a Jalview format features file, for visible features, as
+   * filtered by type and group. Features with a null group are displayed if their
+   * feature type is visible. Non-positional features may optionally be included
+   * (with no check on type or group).
    * 
    * @param sequences
    *          source of features
    * @param visible
    *          map of colour for each visible feature type
+   * @param featureFilters
    * @param visibleFeatureGroups
    * @param includeNonPositional
    *          if true, include non-positional features (regardless of group or
@@ -504,6 +582,7 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI
    */
   public String printJalviewFormat(SequenceI[] sequences,
           Map<String, FeatureColourI> visible,
+          Map<String, FeatureMatcherSetI> featureFilters,
           List<String> visibleFeatureGroups, boolean includeNonPositional)
   {
     if (!includeNonPositional && (visible == null || visible.isEmpty()))
@@ -531,10 +610,15 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI
             .toArray(new String[visible.keySet().size()]);
 
     /*
+     * feature filters if any
+     */
+    outputFeatureFilters(out, visible, featureFilters);
+
+    /*
      * sort groups alphabetically, and ensure that features with a
      * null or empty group are output after those in named groups
      */
-    List<String> sortedGroups = new ArrayList<String>(visibleFeatureGroups);
+    List<String> sortedGroups = new ArrayList<>(visibleFeatureGroups);
     sortedGroups.remove(null);
     sortedGroups.remove("");
     Collections.sort(sortedGroups);
@@ -560,13 +644,76 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI
       }
     }
 
-    for (String group : sortedGroups)
+    /*
+     * positional features within groups
+     */
+    foundSome |= outputFeaturesByGroup(out, sortedGroups, types, sequences);
+
+    return foundSome ? out.toString() : "No Features Visible";
+  }
+
+  /**
+   * Appends to the output buffer one "feature type, TAB, filter" line for each
+   * visible feature type that has a filter, with a STARTFILTERS line before
+   * the first and an ENDFILTERS line after the last. Nothing is appended if no
+   * visible feature type has a filter.
+   * 
+   * @param out
+   *          buffer to append to
+   * @param visible
+   *          map whose keys are the feature types to consider
+   * @param featureFilters
+   *          map of filter by feature type
+   */
+  void outputFeatureFilters(StringBuilder out,
+          Map<String, FeatureColourI> visible,
+          Map<String, FeatureMatcherSetI> featureFilters)
+  {
+    boolean nothingToOutput = visible == null || featureFilters == null
+            || featureFilters.isEmpty();
+    if (nothingToOutput)
+    {
+      return;
+    }
+
+    boolean headerWritten = false;
+    for (String type : visible.keySet())
+    {
+      FeatureMatcherSetI typeFilter = featureFilters.get(type);
+      if (typeFilter == null)
+      {
+        continue;
+      }
+
+      /*
+       * the opening line is written lazily, on finding the first filter
+       */
+      if (!headerWritten)
+      {
+        out.append(newline);
+        out.append(STARTFILTERS);
+        out.append(newline);
+        headerWritten = true;
+      }
+      out.append(type);
+      out.append(TAB);
+      out.append(typeFilter.toStableString());
+      out.append(newline);
+    }
+
+    /*
+     * close the section only if it was opened
+     */
+    if (headerWritten)
+    {
+      out.append(ENDFILTERS);
+      out.append(newline);
+      out.append(newline);
+    }
+  }
+
+  /**
+   * Appends output of sequence features within feature groups to the output
+   * buffer. Groups other than the null or empty group are sandwiched by
+   * STARTGROUP and ENDGROUP lines.
+   * 
+   * @param out
+   * @param groups
+   * @param featureTypes
+   * @param sequences
+   * @return
+   */
+  private boolean outputFeaturesByGroup(StringBuilder out,
+          List<String> groups, String[] featureTypes, SequenceI[] sequences)
+  {
+    boolean foundSome = false;
+    for (String group : groups)
     {
       boolean isNamedGroup = (group != null && !"".equals(group));
       if (isNamedGroup)
       {
         out.append(newline);
-        out.append("STARTGROUP").append(TAB);
+        out.append(STARTGROUP).append(TAB);
         out.append(group);
         out.append(newline);
       }
@@ -577,11 +724,11 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI
       for (int i = 0; i < sequences.length; i++)
       {
         String sequenceName = sequences[i].getName();
-        List<SequenceFeature> features = new ArrayList<SequenceFeature>();
-        if (types.length > 0)
+        List<SequenceFeature> features = new ArrayList<>();
+        if (featureTypes.length > 0)
         {
           features.addAll(sequences[i].getFeatures().getFeaturesForGroup(
-                  true, group, types));
+                  true, group, featureTypes));
         }
 
         for (SequenceFeature sequenceFeature : features)
@@ -593,13 +740,12 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI
 
       if (isNamedGroup)
       {
-        out.append("ENDGROUP").append(TAB);
+        out.append(ENDGROUP).append(TAB);
         out.append(group);
         out.append(newline);
       }
     }
-
-    return foundSome ? out.toString() : "No Features Visible";
+    return foundSome;
   }
 
   /**
@@ -688,7 +834,7 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI
       dataset = new Alignment(new SequenceI[] {});
     }
 
-    Map<String, FeatureColourI> featureColours = new HashMap<String, FeatureColourI>();
+    Map<String, FeatureColourI> featureColours = new HashMap<>();
     boolean parseResult = parse(dataset, featureColours, false, true);
     if (!parseResult)
     {
@@ -748,7 +894,7 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI
 
     for (SequenceI seq : sequences)
     {
-      List<SequenceFeature> features = new ArrayList<SequenceFeature>();
+      List<SequenceFeature> features = new ArrayList<>();
       if (includeNonPositionalFeatures)
       {
         features.addAll(seq.getFeatures().getNonPositionalFeatures());
index 4b7a435..e47c787 100644 (file)
@@ -12,6 +12,7 @@ import java.util.HashMap;
 import java.util.Map;
 
 import org.testng.annotations.AfterMethod;
+import org.testng.annotations.BeforeClass;
 import org.testng.annotations.Test;
 
 import junit.extensions.PA;
@@ -20,6 +21,16 @@ public class FeatureAttributesTest
 {
 
   /**
+   * clear down attributes map before tests
+   */
+  @BeforeClass
+  public void setUp()
+  {
+    FeatureAttributes fa = FeatureAttributes.getInstance();
+    ((Map<?, ?>) PA.getValue(fa, "attributes")).clear();
+  }
+
+  /**
    * clear down attributes map after tests
    */
   @AfterMethod
@@ -56,7 +67,7 @@ public class FeatureAttributesTest
         "csq", "AF" }) < 0);
   }
 
-  @Test
+  @Test(groups = "Functional")
   public void testGetMinMax()
   {
     SequenceFeature sf = new SequenceFeature("Pfam", "desc", 10, 20,
@@ -88,7 +99,7 @@ public class FeatureAttributesTest
    * Test the method that returns an attribute description, provided it is
    * recorded and unique
    */
-  @Test
+  @Test(groups = "Functional")
   public void testGetDescription()
   {
     FeatureAttributes fa = FeatureAttributes.getInstance();
@@ -102,7 +113,7 @@ public class FeatureAttributesTest
     assertNull(fa.getDescription("Pfam", "kd"));
   }
 
-  @Test
+  @Test(groups = "Functional")
   public void testDatatype()
   {
     FeatureAttributes fa = FeatureAttributes.getInstance();
index 152ab84..fd4cad7 100644 (file)
@@ -23,7 +23,9 @@ package jalview.io;
 import static org.testng.AssertJUnit.assertEquals;
 import static org.testng.AssertJUnit.assertFalse;
 import static org.testng.AssertJUnit.assertNotNull;
+import static org.testng.AssertJUnit.assertSame;
 import static org.testng.AssertJUnit.assertTrue;
+import static org.testng.internal.junit.ArrayAsserts.assertArrayEquals;
 
 import jalview.api.FeatureColourI;
 import jalview.api.FeatureRenderer;
@@ -32,11 +34,14 @@ import jalview.datamodel.AlignmentI;
 import jalview.datamodel.SequenceDummy;
 import jalview.datamodel.SequenceFeature;
 import jalview.datamodel.SequenceI;
+import jalview.datamodel.features.FeatureMatcherI;
+import jalview.datamodel.features.FeatureMatcherSetI;
 import jalview.datamodel.features.SequenceFeatures;
 import jalview.gui.AlignFrame;
 import jalview.gui.Desktop;
 import jalview.gui.JvOptionPane;
 import jalview.structure.StructureSelectionManager;
+import jalview.util.matcher.Condition;
 
 import java.awt.Color;
 import java.io.File;
@@ -44,6 +49,7 @@ import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.HashMap;
+import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 
@@ -467,10 +473,10 @@ public class FeaturesFileTest
      */
     FeatureRenderer fr = af.alignPanel.getFeatureRenderer();
     Map<String, FeatureColourI> visible = fr.getDisplayedFeatureCols();
-    List<String> visibleGroups = new ArrayList<String>(
+    List<String> visibleGroups = new ArrayList<>(
             Arrays.asList(new String[] {}));
     String exported = featuresFile.printJalviewFormat(
-            al.getSequencesArray(), visible, visibleGroups, false);
+            al.getSequencesArray(), visible, null, visibleGroups, false);
     String expected = "No Features Visible";
     assertEquals(expected, exported);
 
@@ -479,7 +485,7 @@ public class FeaturesFileTest
      */
     visibleGroups.add("uniprot");
     exported = featuresFile.printJalviewFormat(al.getSequencesArray(),
-            visible, visibleGroups, true);
+            visible, null, visibleGroups, true);
     expected = "Cath\tFER_CAPAA\t-1\t0\t0\tDomain\t0.0\n"
             + "desc1\tFER_CAPAN\t-1\t0\t0\tPfam\t1.3\n"
             + "desc3\tFER1_SOLLC\t-1\t0\t0\tPfam\n" // NaN is not output
@@ -493,7 +499,7 @@ public class FeaturesFileTest
     fr.setVisible("GAMMA-TURN");
     visible = fr.getDisplayedFeatureCols();
     exported = featuresFile.printJalviewFormat(al.getSequencesArray(),
-            visible, visibleGroups, false);
+            visible, null, visibleGroups, false);
     expected = "METAL\tcc9900\n"
             + "GAMMA-TURN\tff0000|00ffff|20.0|95.0|below|66.0\n"
             + "\nSTARTGROUP\tuniprot\n"
@@ -508,7 +514,7 @@ public class FeaturesFileTest
     fr.setVisible("Pfam");
     visible = fr.getDisplayedFeatureCols();
     exported = featuresFile.printJalviewFormat(al.getSequencesArray(),
-            visible, visibleGroups, false);
+            visible, null, visibleGroups, false);
     /*
      * features are output within group, ordered by sequence and by type
      */
@@ -539,8 +545,8 @@ public class FeaturesFileTest
      */
     FeaturesFile featuresFile = new FeaturesFile();
     FeatureRenderer fr = af.alignPanel.getFeatureRenderer();
-    Map<String, FeatureColourI> visible = new HashMap<String, FeatureColourI>();
-    List<String> visibleGroups = new ArrayList<String>(
+    Map<String, FeatureColourI> visible = new HashMap<>();
+    List<String> visibleGroups = new ArrayList<>(
             Arrays.asList(new String[] {}));
     String exported = featuresFile.printGffFormat(al.getSequencesArray(),
             visible, visibleGroups, false);
@@ -623,4 +629,47 @@ public class FeaturesFileTest
             + "FER_CAPAN\tUniprot\tPfam\t20\t20\t0.0\t+\t2\tx=y;black=white\n";
     assertEquals(expected, exported);
   }
+
+  /**
+   * Test for parsing of feature filters as represented in a Jalview features
+   * file. Two filter lines (without the enclosing STARTFILTERS / ENDFILTERS)
+   * are supplied as pasted text and parsed directly with parseFilters; the
+   * resulting matcher sets are then inspected. Assertions are written as
+   * (expected, actual), the argument order of the AssertJUnit methods used.
+   * 
+   * @throws Exception
+   */
+  @Test(groups = { "Functional" })
+  public void testParseFilters() throws Exception
+  {
+    Map<String, FeatureMatcherSetI> filters = new HashMap<>();
+    String text = "sequence_variant\tCSQ:PolyPhen NotContains 'damaging'\n"
+            + "missense_variant\t(label contains foobar) and (Score lt 1.3)";
+    FeaturesFile featuresFile = new FeaturesFile(text,
+            DataSourceType.PASTE);
+    featuresFile.parseFilters(filters);
+    assertEquals(2, filters.size());
+
+    /*
+     * first filter: a single condition on attribute CSQ:PolyPhen
+     */
+    FeatureMatcherSetI fm = filters.get("sequence_variant");
+    assertNotNull(fm);
+    Iterator<FeatureMatcherI> matchers = fm.getMatchers().iterator();
+    FeatureMatcherI matcher = matchers.next();
+    assertFalse(matchers.hasNext());
+    String[] attributes = matcher.getAttribute();
+    assertArrayEquals(new String[] { "CSQ", "PolyPhen" }, attributes);
+    assertSame(Condition.NotContains, matcher.getMatcher().getCondition());
+    assertEquals("damaging", matcher.getMatcher().getPattern());
+
+    /*
+     * second filter: a label condition and a score condition
+     */
+    fm = filters.get("missense_variant");
+    assertNotNull(fm);
+    matchers = fm.getMatchers().iterator();
+    matcher = matchers.next();
+    assertTrue(matcher.isByLabel());
+    assertSame(Condition.Contains, matcher.getMatcher().getCondition());
+    assertEquals("foobar", matcher.getMatcher().getPattern());
+    matcher = matchers.next();
+    assertTrue(matcher.isByScore());
+    assertSame(Condition.LT, matcher.getMatcher().getCondition());
+    assertEquals("1.3", matcher.getMatcher().getPattern());
+    assertEquals(1.3f, matcher.getMatcher().getFloatValue());
+
+    assertFalse(matchers.hasNext());
+  }
 }