JAL-3499 process motifs in features file to create features

[jalview.git] / src / jalview / io / FeaturesFile.java
diff --git a/src/jalview/io/FeaturesFile.java b/src/jalview/io/FeaturesFile.java

index a69788b..df3d352 100755 (executable)
--- a/src/jalview/io/FeaturesFile.java
+++ b/src/jalview/io/FeaturesFile.java
@@ -21,15 +21,18 @@
  package jalview.io;
  
  import jalview.analysis.AlignmentUtils;
+import jalview.analysis.Finder;
  import jalview.analysis.SequenceIdMatcher;
  import jalview.api.AlignViewportI;
  import jalview.api.FeatureColourI;
  import jalview.api.FeatureRenderer;
  import jalview.api.FeaturesSourceI;
+import jalview.api.FinderI;
  import jalview.datamodel.AlignedCodonFrame;
  import jalview.datamodel.Alignment;
  import jalview.datamodel.AlignmentI;
  import jalview.datamodel.MappedFeatures;
+import jalview.datamodel.SearchResultMatchI;
  import jalview.datamodel.SequenceDummy;
  import jalview.datamodel.SequenceFeature;
  import jalview.datamodel.SequenceI;
@@ -85,6 +88,10 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI
  
    private static final String ENDFILTERS = "ENDFILTERS";
  
+  private static final String STARTMOTIFS = "STARTMOTIFS";
+
+  private static final String ENDMOTIFS = "ENDMOTIFS";
+
    private static final String ID_NOT_SPECIFIED = "ID_NOT_SPECIFIED";
  
    private static final String NOTE = "Note";
@@ -232,6 +239,7 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI
  
        while ((line = nextLine()) != null)
        {
+        line = line.trim();
          // skip comments/process pragmas
          if (line.length() == 0 || line.startsWith("#"))
          {
@@ -259,8 +267,9 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI
          if (gffColumns.length > 0 && gffColumns.length < 4)
          {
            /*
-           * if 2 or 3 tokens, we anticipate either 'startgroup', 'endgroup' or
-           * a feature type colour specification
+           * if 1 to 3 tokens, we anticipate one of STARTGROUP, ENDGROUP,
+           * STARTFILTERS or STARTMOTIFS, or else a feature type and colour
+           * specification
             */
            String ft = gffColumns[0];
            if (ft.equalsIgnoreCase(STARTFILTERS))
@@ -268,14 +277,17 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI
              parseFilters(filters);
              continue;
            }
+          if (ft.equalsIgnoreCase(STARTMOTIFS))
+          {
+            parseMotifs(align, featureGroup);
+            continue;
+          }
            if (ft.equalsIgnoreCase(STARTGROUP))
            {
              featureGroup = gffColumns[1];
            }
            else if (ft.equalsIgnoreCase(ENDGROUP))
            {
-            // We should check whether this is the current group,
-            // but at present there's no way of showing more than 1 group
              featureGroup = null;
            }
            else
@@ -292,8 +304,7 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI
          }
  
          /*
-         * if not a comment, GFF pragma, startgroup, endgroup or feature
-         * colour specification, that just leaves a feature details line
+         * if not handled above, that just leaves a feature details line
           * in either Jalview or GFF format
           */
          if (gffVersion == 0)
@@ -333,6 +344,62 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI
    }
  
    /**
+   * Reads lines up to and including the next ENDMOTIFS, and processes each one
+   * by
+   * <ul>
+   * <li>searching the alignment for the supplied motif (first column)</li>
+   * <li>creating features for matches, with feature type, description and
+   * (optionally) score given in the remaining columns</li>
+   * </ul>
+   * 
+   * @param alignment
+   * @param featureGroup
+   * @throws IOException
+   */
+  protected void parseMotifs(AlignmentI alignment, String featureGroup)
+          throws IOException
+  {
+    FinderI finder = new Finder(alignment);
+    String line;
+    while ((line = nextLine()) != null)
+    {
+      if (line.toUpperCase().startsWith(ENDMOTIFS))
+      {
+        return;
+      }
+      String[] tokens = line.split(TAB_REGEX);
+      if (tokens.length != 3 && tokens.length != 4)
+      {
+        System.err.println(String.format("Invalid token count %d for %s",
+                tokens.length, line));
+      }
+      String motif = tokens[0];
+      String featureType = tokens[1];
+      String description = tokens[2];
+      float score = 0f;
+      if (tokens.length > 3)
+      {
+        try
+        {
+          score = Float.valueOf(tokens[3]);
+        } catch (NumberFormatException e)
+        {
+          System.err.println("Invalid score in " + line);
+        }
+      }
+      finder.findAll(motif, true, false);
+      List<SearchResultMatchI> matches = finder.getSearchResults()
+              .getResults();
+      for (SearchResultMatchI match : matches)
+      {
+        SequenceFeature sf = new SequenceFeature(featureType, description,
+                match.getStart(), match.getEnd(), score, featureGroup);
+        match.getSequence().addSequenceFeature(sf);
+      }
+    }
+  }
+
+  /**
     * Reads input lines from STARTFILTERS to ENDFILTERS and adds a feature type
     * filter to the map for each line parsed. After exit from this method,
     * nextLine() should return the line after ENDFILTERS (or we are already at
@@ -354,7 +421,7 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI
        String[] tokens = line.split(TAB_REGEX);
        if (tokens.length != 2)
        {
-        System.err.println(String.format("Invalid token count %d for %d",
+        System.err.println(String.format("Invalid token count %d for %s",
                  tokens.length, line));
        }
        else