apply version 2.7 copyright

[jalview.git] / src / jalview / io / FeaturesFile.java
diff --git a/src/jalview/io/FeaturesFile.java b/src/jalview/io/FeaturesFile.java

index cfc08fb..8664cba 100755 (executable)
--- a/src/jalview/io/FeaturesFile.java
+++ b/src/jalview/io/FeaturesFile.java
@@ -1,6 +1,6 @@
  /*\r
- * Jalview - A Sequence Alignment Editor and Viewer (Version 2.6)\r
- * Copyright (C) 2010 J Procter, AM Waterhouse, G Barton, M Clamp, S Searle\r
+ * Jalview - A Sequence Alignment Editor and Viewer (Version 2.7)\r
+ * Copyright (C) 2011 J Procter, AM Waterhouse, G Barton, M Clamp, S Searle\r
   * \r
   * This file is part of Jalview.\r
   * \r
@@ -20,6 +20,7 @@ package jalview.io;
  import java.io.*;\r
  import java.util.*;\r
  \r
+import jalview.analysis.SequenceIdMatcher;\r
  import jalview.datamodel.*;\r
  import jalview.schemes.*;\r
  import jalview.util.Format;\r
@@ -70,28 +71,60 @@ public class FeaturesFile extends AlignFile
    }\r
  \r
    /**\r
-   * The Application can render HTML, but the applet will remove HTML tags and\r
-   * replace links with %LINK% Both need to read links in HTML however\r
-   * \r
-   * @throws IOException\r
-   *           DOCUMENT ME!\r
+   * Parse GFF or sequence features file using case-independent matching, discarding URLs\r
+   * @param align - alignment/dataset containing sequences that are to be annotated\r
+   * @param colours - hashtable to store feature colour definitions\r
+   * @param removeHTML - process html strings into plain text\r
+   * @return true if features were added\r
     */\r
    public boolean parse(AlignmentI align, Hashtable colours,\r
            boolean removeHTML)\r
    {\r
-    return parse(align, colours, null, removeHTML);\r
+    return parse(align, colours, null, removeHTML, false);\r
    }\r
  \r
    /**\r
-   * The Application can render HTML, but the applet will remove HTML tags and\r
-   * replace links with %LINK% Both need to read links in HTML however\r
-   * \r
-   * @throws IOException\r
-   *           DOCUMENT ME!\r
+   * Parse GFF or sequence features file optionally using case-independent matching, discarding URLs\r
+   * @param align - alignment/dataset containing sequences that are to be annotated\r
+   * @param colours - hashtable to store feature colour definitions\r
+   * @param removeHTML - process html strings into plain text\r
+   * @param relaxedIdMatching - when true, ID matches to compound sequence IDs are allowed\r
+   * @return true if features were added\r
+   */\r
+  public boolean parse(AlignmentI align, \r
+          Hashtable colours, boolean removeHTML, boolean relaxedIdMatching)\r
+  {\r
+    return parse(align, colours, null, removeHTML, relaxedIdMatching);\r
+  }\r
+\r
+  /**\r
+   * Parse GFF or sequence features file using case-independent matching\r
+   * @param align - alignment/dataset containing sequences that are to be annotated\r
+   * @param colours - hashtable to store feature colour definitions\r
+   * @param featureLink - hashtable to store associated URLs \r
+   * @param removeHTML - process html strings into plain text\r
+   * @return true if features were added\r
     */\r
    public boolean parse(AlignmentI align, Hashtable colours,\r
            Hashtable featureLink, boolean removeHTML)\r
    {\r
+    return parse(align, colours, featureLink, removeHTML, false);\r
+  }\r
+\r
+  /**\r
+   * Parse GFF or sequence features file, optionally using relaxed sequence ID\r
+   * matching\r
+   * @param align - alignment/dataset containing sequences that are to be annotated\r
+   * @param colours - hashtable to store feature colour definitions\r
+   * @param featureLink - hashtable to store associated URLs \r
+   * @param removeHTML - process html strings into plain text\r
+   * @param relaxedIdmatching - when true, ID matches to compound sequence IDs are allowed\r
+   * @return true if features were added\r
+   */\r
+  public boolean parse(AlignmentI align,\r
+          Hashtable colours, Hashtable featureLink, boolean removeHTML, boolean relaxedIdmatching)\r
+  {\r
+\r
      String line = null;\r
      try\r
      {\r
@@ -165,24 +198,27 @@ public class FeaturesFile extends AlignFile
                // fifth is either 'above','below', or 'none'.\r
                // sixth is a float value and only required when fifth is either\r
                // 'above' or 'below'.\r
-              StringTokenizer gcol = new StringTokenizer(colscheme, "|", true);\r
+              StringTokenizer gcol = new StringTokenizer(colscheme, "|",\r
+                      true);\r
                // set defaults\r
                int threshtype = AnnotationColourGradient.NO_THRESHOLD;\r
                float min = Float.MIN_VALUE, max = Float.MAX_VALUE, threshval = Float.NaN;\r
                boolean labelCol = false;\r
                // Parse spec line\r
                String mincol = gcol.nextToken();\r
-              if (mincol=="|")\r
+              if (mincol == "|")\r
                {\r
                  System.err\r
-                .println("Expected either 'label' or a colour specification in the line: "+line );\r
+                        .println("Expected either 'label' or a colour specification in the line: "\r
+                                + line);\r
                  continue;\r
                }\r
                String maxcol = null;\r
                if (mincol.toLowerCase().indexOf("label") == 0)\r
                {\r
                  labelCol = true;\r
-                mincol = (gcol.hasMoreTokens() ? gcol.nextToken() : null); // skip '|'\r
+                mincol = (gcol.hasMoreTokens() ? gcol.nextToken() : null); // skip\r
+                                                                           // '|'\r
                  mincol = (gcol.hasMoreTokens() ? gcol.nextToken() : null);\r
                }\r
                String abso = null, minval, maxval;\r
@@ -191,16 +227,20 @@ public class FeaturesFile extends AlignFile
                  // at least four more tokens\r
                  if (mincol.equals("|"))\r
                  {\r
-                  mincol="";\r
-                } else {\r
+                  mincol = "";\r
+                }\r
+                else\r
+                {\r
                    gcol.nextToken(); // skip next '|'\r
                  }\r
                  // continue parsing rest of line\r
                  maxcol = gcol.nextToken();\r
                  if (maxcol.equals("|"))\r
                  {\r
-                  maxcol="";\r
-                } else {\r
+                  maxcol = "";\r
+                }\r
+                else\r
+                {\r
                    gcol.nextToken(); // skip next '|'\r
                  }\r
                  abso = gcol.nextToken();\r
@@ -216,7 +256,8 @@ public class FeaturesFile extends AlignFile
                    gcol.nextToken(); // skip next '|'\r
                  }\r
                  maxval = gcol.nextToken();\r
-                if (gcol.hasMoreTokens()) {\r
+                if (gcol.hasMoreTokens())\r
+                {\r
                    gcol.nextToken(); // skip next '|'\r
                  }\r
                  try\r
@@ -361,7 +402,7 @@ public class FeaturesFile extends AlignFile
              // Still possible this is an old Jalview file,\r
              // which does not have type colours at the beginning\r
              seqId = token = st.nextToken();\r
-            seq = align.findName(seqId, true);\r
+            seq = findName(align, seqId, relaxedIdmatching);\r
              if (seq != null)\r
              {\r
                desc = st.nextToken();\r
@@ -470,7 +511,7 @@ public class FeaturesFile extends AlignFile
  \r
            if (!token.equals("ID_NOT_SPECIFIED"))\r
            {\r
-            seq = align.findName(seqId = token, true);\r
+            seq = findName(align, seqId = token, relaxedIdmatching);\r
              st.nextToken();\r
            }\r
            else\r
@@ -540,88 +581,64 @@ public class FeaturesFile extends AlignFile
            GFFFile = false;\r
          }\r
        }\r
+      resetMatcher();\r
      } catch (Exception ex)\r
      {\r
        System.out.println(line);\r
        System.out.println("Error parsing feature file: " + ex + "\n" + line);\r
        ex.printStackTrace(System.err);\r
+      resetMatcher();\r
        return false;\r
      }\r
  \r
      return true;\r
    }\r
  \r
-  public void parseDescriptionHTML(SequenceFeature sf, boolean removeHTML)\r
+  private AlignmentI lastmatchedAl = null;\r
+\r
+  private SequenceIdMatcher matcher = null;\r
+\r
+  /**\r
+   * clear any temporary handles used to speed up ID matching\r
+   */\r
+  private void resetMatcher()\r
    {\r
-    if (sf.getDescription() == null)\r
+    lastmatchedAl = null;\r
+    matcher = null;\r
+  }\r
+\r
+  private SequenceI findName(AlignmentI align, String seqId,\r
+          boolean relaxedIdMatching)\r
+  {\r
+    SequenceI match = null;\r
+    if (relaxedIdMatching)\r
      {\r
-      return;\r
+      if (lastmatchedAl != align)\r
+      {\r
+        matcher = new SequenceIdMatcher(\r
+                (lastmatchedAl = align).getSequencesArray());\r
+      }\r
+      match = matcher.findIdMatch(seqId);\r
      }\r
-\r
-    if (removeHTML\r
-            && sf.getDescription().toUpperCase().indexOf("<HTML>") == -1)\r
+    else\r
      {\r
-      removeHTML = false;\r
+      match = align.findName(seqId, true);\r
      }\r
+    return match;\r
+  }\r
  \r
-    StringBuffer sb = new StringBuffer();\r
-    StringTokenizer st = new StringTokenizer(sf.getDescription(), "<");\r
-    String token, link;\r
-    int startTag;\r
-    String tag = null;\r
-    while (st.hasMoreElements())\r
+  public void parseDescriptionHTML(SequenceFeature sf, boolean removeHTML)\r
+  {\r
+    if (sf.getDescription() == null)\r
      {\r
-      token = st.nextToken("&>");\r
-      if (token.equalsIgnoreCase("html") || token.startsWith("/"))\r
-      {\r
-        continue;\r
-      }\r
-\r
-      tag = null;\r
-      startTag = token.indexOf("<");\r
-\r
-      if (startTag > -1)\r
-      {\r
-        tag = token.substring(startTag + 1);\r
-        token = token.substring(0, startTag);\r
-      }\r
-\r
-      if (tag != null && tag.toUpperCase().startsWith("A HREF="))\r
-      {\r
-        if (token.length() > 0)\r
-        {\r
-          sb.append(token);\r
-        }\r
-        link = tag.substring(tag.indexOf("\"") + 1, tag.length() - 1);\r
-        String label = st.nextToken("<>");\r
-        sf.addLink(label + "|" + link);\r
-        sb.append(label + "%LINK%");\r
-      }\r
-      else if (tag != null && tag.equalsIgnoreCase("br"))\r
-      {\r
-        sb.append("\n");\r
-      }\r
-      else if (token.startsWith("lt;"))\r
-      {\r
-        sb.append("<" + token.substring(3));\r
-      }\r
-      else if (token.startsWith("gt;"))\r
-      {\r
-        sb.append(">" + token.substring(3));\r
-      }\r
-      else if (token.startsWith("amp;"))\r
-      {\r
-        sb.append("&" + token.substring(4));\r
-      }\r
-      else\r
-      {\r
-        sb.append(token);\r
-      }\r
+      return;\r
      }\r
+    jalview.util.ParseHtmlBodyAndLinks parsed = new jalview.util.ParseHtmlBodyAndLinks(sf.getDescription(), removeHTML, newline);\r
  \r
-    if (removeHTML)\r
+    sf.description = (removeHTML) ? parsed.getNonHtmlContent() : sf.description;\r
+    for (String link:parsed.getLinks())\r
      {\r
-      sf.description = sb.toString();\r
+      sf.addLink(link);\r
      }\r
  \r
    }\r
@@ -718,7 +735,10 @@ public class FeaturesFile extends AlignFile
            color = Format.getHexString(new java.awt.Color(Integer\r
                    .parseInt(visible.get(type).toString())));\r
          }\r
-        out.append(type + "\t" + color + "\n");\r
+        out.append(type);\r
+        out.append("\t");\r
+        out.append(color);\r
+        out.append(newline);\r
        }\r
      }\r
      // Work out which groups are both present and visible\r
@@ -757,7 +777,10 @@ public class FeaturesFile extends AlignFile
        if (groups.size() > 0 && groupIndex < groups.size())\r
        {\r
          group = groups.elementAt(groupIndex).toString();\r
-        out.append("\nSTARTGROUP\t" + group + "\n");\r
+        out.append(newline);\r
+        out.append("STARTGROUP\t");\r
+        out.append(group);\r
+        out.append(newline);\r
        }\r
        else\r
        {\r
@@ -830,22 +853,28 @@ public class FeaturesFile extends AlignFile
  \r
                out.append("\t");\r
              }\r
-            out.append(seqs[i].getName()\r
-                    + "\t-1\t"\r
-                    + next[j].begin\r
-                    + "\t"\r
-                    + next[j].end\r
-                    + "\t"\r
-                    + next[j].type\r
-                    + ((next[j].score != Float.NaN) ? "\t" + next[j].score\r
-                            + "\n" : "\n"));\r
+            out.append(seqs[i].getName());\r
+            out.append("\t-1\t");\r
+            out.append(next[j].begin);\r
+            out.append("\t");\r
+            out.append(next[j].end);\r
+            out.append("\t");\r
+            out.append(next[j].type);\r
+            if (next[j].score != Float.NaN)\r
+            {\r
+              out.append("\t");\r
+              out.append(next[j].score);\r
+            }\r
+            out.append(newline);\r
            }\r
          }\r
        }\r
  \r
        if (group != null)\r
        {\r
-        out.append("ENDGROUP\t" + group + "\n");\r
+        out.append("ENDGROUP\t");\r
+        out.append(group);\r
+        out.append(newline);\r
          groupIndex++;\r
        }\r
        else\r
@@ -904,13 +933,23 @@ public class FeaturesFile extends AlignFile
              source = next[j].getDescription();\r
            }\r
  \r
-          out.append(seqs[i].getName() + "\t" + source + "\t"\r
-                  + next[j].type + "\t" + next[j].begin + "\t"\r
-                  + next[j].end + "\t" + next[j].score + "\t");\r
+          out.append(seqs[i].getName());\r
+          out.append("\t");\r
+          out.append(source);\r
+          out.append("\t");\r
+          out.append(next[j].type);\r
+          out.append("\t");\r
+          out.append(next[j].begin);\r
+          out.append("\t");\r
+          out.append(next[j].end);\r
+          out.append("\t");\r
+          out.append(next[j].score);\r
+          out.append("\t");\r
  \r
            if (next[j].getValue("STRAND") != null)\r
            {\r
-            out.append(next[j].getValue("STRAND") + "\t");\r
+            out.append(next[j].getValue("STRAND"));\r
+            out.append("\t");\r
            }\r
            else\r
            {\r
@@ -925,13 +964,15 @@ public class FeaturesFile extends AlignFile
            {\r
              out.append(".");\r
            }\r
+          // TODO: verify/check GFF - should there be a \t here before attribute\r
+          // output ?\r
  \r
            if (next[j].getValue("ATTRIBUTES") != null)\r
            {\r
              out.append(next[j].getValue("ATTRIBUTES"));\r
            }\r
  \r
-          out.append("\n");\r
+          out.append(newline);\r
  \r
          }\r
        }\r
@@ -957,4 +998,5 @@ public class FeaturesFile extends AlignFile
    {\r
      return "USE printGFFFormat() or printJalviewFormat()";\r
    }\r
+\r
  }\r