X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;ds=sidebyside;f=src%2Fjalview%2Fio%2FFeaturesFile.java;h=2ac0e13458f32eb1076948cc6a5332bee5191504;hb=3f72d101fbeb83a7526a3c4f8f62cfe8e8a0189d;hp=64bb539bfafe961aefe5c50deaf77e78ef58a8fe;hpb=e601d78bc7762386f570e346a803684c43a66ff1;p=jalview.git

diff --git a/src/jalview/io/FeaturesFile.java b/src/jalview/io/FeaturesFile.java
index 64bb539..2ac0e13 100755
--- a/src/jalview/io/FeaturesFile.java
+++ b/src/jalview/io/FeaturesFile.java
@@ -1,6 +1,6 @@
 /*
- * Jalview - A Sequence Alignment Editor and Viewer (Version 2.6)
- * Copyright (C) 2010 J Procter, AM Waterhouse, G Barton, M Clamp, S Searle
+ * Jalview - A Sequence Alignment Editor and Viewer (Version 2.7)
+ * Copyright (C) 2011 J Procter, AM Waterhouse, J Engelhardt, LM Lui, G Barton, M Clamp, S Searle
  * 
  * This file is part of Jalview.
  * 
@@ -20,6 +20,7 @@ package jalview.io;
 import java.io.*;
 import java.util.*;
 
+import jalview.analysis.SequenceIdMatcher;
 import jalview.datamodel.*;
 import jalview.schemes.*;
 import jalview.util.Format;
@@ -70,28 +71,60 @@ public class FeaturesFile extends AlignFile
   }
 
   /**
-   * The Application can render HTML, but the applet will remove HTML tags and
-   * replace links with %LINK% Both need to read links in HTML however
-   * 
-   * @throws IOException
-   *           DOCUMENT ME!
+   * Parse GFF or sequence features file using case-independent matching, discarding URLs
+   * @param align - alignment/dataset containing sequences that are to be annotated
+   * @param colours - hashtable to store feature colour definitions
+   * @param removeHTML - process html strings into plain text
+   * @return true if features were added
    */
   public boolean parse(AlignmentI align, Hashtable colours,
           boolean removeHTML)
   {
-    return parse(align, colours, null, removeHTML);
+    return parse(align, colours, null, removeHTML, false);
   }
 
   /**
-   * The Application can render HTML, but the applet will remove HTML tags and
-   * replace links with %LINK% Both need to read links in HTML however
-   * 
-   * @throws IOException
-   *           DOCUMENT ME!
+   * Parse GFF or sequence features file optionally using case-independent matching, discarding URLs
+   * @param align - alignment/dataset containing sequences that are to be annotated
+   * @param colours - hashtable to store feature colour definitions
+   * @param removeHTML - process html strings into plain text
+   * @param relaxedIdMatching - when true, ID matches to compound sequence IDs are allowed
+   * @return true if features were added
+   */
+  public boolean parse(AlignmentI align, 
+          Hashtable colours, boolean removeHTML, boolean relaxedIdMatching)
+  {
+    return parse(align, colours, null, removeHTML, relaxedIdMatching);
+  }
+
+  /**
+   * Parse GFF or sequence features file optionally using case-independent matching
+   * @param align - alignment/dataset containing sequences that are to be annotated
+   * @param colours - hashtable to store feature colour definitions
+   * @param featureLink - hashtable to store associated URLs 
+   * @param removeHTML - process html strings into plain text
+   * @return true if features were added
    */
   public boolean parse(AlignmentI align, Hashtable colours,
           Hashtable featureLink, boolean removeHTML)
   {
+    return parse(align, colours, featureLink, removeHTML, false);
+  }
+
+  /**
+   * Parse GFF or sequence features file. The shorter parse overloads
+   * delegate to this method.
+   * @param align - alignment/dataset containing sequences that are to be annotated
+   * @param colours - hashtable to store feature colour definitions
+   * @param featureLink - hashtable to store associated URLs 
+   * @param removeHTML - process html strings into plain text
+   * @param relaxedIdmatching - when true, ID matches to compound sequence IDs are allowed
+   * @return true if features were added
+   */
+  public boolean parse(AlignmentI align,
+          Hashtable colours, Hashtable featureLink, boolean removeHTML, boolean relaxedIdmatching)
+  {
+
     String line = null;
     try
     {
@@ -369,7 +402,7 @@ public class FeaturesFile extends AlignFile
             // Still possible this is an old Jalview file,
             // which does not have type colours at the beginning
             seqId = token = st.nextToken();
-            seq = align.findName(seqId, true);
+            seq = findName(align, seqId, relaxedIdmatching);
             if (seq != null)
             {
               desc = st.nextToken();
@@ -478,7 +511,7 @@ public class FeaturesFile extends AlignFile
 
           if (!token.equals("ID_NOT_SPECIFIED"))
           {
-            seq = align.findName(seqId = token, true);
+            seq = findName(align, seqId = token, relaxedIdmatching);
             st.nextToken();
           }
           else
@@ -548,88 +581,64 @@ public class FeaturesFile extends AlignFile
           GFFFile = false;
         }
       }
+      resetMatcher();
     } catch (Exception ex)
     {
       System.out.println(line);
       System.out.println("Error parsing feature file: " + ex + "\n" + line);
       ex.printStackTrace(System.err);
+      resetMatcher();
       return false;
     }
 
     return true;
   }
 
-  public void parseDescriptionHTML(SequenceFeature sf, boolean removeHTML)
+  private AlignmentI lastmatchedAl = null;
+
+  private SequenceIdMatcher matcher = null;
+
+  /**
+   * clear any temporary handles used to speed up ID matching
+   */
+  private void resetMatcher()
   {
-    if (sf.getDescription() == null)
+    lastmatchedAl = null;
+    matcher = null;
+  }
+
+  private SequenceI findName(AlignmentI align, String seqId,
+          boolean relaxedIdMatching)
+  {
+    SequenceI match = null;
+    if (relaxedIdMatching)
     {
-      return;
+      if (lastmatchedAl != align)
+      {
+        matcher = new SequenceIdMatcher(
+                (lastmatchedAl = align).getSequencesArray());
+      }
+      match = matcher.findIdMatch(seqId);
     }
-
-    if (removeHTML
-            && sf.getDescription().toUpperCase().indexOf("<HTML>") == -1)
+    else
     {
-      removeHTML = false;
+      match = align.findName(seqId, true);
     }
+    return match;
+  }
 
-    StringBuffer sb = new StringBuffer();
-    StringTokenizer st = new StringTokenizer(sf.getDescription(), "<");
-    String token, link;
-    int startTag;
-    String tag = null;
-    while (st.hasMoreElements())
+  public void parseDescriptionHTML(SequenceFeature sf, boolean removeHTML)
+  {
+    if (sf.getDescription() == null)
     {
-      token = st.nextToken("&>");
-      if (token.equalsIgnoreCase("html") || token.startsWith("/"))
-      {
-        continue;
-      }
-
-      tag = null;
-      startTag = token.indexOf("<");
-
-      if (startTag > -1)
-      {
-        tag = token.substring(startTag + 1);
-        token = token.substring(0, startTag);
-      }
-
-      if (tag != null && tag.toUpperCase().startsWith("A HREF="))
-      {
-        if (token.length() > 0)
-        {
-          sb.append(token);
-        }
-        link = tag.substring(tag.indexOf("\"") + 1, tag.length() - 1);
-        String label = st.nextToken("<>");
-        sf.addLink(label + "|" + link);
-        sb.append(label + "%LINK%");
-      }
-      else if (tag != null && tag.equalsIgnoreCase("br"))
-      {
-        sb.append(newline);
-      }
-      else if (token.startsWith("lt;"))
-      {
-        sb.append("<" + token.substring(3));
-      }
-      else if (token.startsWith("gt;"))
-      {
-        sb.append(">" + token.substring(3));
-      }
-      else if (token.startsWith("amp;"))
-      {
-        sb.append("&" + token.substring(4));
-      }
-      else
-      {
-        sb.append(token);
-      }
+      return;
     }
+    jalview.util.ParseHtmlBodyAndLinks parsed = new jalview.util.ParseHtmlBodyAndLinks(sf.getDescription(), removeHTML, newline);
 
-    if (removeHTML)
+    sf.description = (removeHTML) ? parsed.getNonHtmlContent() : sf.description;
+    for (String link:parsed.getLinks())
     {
-      sf.description = sb.toString();
+      sf.addLink(link);
     }
 
   }
@@ -929,19 +938,18 @@ public class FeaturesFile extends AlignFile
           out.append(source);
           out.append("\t");
           out.append(next[j].type);
-          out.append( "\t");
-          out.append(  next[j].begin );
           out.append("\t");
-          out.append(
-                  next[j].end);
-          out.append( "\t");
-          out.append( next[j].score);
-          out.append( "\t");
+          out.append(next[j].begin);
+          out.append("\t");
+          out.append(next[j].end);
+          out.append("\t");
+          out.append(next[j].score);
+          out.append("\t");
 
           if (next[j].getValue("STRAND") != null)
           {
             out.append(next[j].getValue("STRAND"));
-            out.append( "\t");
+            out.append("\t");
           }
           else
           {
@@ -956,7 +964,8 @@ public class FeaturesFile extends AlignFile
           {
             out.append(".");
           }
-          // TODO: verify/check GFF - should there be a /t here before attribute output ?
+          // TODO: verify/check GFF - should there be a \t here before attribute
+          // output?
 
           if (next[j].getValue("ATTRIBUTES") != null)
           {
@@ -989,4 +998,5 @@ public class FeaturesFile extends AlignFile
   {
     return "USE printGFFFormat() or printJalviewFormat()";
   }
+
 }