X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Futil%2FParseHtmlBodyAndLinks.java;h=f1b83b8d79323df6eb6aae0dfd0e5e7ec1ed7644;hb=37de9310bec3501cbc6381e0c3dcb282fcaad812;hp=22bae142e78db618ba9c55681622037d7a2483f3;hpb=b57a02c25e335d033c97f8a6bacd6b54f62bd2b6;p=jalview.git

diff --git a/src/jalview/util/ParseHtmlBodyAndLinks.java b/src/jalview/util/ParseHtmlBodyAndLinks.java
index 22bae14..f1b83b8 100644
--- a/src/jalview/util/ParseHtmlBodyAndLinks.java
+++ b/src/jalview/util/ParseHtmlBodyAndLinks.java
@@ -1,23 +1,27 @@
-/*******************************************************************************
- * Jalview - A Sequence Alignment Editor and Viewer (Version 2.7)
- * Copyright (C) 2011 J Procter, AM Waterhouse, J Engelhardt, LM Lui, G Barton, M Clamp, S Searle
- *
+/*
+ * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
+ * Copyright (C) $$Year-Rel$$ The Jalview Authors
+ * 
  * This file is part of Jalview.
- *
+ * 
  * Jalview is free software: you can redistribute it and/or
  * modify it under the terms of the GNU General Public License 
- * as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
- *
+ * as published by the Free Software Foundation, either version 3
+ * of the License, or (at your option) any later version.
+ *  
  * Jalview is distributed in the hope that it will be useful, but 
  * WITHOUT ANY WARRANTY; without even the implied warranty 
  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR 
  * PURPOSE.  See the GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
- *******************************************************************************/
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
+ * The Jalview Authors are detailed in the 'AUTHORS' file.
+ */
 package jalview.util;
 
 import java.util.ArrayList;
+import java.util.List;
 import java.util.StringTokenizer;
 import java.util.regex.Pattern;
 
@@ -29,6 +33,9 @@ import java.util.regex.Pattern;
  */
 public class ParseHtmlBodyAndLinks
 {
+  private static final Pattern LEFT_ANGLE_BRACKET_PATTERN = Pattern
+          .compile("<");
+
   String orig = null;
 
   public String getOrig()
@@ -46,9 +53,9 @@ public class ParseHtmlBodyAndLinks
     return htmlContent;
   }
 
-  ArrayList<String> links = new ArrayList<String>();
+  List<String> links = new ArrayList<String>();
 
-  StringBuffer sb = new StringBuffer();
+  String content;
 
   /**
    * result of parsing description - with or without HTML tags
@@ -58,7 +65,7 @@ public class ParseHtmlBodyAndLinks
   public String getContent()
   {
 
-    return sb.toString();
+    return content;
   }
 
   /**
@@ -66,12 +73,19 @@ public class ParseHtmlBodyAndLinks
    * 
    * @return
    */
-  public ArrayList<String> getLinks()
+  public List<String> getLinks()
   {
     return links;
   }
 
   /**
+   * Parses the given html and
+   * <ul>
+   * <li>extracts any 'href' links to a list of "displayName|url" strings,
+   * retrievable by #getLinks</li>
+   * <li>extracts the remaining text (with %LINK% placeholders replacing hrefs),
+   * retrievable by #getContent</li>
+   * </ul>
    * 
    * @param description
    *          - html or text content to be parsed
@@ -83,6 +97,7 @@ public class ParseHtmlBodyAndLinks
   public ParseHtmlBodyAndLinks(String description, boolean removeHTML,
           String newline)
   {
+    StringBuilder sb = new StringBuilder(description == null ? 0 : description.length()); // null-safe size hint
     if (description == null || description.length() == 0)
     {
       htmlContent = false;
@@ -99,7 +114,7 @@ public class ParseHtmlBodyAndLinks
     String tag = null;
     while (st.hasMoreElements())
     {
-      token = st.nextToken("&>");
+      token = st.nextToken(">");
       if (token.equalsIgnoreCase("html") || token.startsWith("/"))
       {
         continue;
@@ -129,18 +144,6 @@ public class ParseHtmlBodyAndLinks
       {
         sb.append(newline);
       }
-      else if (token.startsWith("lt;"))
-      {
-        sb.append("<" + token.substring(3));
-      }
-      else if (token.startsWith("gt;"))
-      {
-        sb.append(">" + token.substring(3));
-      }
-      else if (token.startsWith("amp;"))
-      {
-        sb.append("&" + token.substring(4));
-      }
       else
       {
         sb.append(token);
@@ -150,11 +153,18 @@ public class ParseHtmlBodyAndLinks
     {
       // instead of parsing the html into plaintext
       // clean the description ready for embedding in html
-      sb = new StringBuffer(Pattern.compile("<").matcher(description)
-              .replaceAll("&lt;"));
-
+      sb = new StringBuilder(LEFT_ANGLE_BRACKET_PATTERN
+              .matcher(description).replaceAll("&lt;"));
     }
+    content = translateEntities(sb.toString());
+  }
 
+  private String translateEntities(String s)
+  {
+    s = s.replace("&lt;", "<"); // literal replace: no regex needed
+    s = s.replace("&gt;", ">");
+    s = s.replace("&amp;", "&"); // last, so "&amp;lt;" decodes to "&lt;", not "<"
+    return s;
   }
 
   /**
@@ -165,7 +175,7 @@ public class ParseHtmlBodyAndLinks
    */
   public String getNonHtmlContent()
   {
-    return isHtmlContent() ? sb.toString() : orig;
+    return isHtmlContent() ? content : orig;
   }
 
 }