X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Futil%2FParseHtmlBodyAndLinks.java;h=0f2d01d0b7618405e1f51ad5da68d1f4a5993856;hb=41b0e9331ac71787c1280aa1d809f54c575fbf97;hp=5cb0a465fefa24e7c83744a44dc573962fd30519;hpb=193d05af3d6716a7e7652f208d41a016a0039396;p=jalview.git

diff --git a/src/jalview/util/ParseHtmlBodyAndLinks.java b/src/jalview/util/ParseHtmlBodyAndLinks.java
index 5cb0a46..0f2d01d 100644
--- a/src/jalview/util/ParseHtmlBodyAndLinks.java
+++ b/src/jalview/util/ParseHtmlBodyAndLinks.java
@@ -1,66 +1,111 @@
+/*
+ * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
+ * Copyright (C) $$Year-Rel$$ The Jalview Authors
+ * 
+ * This file is part of Jalview.
+ * 
+ * Jalview is free software: you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License 
+ * as published by the Free Software Foundation, either version 3
+ * of the License, or (at your option) any later version.
+ *  
+ * Jalview is distributed in the hope that it will be useful, but 
+ * WITHOUT ANY WARRANTY; without even the implied warranty 
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR 
+ * PURPOSE.  See the GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
+ * The Jalview Authors are detailed in the 'AUTHORS' file.
+ */
 package jalview.util;
 
+import java.util.Locale;
+
 import java.util.ArrayList;
+import java.util.List;
 import java.util.StringTokenizer;
 import java.util.regex.Pattern;
 
-
 /**
  * utility class for dealing with HTML link extraction
+ * 
  * @author jprocter
- *
+ * 
  */
 public class ParseHtmlBodyAndLinks
+{
+  private static final Pattern LEFT_ANGLE_BRACKET_PATTERN = Pattern
+          .compile("<");
+
+  String orig = null;
+
+  public String getOrig()
   {
-    String orig=null;
-    public String getOrig()
-    {
-      return orig;
-    }
-    boolean htmlContent=true;
-    /**
-     * @return true if the content looked like HTML
+    return orig; // NOTE(review): no assignment to orig is visible in these hunks - confirm it is ever set
+  }
 
-     */
-    public boolean isHtmlContent()
-    {
-      return htmlContent;
-    }
+  boolean htmlContent = true;
 
-    ArrayList<String> links=new ArrayList<String>();
-    StringBuffer sb = new StringBuffer();
-    /**
-     * result of parsing description - with or without HTML tags
-     * @return
-     */
-    public String getContent()
-    {
-      
-      return sb.toString();
-    }
-    /**
-     * list of Label|Link encoded URL links extracted from HTML 
-     * @return
-     */
-    public ArrayList<String> getLinks() {
-      return links;
-    }
+  /**
+   * @return false if the description was null, empty or lacked an {@code <html>} tag (any case), else true
+   */
+  public boolean isHtmlContent()
+  {
+    return htmlContent;
+  }
+
+  List<String> links = new ArrayList<String>();
+
+  String content;
 
-    /**
-     * 
-     * @param description - html or text content to be parsed
-     * @param removeHTML flag to indicate if HTML tags should be removed if they are present.
-     * @param newline
-     */
-    public ParseHtmlBodyAndLinks(String description,
-          boolean removeHTML, String newline)
+  /**
+   * result of parsing description - with or without HTML tags
+   * 
+   * @return the parsed text, or null if the description was null or empty
+   */
+  public String getContent()
+  {
+
+    return content;
+  }
+
+  /**
+   * list of Label|Link encoded URL links extracted from HTML
+   * 
+   * @return the internal list itself, not a copy (empty when first created)
+   */
+  public List<String> getLinks()
+  {
+    return links;
+  }
+
+  /**
+   * Parses the given html and
+   * <ul>
+   * <li>extracts any 'href' links to a list of "displayName|url" strings,
+   * retrievable by #getLinks</li>
+   * <li>extracts the remaining text (with %LINK% placeholders replacing hrefs),
+   * retrievable by #getContent</li>
+   * </ul>
+   * 
+   * @param description
+   *          - html or text content to be parsed
+   * @param removeHTML
+   *          flag to indicate if HTML tags should be removed if they are
+   *          present.
+   * @param newline
+   */
+  public ParseHtmlBodyAndLinks(String description, boolean removeHTML,
+          String newline)
+  {
+    if (description == null || description.length() == 0)
     {
-      if (description==null || description.length()==0)
-      {
-        htmlContent=false;
-        return;
-      }
-    if (description.toUpperCase().indexOf("<HTML>") == -1)
+      htmlContent = false;
+      return;
+    }
+    StringBuilder sb = new StringBuilder(description.length());
+    if (description.toUpperCase(Locale.ROOT).indexOf("<HTML>") == -1)
     {
       htmlContent = false;
     }
@@ -71,7 +116,7 @@ public class ParseHtmlBodyAndLinks
     String tag = null;
     while (st.hasMoreElements())
     {
-      token = st.nextToken("&>");
+      token = st.nextToken(">");
       if (token.equalsIgnoreCase("html") || token.startsWith("/"))
       {
         continue;
@@ -86,7 +131,7 @@ public class ParseHtmlBodyAndLinks
         token = token.substring(0, startTag);
       }
 
-      if (tag != null && tag.toUpperCase().startsWith("A HREF="))
+      if (tag != null && tag.toUpperCase(Locale.ROOT).startsWith("A HREF="))
       {
         if (token.length() > 0)
         {
@@ -101,18 +146,6 @@ public class ParseHtmlBodyAndLinks
       {
         sb.append(newline);
       }
-      else if (token.startsWith("lt;"))
-      {
-        sb.append("<" + token.substring(3));
-      }
-      else if (token.startsWith("gt;"))
-      {
-        sb.append(">" + token.substring(3));
-      }
-      else if (token.startsWith("amp;"))
-      {
-        sb.append("&" + token.substring(4));
-      }
       else
       {
         sb.append(token);
@@ -122,18 +155,29 @@ public class ParseHtmlBodyAndLinks
     {
       // instead of parsing the html into plaintext
       // clean the description ready for embedding in html
-      sb = new StringBuffer(Pattern.compile("<").matcher(description).replaceAll("&lt;"));        
-      
+      sb = new StringBuilder(LEFT_ANGLE_BRACKET_PATTERN.matcher(description)
+              .replaceAll("&lt;"));
     }
-    
+    content = translateEntities(sb.toString());
+  }
+
+  private String translateEntities(String s)
+  { // decodes the three entities &lt; &gt; &amp; using literal (non-regex) replacement
+    s = s.replace("&lt;", "<");
+    s = s.replace("&gt;", ">");
+    s = s.replace("&amp;", "&"); // must be last, so "&amp;lt;" yields "&lt;" rather than "<"
+    return s;
+  }
+
+  /**
+   * get either the parsed content or the original, depending on whether the
+   * original looked like html content or not.
+   * 
+   * @return content if {@link #isHtmlContent()} is true, otherwise orig
+   */
+  public String getNonHtmlContent()
+  {
+    return isHtmlContent() ? content : orig;
   }
-    /**
-     * get either the parsed content or the original, depending on whether the original looked like html content or not.
-     * @return
-     */
-    public String getNonHtmlContent()
-    {
-      return isHtmlContent() ? sb.toString() : orig;
-    }
 
 }