X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Futil%2FParseHtmlBodyAndLinks.java;h=0f2d01d0b7618405e1f51ad5da68d1f4a5993856;hb=9d2408483e451285fd555c3cd6e0273977acbaa7;hp=7aec22d84cfa06b1605f90a2680850241d3065a7;hpb=be32c14cd8e48fe0a207cd7030cb9cd46f894678;p=jalview.git diff --git a/src/jalview/util/ParseHtmlBodyAndLinks.java b/src/jalview/util/ParseHtmlBodyAndLinks.java index 7aec22d..0f2d01d 100644 --- a/src/jalview/util/ParseHtmlBodyAndLinks.java +++ b/src/jalview/util/ParseHtmlBodyAndLinks.java @@ -20,7 +20,10 @@ */ package jalview.util; +import java.util.Locale; + import java.util.ArrayList; +import java.util.List; import java.util.StringTokenizer; import java.util.regex.Pattern; @@ -32,7 +35,8 @@ import java.util.regex.Pattern; */ public class ParseHtmlBodyAndLinks { - private static final Pattern LEFT_ANGLE_BRACKET_PATTERN = Pattern.compile("<"); + private static final Pattern LEFT_ANGLE_BRACKET_PATTERN = Pattern + .compile("<"); String orig = null; @@ -51,9 +55,9 @@ public class ParseHtmlBodyAndLinks return htmlContent; } - ArrayList links = new ArrayList(); + List links = new ArrayList(); - StringBuffer sb = new StringBuffer(); + String content; /** * result of parsing description - with or without HTML tags @@ -63,7 +67,7 @@ public class ParseHtmlBodyAndLinks public String getContent() { - return sb.toString(); + return content; } /** @@ -71,12 +75,19 @@ public class ParseHtmlBodyAndLinks * * @return */ - public ArrayList getLinks() + public List getLinks() { return links; } /** + * Parses the given html and + *
 <ul>
 + * <li>extracts any 'href' links to a list of "displayName|url" strings, + * retrievable by #getLinks</li>
 + * <li>extracts the remaining text (with %LINK% placeholders replacing hrefs), + * retrievable by #getContent</li>
 + * </ul>
 * * @param description * - html or text content to be parsed @@ -93,7 +104,8 @@ public class ParseHtmlBodyAndLinks htmlContent = false; return; } - if (description.toUpperCase().indexOf("<HTML>") == -1) + StringBuilder sb = new StringBuilder(description.length()); + if (description.toUpperCase(Locale.ROOT).indexOf("<HTML>") == -1) { htmlContent = false; } @@ -104,7 +116,7 @@ public class ParseHtmlBodyAndLinks String tag = null; while (st.hasMoreElements()) { - token = st.nextToken("&>"); + token = st.nextToken(">"); if (token.equalsIgnoreCase("html") || token.startsWith("/")) { continue; @@ -119,7 +131,7 @@ public class ParseHtmlBodyAndLinks token = token.substring(0, startTag); } - if (tag != null && tag.toUpperCase().startsWith("A HREF=")) + if (tag != null && tag.toUpperCase(Locale.ROOT).startsWith("A HREF=")) { if (token.length() > 0) { @@ -134,18 +146,6 @@ public class ParseHtmlBodyAndLinks { sb.append(newline); } - else if (token.startsWith("lt;")) - { - sb.append("<" + token.substring(3)); - } - else if (token.startsWith("gt;")) - { - sb.append(">" + token.substring(3)); - } - else if (token.startsWith("amp;")) - { - sb.append("&" + token.substring(4)); - } else { sb.append(token); @@ -155,11 +155,18 @@ public class ParseHtmlBodyAndLinks { // instead of parsing the html into plaintext // clean the description ready for embedding in html - sb = new StringBuffer(LEFT_ANGLE_BRACKET_PATTERN.matcher(description) + sb = new StringBuilder(LEFT_ANGLE_BRACKET_PATTERN.matcher(description) .replaceAll("&lt;")); - } + content = translateEntities(sb.toString()); + } + private String translateEntities(String s) + { + s = s.replaceAll("&amp;", "&"); + s = s.replaceAll("&lt;", "<"); + s = s.replaceAll("&gt;", ">"); + return s; } /** @@ -170,7 +177,7 @@ public class ParseHtmlBodyAndLinks */ public String getNonHtmlContent() { - return isHtmlContent() ? sb.toString() : orig; + return isHtmlContent() ? content : orig; } }