X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Futil%2FParseHtmlBodyAndLinks.java;h=f1b83b8d79323df6eb6aae0dfd0e5e7ec1ed7644;hb=37de9310bec3501cbc6381e0c3dcb282fcaad812;hp=22bae142e78db618ba9c55681622037d7a2483f3;hpb=b57a02c25e335d033c97f8a6bacd6b54f62bd2b6;p=jalview.git diff --git a/src/jalview/util/ParseHtmlBodyAndLinks.java b/src/jalview/util/ParseHtmlBodyAndLinks.java index 22bae14..f1b83b8 100644 --- a/src/jalview/util/ParseHtmlBodyAndLinks.java +++ b/src/jalview/util/ParseHtmlBodyAndLinks.java @@ -1,23 +1,27 @@ -/******************************************************************************* - * Jalview - A Sequence Alignment Editor and Viewer (Version 2.7) - * Copyright (C) 2011 J Procter, AM Waterhouse, J Engelhardt, LM Lui, G Barton, M Clamp, S Searle - * +/* + * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) + * Copyright (C) $$Year-Rel$$ The Jalview Authors + * * This file is part of Jalview. - * + * * Jalview is free software: you can redistribute it and/or * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. - * + * as published by the Free Software Foundation, either version 3 + * of the License, or (at your option) any later version. + * * Jalview is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty * of MERCHANTABILITY or FITNESS FOR A PARTICULAR * PURPOSE. See the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along with Jalview. If not, see <http://www.gnu.org/licenses/>. - *******************************************************************************/ + * + * You should have received a copy of the GNU General Public License + * along with Jalview. If not, see <http://www.gnu.org/licenses/>. + * The Jalview Authors are detailed in the 'AUTHORS' file. 
+ */ package jalview.util; import java.util.ArrayList; +import java.util.List; import java.util.StringTokenizer; import java.util.regex.Pattern; @@ -29,6 +33,9 @@ import java.util.regex.Pattern; */ public class ParseHtmlBodyAndLinks { + private static final Pattern LEFT_ANGLE_BRACKET_PATTERN = Pattern + .compile("<"); + String orig = null; public String getOrig() @@ -46,9 +53,9 @@ public class ParseHtmlBodyAndLinks return htmlContent; } - ArrayList links = new ArrayList(); + List links = new ArrayList(); - StringBuffer sb = new StringBuffer(); + String content; /** * result of parsing description - with or without HTML tags @@ -58,7 +65,7 @@ public class ParseHtmlBodyAndLinks public String getContent() { - return sb.toString(); + return content; } /** @@ -66,12 +73,19 @@ public class ParseHtmlBodyAndLinks * * @return */ - public ArrayList getLinks() + public List getLinks() { return links; } /** + * Parses the given html and + *
<ul> + *
<li>extracts any 'href' links to a list of "displayName|url" strings, + * retrievable by #getLinks</li>
+ *
<li>extracts the remaining text (with %LINK% placeholders replacing hrefs), + * retrievable by #getContent</li>
+ * </ul>
* * @param description * - html or text content to be parsed @@ -83,6 +97,7 @@ public class ParseHtmlBodyAndLinks public ParseHtmlBodyAndLinks(String description, boolean removeHTML, String newline) { + StringBuilder sb = new StringBuilder(description.length()); if (description == null || description.length() == 0) { htmlContent = false; @@ -99,7 +114,7 @@ public class ParseHtmlBodyAndLinks String tag = null; while (st.hasMoreElements()) { - token = st.nextToken("&>"); + token = st.nextToken(">"); if (token.equalsIgnoreCase("html") || token.startsWith("/")) { continue; @@ -129,18 +144,6 @@ public class ParseHtmlBodyAndLinks { sb.append(newline); } - else if (token.startsWith("lt;")) - { - sb.append("<" + token.substring(3)); - } - else if (token.startsWith("gt;")) - { - sb.append(">" + token.substring(3)); - } - else if (token.startsWith("amp;")) - { - sb.append("&" + token.substring(4)); - } else { sb.append(token); @@ -150,11 +153,18 @@ public class ParseHtmlBodyAndLinks { // instead of parsing the html into plaintext // clean the description ready for embedding in html - sb = new StringBuffer(Pattern.compile("<").matcher(description) - .replaceAll("&lt;")); - + sb = new StringBuilder(LEFT_ANGLE_BRACKET_PATTERN + .matcher(description).replaceAll("&lt;")); } + content = translateEntities(sb.toString()); + } + private String translateEntities(String s) + { + s = s.replaceAll("&amp;", "&"); + s = s.replaceAll("&lt;", "<"); + s = s.replaceAll("&gt;", ">"); + return s; } /** @@ -165,7 +175,7 @@ public class ParseHtmlBodyAndLinks */ public String getNonHtmlContent() { - return isHtmlContent() ? sb.toString() : orig; + return isHtmlContent() ? content : orig; } }