From: gmungoc Date: Thu, 24 Sep 2015 15:25:24 +0000 (+0100) Subject: JAL-1905 parse url including & correctly X-Git-Tag: Release_2_10_0~387^2 X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=5205a25be39fbf7747ef33c30662d3de81c52ba9;p=jalview.git JAL-1905 parse url including & correctly --- diff --git a/src/jalview/util/ParseHtmlBodyAndLinks.java b/src/jalview/util/ParseHtmlBodyAndLinks.java index 5263454..f1b83b8 100644 --- a/src/jalview/util/ParseHtmlBodyAndLinks.java +++ b/src/jalview/util/ParseHtmlBodyAndLinks.java @@ -21,6 +21,7 @@ package jalview.util; import java.util.ArrayList; +import java.util.List; import java.util.StringTokenizer; import java.util.regex.Pattern; @@ -52,9 +53,9 @@ public class ParseHtmlBodyAndLinks return htmlContent; } - ArrayList links = new ArrayList(); + List links = new ArrayList(); - StringBuffer sb = new StringBuffer(); + String content; /** * result of parsing description - with or without HTML tags @@ -64,7 +65,7 @@ public class ParseHtmlBodyAndLinks public String getContent() { - return sb.toString(); + return content; } /** @@ -72,12 +73,19 @@ public class ParseHtmlBodyAndLinks * * @return */ - public ArrayList getLinks() + public List getLinks() { return links; } /** + * Parses the given html and + *
<ul> + *
<li>extracts any 'href' links to a list of "displayName|url" strings, + * retrievable by #getLinks</li>
+ *
<li>extracts the remaining text (with %LINK% placeholders replacing hrefs), + * retrievable by #getContent</li>
+ * </ul>
 * * @param description * - html or text content to be parsed @@ -89,6 +97,7 @@ public class ParseHtmlBodyAndLinks public ParseHtmlBodyAndLinks(String description, boolean removeHTML, String newline) { + StringBuilder sb = new StringBuilder(description.length()); if (description == null || description.length() == 0) { htmlContent = false; @@ -105,7 +114,7 @@ public class ParseHtmlBodyAndLinks String tag = null; while (st.hasMoreElements()) { - token = st.nextToken("&>"); + token = st.nextToken(">"); if (token.equalsIgnoreCase("html") || token.startsWith("/")) { continue; @@ -135,18 +144,6 @@ public class ParseHtmlBodyAndLinks { sb.append(newline); } - else if (token.startsWith("lt;")) - { - sb.append("<" + token.substring(3)); - } - else if (token.startsWith("gt;")) - { - sb.append(">" + token.substring(3)); - } - else if (token.startsWith("amp;")) - { - sb.append("&" + token.substring(4)); - } else { sb.append(token); @@ -156,11 +153,18 @@ public class ParseHtmlBodyAndLinks { // instead of parsing the html into plaintext // clean the description ready for embedding in html - sb = new StringBuffer(LEFT_ANGLE_BRACKET_PATTERN.matcher(description) - .replaceAll("&lt;")); - + sb = new StringBuilder(LEFT_ANGLE_BRACKET_PATTERN + .matcher(description).replaceAll("&lt;")); } + content = translateEntities(sb.toString()); + } + private String translateEntities(String s) + { + s = s.replaceAll("&amp;", "&"); + s = s.replaceAll("&lt;", "<"); + s = s.replaceAll("&gt;", ">"); + return s; } /** @@ -171,7 +175,7 @@ public class ParseHtmlBodyAndLinks */ public String getNonHtmlContent() { - return isHtmlContent() ? sb.toString() : orig; + return isHtmlContent() ? 
content : orig; } } diff --git a/test/jalview/util/ParseHtmlBodyAndLinksTest.java b/test/jalview/util/ParseHtmlBodyAndLinksTest.java new file mode 100644 index 0000000..5e8cd8c --- /dev/null +++ b/test/jalview/util/ParseHtmlBodyAndLinksTest.java @@ -0,0 +1,74 @@ +package jalview.util; + +import static org.testng.AssertJUnit.assertEquals; + +import org.testng.annotations.Test; + +public class ParseHtmlBodyAndLinksTest +{ + @Test(groups = { "Functional" }) + public void testParseHtml_noLinks() + { + ParseHtmlBodyAndLinks testee = new ParseHtmlBodyAndLinks( + "<html>something here</html>", false, "\n"); + assertEquals("something here", testee.getContent()); + assertEquals("something here", testee.getNonHtmlContent()); + + // second argument makes no difference?? + testee = new ParseHtmlBodyAndLinks("<html>something here</html>", true, + "\n"); + assertEquals("something here", testee.getContent()); + assertEquals("something here", testee.getNonHtmlContent()); + } + + @Test(groups = { "Functional" }) + public void testParseHtml_withLinks() + { + ParseHtmlBodyAndLinks testee = new ParseHtmlBodyAndLinks( + "<html>Please click <a href=\"http://www.nowhere.com\">on this</a> to learn more about <a href=\"http://www.somewhere.com/here\">this</a></html>", + false, "\n"); + assertEquals( + "Please click on this%LINK% to learn more about this%LINK%", + testee.getContent()); + assertEquals( + "Please click on this%LINK% to learn more about this%LINK%", + testee.getNonHtmlContent()); + assertEquals(2, testee.getLinks().size()); + assertEquals("on this|http://www.nowhere.com", testee.getLinks().get(0)); + assertEquals("this|http://www.somewhere.com/here", testee.getLinks() + .get(1)); + } + + @Test(groups = { "Functional" }) + public void testParseHtml_withLinksWithParameters() + { + ParseHtmlBodyAndLinks testee = new ParseHtmlBodyAndLinks( + "<html>Please click <a href=\"http://www.nowhere.com?id=234&taxon=human\">on this</a> to learn more</html>", + false, "\n"); + assertEquals("Please click on this%LINK% to learn more", + testee.getContent()); + assertEquals("Please click on this%LINK% to learn more", + testee.getNonHtmlContent()); + assertEquals(1, 
testee.getLinks().size()); + assertEquals("on this|http://www.nowhere.com?id=234&taxon=human", + testee.getLinks().get(0)); + } + + @Test(groups = { "Functional" }) + public void testParseHtml_withLinksWithEncoding() + { + ParseHtmlBodyAndLinks testee = new ParseHtmlBodyAndLinks( + "<html>Please click <a href=\"http://www.nowhere.com?id=234&amp;taxon=human&amp;id&gt;3&amp;id&lt;10\">on this</a> to learn &amp;&lt;&gt;more</html>", + false, "\n"); + // html encoding in the text body is translated + assertEquals("Please click on this%LINK% to learn &<>more", + testee.getContent()); + assertEquals("Please click on this%LINK% to learn &<>more", + testee.getNonHtmlContent()); + assertEquals(1, testee.getLinks().size()); + // html encoding in the url links is not translated + assertEquals( + "on this|http://www.nowhere.com?id=234&amp;taxon=human&amp;id&gt;3&amp;id&lt;10", + testee.getLinks().get(0)); + } +}