package jalview.util;
import java.util.ArrayList;
+import java.util.List;
import java.util.StringTokenizer;
import java.util.regex.Pattern;
return htmlContent;
}
- ArrayList<String> links = new ArrayList<String>();
+ List<String> links = new ArrayList<String>(); // the list handed out by getLinks()
- StringBuffer sb = new StringBuffer();
+ String content; // returned by getContent(); assigned in the constructor from translateEntities(...)
/**
* result of parsing description - with or without HTML tags
public String getContent()
{
- return sb.toString();
+ return content; // the entity-translated text stored by the constructor
}
/**
* Returns the links held by this object.
*
* @return the {@code links} list itself, not a copy
*/
- public ArrayList<String> getLinks()
+ public List<String> getLinks()
{
return links;
}
/**
+ * Parses the given html and
+ * <ul>
+ * <li>extracts any 'href' links to a list of "displayName|url" strings,
+ * retrievable by #getLinks</li>
+ * <li>extracts the remaining text (with %LINK% placeholders replacing hrefs),
+ * retrievable by #getContent</li>
+ * </ul>
*
* @param description
* - html or text content to be parsed
public ParseHtmlBodyAndLinks(String description, boolean removeHTML,
String newline)
{
+ // Size the buffer from the input, but tolerate null: the null/empty check
+ // just below must be reachable without a NullPointerException.
+ StringBuilder sb = new StringBuilder(
+ description == null ? 0 : description.length());
if (description == null || description.length() == 0)
{
htmlContent = false;
String tag = null;
while (st.hasMoreElements())
{
- token = st.nextToken("&>");
+ // '&' is no longer a delimiter: entities stay inside the token and are
+ // decoded in one place by translateEntities() at the end
+ token = st.nextToken(">");
if (token.equalsIgnoreCase("html") || token.startsWith("/"))
{
continue;
{
sb.append(newline);
}
- else if (token.startsWith("lt;"))
- {
- sb.append("<" + token.substring(3));
- }
- else if (token.startsWith("gt;"))
- {
- sb.append(">" + token.substring(3));
- }
- else if (token.startsWith("amp;"))
- {
- sb.append("&" + token.substring(4));
- }
else
{
sb.append(token);
{
// instead of parsing the html into plaintext
// clean the description ready for embedding in html
- sb = new StringBuffer(LEFT_ANGLE_BRACKET_PATTERN.matcher(description)
- .replaceAll("<"));
-
+ sb = new StringBuilder(LEFT_ANGLE_BRACKET_PATTERN
+ .matcher(description).replaceAll("<"));
}
+ // decode entities in the assembled text; this is what getContent() returns
+ content = translateEntities(sb.toString());
+ }
+ /**
+ * Decodes the HTML character entities for less-than, greater-than and
+ * ampersand in the given text. All other text is returned unchanged.
+ *
+ * @param s
+ * text possibly containing &amp;lt; &amp;gt; or &amp;amp; entities
+ * (must not be null)
+ * @return the text with those three entities replaced by the characters
+ * they stand for
+ */
+ private String translateEntities(String s)
+ {
+ // Literal replace() rather than regex replaceAll(). The ampersand entity is
+ // decoded last so that an escaped entity name (e.g. "&amp;lt;") comes out
+ // as the text "&lt;" instead of being decoded a second time to "<".
+ s = s.replace("&lt;", "<");
+ s = s.replace("&gt;", ">");
+ s = s.replace("&amp;", "&");
+ return s;
}
/**
* Returns {@code content} when {@code isHtmlContent()} is true, otherwise
* {@code orig}.
*/
public String getNonHtmlContent()
{
- return isHtmlContent() ? sb.toString() : orig;
+ return isHtmlContent() ? content : orig;
}
}
--- /dev/null
+package jalview.util;
+
+import static org.testng.AssertJUnit.assertEquals;
+
+import org.testng.annotations.Test;
+
+/**
+ * Functional tests for ParseHtmlBodyAndLinks: checks the text returned by
+ * getContent / getNonHtmlContent and the "displayName|url" entries returned
+ * by getLinks for html with and without href links.
+ */
+public class ParseHtmlBodyAndLinksTest
+{
+ /**
+ * Html with no links: only the enclosing html tags are removed.
+ */
+ @Test(groups = { "Functional" })
+ public void testParseHtml_noLinks()
+ {
+ ParseHtmlBodyAndLinks testee = new ParseHtmlBodyAndLinks(
+ "<html>something here</html>", false, "\n");
+ assertEquals("something here", testee.getContent());
+ assertEquals("something here", testee.getNonHtmlContent());
+
+ // second argument makes no difference??
+ testee = new ParseHtmlBodyAndLinks("<html>something here</html>", true,
+ "\n");
+ assertEquals("something here", testee.getContent());
+ assertEquals("something here", testee.getNonHtmlContent());
+ }
+
+ /**
+ * Each href is replaced in the text by its display name followed by %LINK%,
+ * and is reported by getLinks as "displayName|url".
+ */
+ @Test(groups = { "Functional" })
+ public void testParseHtml_withLinks()
+ {
+ ParseHtmlBodyAndLinks testee = new ParseHtmlBodyAndLinks(
+ "<html>Please click <a href=\"http://www.nowhere.com\">on this</a> to learn more about <a href=\"http://www.somewhere.com/here\">this</a></html>",
+ false, "\n");
+ assertEquals(
+ "Please click on this%LINK% to learn more about this%LINK%",
+ testee.getContent());
+ assertEquals(
+ "Please click on this%LINK% to learn more about this%LINK%",
+ testee.getNonHtmlContent());
+ assertEquals(2, testee.getLinks().size());
+ assertEquals("on this|http://www.nowhere.com", testee.getLinks().get(0));
+ assertEquals("this|http://www.somewhere.com/here", testee.getLinks()
+ .get(1));
+ }
+
+ /**
+ * A url containing a raw (unencoded) '&' query separator is kept intact.
+ */
+ @Test(groups = { "Functional" })
+ public void testParseHtml_withLinksWithParameters()
+ {
+ ParseHtmlBodyAndLinks testee = new ParseHtmlBodyAndLinks(
+ "<html>Please click <a href=\"http://www.nowhere.com?id=234&taxon=human\">on this</a> to learn more</html>",
+ false, "\n");
+ assertEquals("Please click on this%LINK% to learn more",
+ testee.getContent());
+ assertEquals("Please click on this%LINK% to learn more",
+ testee.getNonHtmlContent());
+ assertEquals(1, testee.getLinks().size());
+ assertEquals("on this|http://www.nowhere.com?id=234&taxon=human",
+ testee.getLinks().get(0));
+ }
+
+ /**
+ * Html entities in the body text are decoded; entities inside the href url
+ * are passed through as written. The fixture therefore has to contain the
+ * encoded forms - raw angle brackets here would be read as tag delimiters.
+ */
+ @Test(groups = { "Functional" })
+ public void testParseHtml_withLinksWithEncoding()
+ {
+ ParseHtmlBodyAndLinks testee = new ParseHtmlBodyAndLinks(
+ "<html>Please click <a href=\"http://www.nowhere.com?id=234&amp;taxon=human&amp;id&gt;3&amp;id&lt;10\">on this</a> to learn &amp;&lt;&gt;more</html>",
+ false, "\n");
+ // html encoding in the text body is translated
+ assertEquals("Please click on this%LINK% to learn &<>more",
+ testee.getContent());
+ assertEquals("Please click on this%LINK% to learn &<>more",
+ testee.getNonHtmlContent());
+ assertEquals(1, testee.getLinks().size());
+ // html encoding in the url links is not translated
+ assertEquals(
+ "on this|http://www.nowhere.com?id=234&amp;taxon=human&amp;id&gt;3&amp;id&lt;10",
+ testee.getLinks().get(0));
+ }
+}