}
/**
- * Returns the last part of 'input' after the last occurrence of 'token'. For
- * example to extract only the filename from a full path or URL.
- *
- * @param input
- * @param token
- * a delimiter which must be in regular expression format
- * @return
- */
- public static String getLastToken(String input, String token)
- {
- if (input == null)
- {
- return null;
- }
- if (token == null)
- {
- return input;
- }
- String[] st = input.split(token);
- return st[st.length - 1];
- }
-
- /**
* Parses the input string into components separated by the delimiter. Unlike
* String.split(), this method will ignore occurrences of the delimiter which
* are nested within single quotes in name-value pair values, e.g. a='b,c'.
{
return null;
}
- List<String> jv = new ArrayList<String>();
+ List<String> jv = new ArrayList<>();
int cp = 0, pos, escape;
boolean wasescaped = false, wasquoted = false;
String lstitem = null;
}
return s.substring(0, 1).toUpperCase() + s.substring(1).toLowerCase();
}
+
+ /**
+ * A helper method that strips off any leading or trailing html and body tags.
+ * If no html tag is found, then also html-encodes angle bracket characters.
+ *
+ * @param text
+ * @return
+ */
+ public static String stripHtmlTags(String text)
+ {
+ if (text == null)
+ {
+ return null;
+ }
+ String tmp2up = text.toUpperCase();
+ int startTag = tmp2up.indexOf("<HTML>");
+ if (startTag > -1)
+ {
+ text = text.substring(startTag + 6);
+ tmp2up = tmp2up.substring(startTag + 6);
+ }
+ // is omission of "<BODY>" intentional here??
+ int endTag = tmp2up.indexOf("</BODY>");
+ if (endTag > -1)
+ {
+ text = text.substring(0, endTag);
+ tmp2up = tmp2up.substring(0, endTag);
+ }
+ endTag = tmp2up.indexOf("</HTML>");
+ if (endTag > -1)
+ {
+ text = text.substring(0, endTag);
+ }
+
+ if (startTag == -1 && (text.contains("<") || text.contains(">")))
+ {
+ text = text.replaceAll("<", "<");
+ text = text.replaceAll(">", ">");
+ }
+ return text;
+ }
}