X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Futil%2FStringUtils.java;h=89bc36d65d72904132bc833a3b62c83130a5e5a9;hb=HEAD;hp=b5ab40df65c9c121000b017044ee344eb3e84b0d;hpb=37de9310bec3501cbc6381e0c3dcb282fcaad812;p=jalview.git diff --git a/src/jalview/util/StringUtils.java b/src/jalview/util/StringUtils.java index b5ab40d..89bc36d 100644 --- a/src/jalview/util/StringUtils.java +++ b/src/jalview/util/StringUtils.java @@ -20,8 +20,12 @@ */ package jalview.util; +import java.io.UnsupportedEncodingException; +import java.net.URLEncoder; import java.util.ArrayList; import java.util.List; +import java.util.Locale; +import java.util.regex.Matcher; import java.util.regex.Pattern; public class StringUtils @@ -29,8 +33,16 @@ public class StringUtils private static final Pattern DELIMITERS_PATTERN = Pattern .compile(".*='[^']*(?!')"); + private static final char PERCENT = '%'; + private static final boolean DEBUG = false; + /* + * URL encoded characters, indexed by char value + * e.g. urlEncodings['='] = urlEncodings[61] = "%3D" + */ + private static String[] urlEncodings = new String[255]; + /** * Returns a new character array, after inserting characters into the given * character array. @@ -138,14 +150,15 @@ public class StringUtils * @param delimiter * @return elements separated by separator */ - public static String[] separatorListToArray(String input, String delimiter) + public static String[] separatorListToArray(String input, + String delimiter) { int seplen = delimiter.length(); if (input == null || input.equals("") || input.equals(delimiter)) { return null; } - List jv = new ArrayList(); + List jv = new ArrayList<>(); int cp = 0, pos, escape; boolean wasescaped = false, wasquoted = false; String lstitem = null; @@ -155,9 +168,8 @@ public class StringUtils if (wasescaped || wasquoted) { // append to previous pos - jv.set(jv.size() - 1, - lstitem = lstitem + delimiter - + input.substring(cp, pos + escape)); + jv.set(jv.size() - 1, lstitem = lstitem + delimiter + + input.substring(cp, pos + escape)); } else { @@ -190,19 +202,19 @@ public class StringUtils jv.clear(); if (DEBUG) { - System.err.println("Array from '" + delimiter + jalview.bin.Console.errPrintln("Array from '" + delimiter + "' separated List:\n" + v.length); for (int i = 0; i < v.length; i++) { - System.err.println("item " + i + " '" + v[i] + "'"); + jalview.bin.Console.errPrintln("item " + i + " '" + v[i] + "'"); } } return v; } if (DEBUG) { - System.err.println("Empty Array from '" + delimiter - + "' separated List"); + jalview.bin.Console.errPrintln( + "Empty Array from '" + delimiter + "' separated List"); } return null; } @@ -235,16 +247,16 @@ public class StringUtils } if (DEBUG) { - System.err.println("Returning '" + separator - + "' separated List:\n"); - System.err.println(v); + System.err + .println("Returning '" + separator + "' separated List:\n"); + jalview.bin.Console.errPrintln(v); } return v.toString(); } if (DEBUG) { - System.err.println("Returning empty '" + separator - + "' separated List\n"); + jalview.bin.Console.errPrintln( + "Returning empty '" + separator + "' separated List\n"); } return "" + separator; } @@ -364,8 +376,8 @@ public class StringUtils } } catch (NumberFormatException e) { - System.err.println("Invalid version format found: " - + e.getMessage()); + System.err + .println("Invalid version format found: " + e.getMessage()); return 0; } } @@ -399,8 +411,237 @@ public class StringUtils } if (s.length() <= 1) { - return s.toUpperCase(); + return s.toUpperCase(Locale.ROOT); + } + return s.substring(0, 1).toUpperCase(Locale.ROOT) + + s.substring(1).toLowerCase(Locale.ROOT); + } + + /** + * A helper method that strips off any leading or trailing html and body tags. + * If no html tag is found, then also html-encodes angle bracket characters. + * + * @param text + * @return + */ + public static String stripHtmlTags(String text) + { + if (text == null) + { + return null; + } + String tmp2up = text.toUpperCase(Locale.ROOT); + int startTag = tmp2up.indexOf(""); + if (startTag > -1) + { + text = text.substring(startTag + 6); + tmp2up = tmp2up.substring(startTag + 6); + } + // is omission of "" intentional here?? + int endTag = tmp2up.indexOf(""); + if (endTag > -1) + { + text = text.substring(0, endTag); + tmp2up = tmp2up.substring(0, endTag); + } + endTag = tmp2up.indexOf(""); + if (endTag > -1) + { + text = text.substring(0, endTag); + } + + if (startTag == -1 && (text.contains("<") || text.contains(">"))) + { + text = text.replaceAll("<", "<"); + text = text.replaceAll(">", ">"); + } + return text; + } + + /** + * Answers the input string with any occurrences of the 'encodeable' + * characters replaced by their URL encoding + * + * @param s + * @param encodable + * @return + */ + public static String urlEncode(String s, String encodable) + { + if (s == null || s.isEmpty()) + { + return s; + } + + /* + * do % encoding first, as otherwise it may double-encode! + */ + if (encodable.indexOf(PERCENT) != -1) + { + s = urlEncode(s, PERCENT); + } + + for (char c : encodable.toCharArray()) + { + if (c != PERCENT) + { + s = urlEncode(s, c); + } + } + return s; + } + + /** + * Answers the input string with any occurrences of {@code c} replaced with + * their url encoding. Answers the input string if it is unchanged. + * + * @param s + * @param c + * @return + */ + static String urlEncode(String s, char c) + { + String decoded = String.valueOf(c); + if (s.indexOf(decoded) != -1) + { + String encoded = getUrlEncoding(c); + if (!encoded.equals(decoded)) + { + s = s.replace(decoded, encoded); + } + } + return s; + } + + /** + * Answers the input string with any occurrences of the specified (unencoded) + * characters replaced by their URL decoding. + *

+ * Example: {@code urlDecode("a%3Db%3Bc", "-;=,")} should answer + * {@code "a=b;c"}. + * + * @param s + * @param encodable + * @return + */ + public static String urlDecode(String s, String encodable) + { + if (s == null || s.isEmpty()) + { + return s; + } + + for (char c : encodable.toCharArray()) + { + String encoded = getUrlEncoding(c); + if (s.indexOf(encoded) != -1) + { + String decoded = String.valueOf(c); + s = s.replace(encoded, decoded); + } + } + return s; + } + + /** + * Does a lazy lookup of the url encoding of the given character, saving the + * value for repeat lookups + * + * @param c + * @return + */ + private static String getUrlEncoding(char c) + { + if (c < 0 || c >= urlEncodings.length) + { + return String.valueOf(c); + } + + String enc = urlEncodings[c]; + if (enc == null) + { + try + { + enc = urlEncodings[c] = URLEncoder.encode(String.valueOf(c), + "UTF-8"); + } catch (UnsupportedEncodingException e) + { + enc = urlEncodings[c] = String.valueOf(c); + } + } + return enc; + } + + public static int firstCharPosIgnoreCase(String text, String chars) + { + int min = text.length() + 1; + for (char c : chars.toLowerCase(Locale.ROOT).toCharArray()) + { + int i = text.toLowerCase(Locale.ROOT).indexOf(c); + if (0 <= i && i < min) + { + min = i; + } + } + return min < text.length() + 1 ? min : -1; + } + + public static boolean equalsIgnoreCase(String s1, String s2) + { + if (s1 == null || s2 == null) + { + return s1 == s2; + } + return s1.toLowerCase(Locale.ROOT).equals(s2.toLowerCase(Locale.ROOT)); + } + + public static int indexOfFirstWhitespace(String text) + { + int index = -1; + Pattern pat = Pattern.compile("\\s"); + Matcher m = pat.matcher(text); + if (m.find()) + { + index = m.start(); + } + return index; + } + + /* + * implementation of String.replaceLast. + * Replaces only the last occurrence of toReplace in string with replacement. + */ + public static String replaceLast(String string, String toReplace, + String replacement) + { + int pos = string.lastIndexOf(toReplace); + if (pos > -1) + { + return new StringBuilder().append(string.substring(0, pos)) + .append(replacement) + .append(string.substring(pos + toReplace.length())) + .toString(); + } + else + { + return string; + } + + } + + /* + * return the maximum length of a List of Strings + */ + public static int maxLength(List l) + { + int max = 0; + for (String s : l) + { + if (s == null) + continue; + if (s.length() > max) + max = s.length(); } - return s.substring(0, 1).toUpperCase() + s.substring(1).toLowerCase(); + return max; } }