*/
package jalview.util;
+import java.io.UnsupportedEncodingException;
+import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.List;
-import java.util.regex.Pattern;
+import java.util.Locale;
public class StringUtils
{
- private static final Pattern DELIMITERS_PATTERN = Pattern
- .compile(".*='[^']*(?!')");
+ private static final char PERCENT = '%';
private static final boolean DEBUG = false;
+ /*
+ * URL encoded characters, indexed by char value
+ * e.g. urlEncodings['='] = urlEncodings[61] = "%3D"
+ */
+ private static String[] urlEncodings = new String[255];
+
/**
* Returns a new character array, after inserting characters into the given
* character array.
* Parses the input string into components separated by the delimiter. Unlike
* String.split(), this method will ignore occurrences of the delimiter which
* are nested within single quotes in name-value pair values, e.g. a='b,c'.
+ * New implementation to avoid Pattern for jalviewjs.
*
* @param input
* @param delimiter
public static String[] separatorListToArray(String input,
String delimiter)
{
- int seplen = delimiter.length();
- if (input == null || input.equals("") || input.equals(delimiter))
+ if (input == null
+ // these two shouldn't return null (one or two "" respectively)
+ || input.equals("") || input.equals(delimiter))
{
return null;
}
- List<String> jv = new ArrayList<String>();
- int cp = 0, pos, escape;
- boolean wasescaped = false, wasquoted = false;
- String lstitem = null;
- while ((pos = input.indexOf(delimiter, cp)) >= cp)
+
+ final char escapeChar = '\\';
+ final char quoteChar = '\'';
+ int ilength = input.length();
+ int dlength = delimiter.length();
+ List<String> values = new ArrayList<>();
+
+ // true when the previous character was an unpaired escape character
+ boolean escape = false;
+ // true while scanning inside a single-quoted section
+ boolean inquote = false;
+
+ // index of the first character of the value currently being scanned
+ int start = 0;
+ for (int i = 0; i < ilength; i++)
{
- escape = (pos > 0 && input.charAt(pos - 1) == '\\') ? -1 : 0;
- if (wasescaped || wasquoted)
- {
- // append to previous pos
- jv.set(jv.size() - 1, lstitem = lstitem + delimiter
- + input.substring(cp, pos + escape));
- }
- else
+ if (!escape && !inquote && input.startsWith(delimiter, i))
{
- jv.add(lstitem = input.substring(cp, pos + escape));
+ // found a delimiter: the value is everything since 'start'
+ // (escape and quote characters are kept in the value)
+ values.add(input.substring(start, i));
+ start = i + dlength;
+ if (dlength > 0)
+ {
+ // resume at the delimiter's last character; the loop's own i++
+ // then moves to the first character of the next value, so that
+ // character (possibly a quote, an escape or another delimiter)
+ // is examined rather than skipped
+ i = start - 1;
+ }
+ continue;
}
- cp = pos + seplen;
- wasescaped = escape == -1;
- // last separator may be in an unmatched quote
- wasquoted = DELIMITERS_PATTERN.matcher(lstitem).matches();
- }
- if (cp < input.length())
- {
- String c = input.substring(cp);
- if (wasescaped || wasquoted)
+ char c = input.charAt(i);
+ if (c == escapeChar)
{
- // append final separator
- jv.set(jv.size() - 1, lstitem + delimiter + c);
+ // a backslash escapes the next character; a second consecutive
+ // backslash is itself the escaped character and cancels the state
+ escape = !escape;
+ continue;
}
- else
+ if (escape)
{
- if (!c.equals(delimiter))
- {
- jv.add(c);
- }
+ // this character is escaped: it cannot open or close a quote
+ escape = false;
+ continue;
}
- }
- if (jv.size() > 0)
- {
- String[] v = jv.toArray(new String[jv.size()]);
- jv.clear();
- if (DEBUG)
+ if (c == quoteChar)
{
- System.err.println("Array from '" + delimiter
- + "' separated List:\n" + v.length);
- for (int i = 0; i < v.length; i++)
- {
- System.err.println("item " + i + " '" + v[i] + "'");
- }
+ // delimiters are ignored until the matching closing quote
+ inquote = !inquote;
}
- return v;
}
- if (DEBUG)
- {
- System.err.println(
- "Empty Array from '" + delimiter + "' separated List");
- }
- return null;
+ // add the last value
+ values.add(input.substring(start, ilength));
+
+ return values.toArray(new String[values.size()]);
}
/**
{
System.err
.println("Returning '" + separator + "' separated List:\n");
- System.err.println(v);
+ jalview.bin.Console.errPrintln(v);
}
return v.toString();
}
if (DEBUG)
{
- System.err.println(
+ jalview.bin.Console.errPrintln(
"Returning empty '" + separator + "' separated List\n");
}
return "" + separator;
}
if (s.length() <= 1)
{
- return s.toUpperCase();
+ return s.toUpperCase(Locale.ROOT);
}
- return s.substring(0, 1).toUpperCase() + s.substring(1).toLowerCase();
+ return s.substring(0, 1).toUpperCase(Locale.ROOT)
+ + s.substring(1).toLowerCase(Locale.ROOT);
}
/**
{
return null;
}
- String tmp2up = text.toUpperCase();
+ String tmp2up = text.toUpperCase(Locale.ROOT);
int startTag = tmp2up.indexOf("<HTML>");
if (startTag > -1)
{
{
text = text.substring(0, endTag);
}
-
+
if (startTag == -1 && (text.contains("<") || text.contains(">")))
{
text = text.replaceAll("<", "<");
}
return text;
}
+
+  /**
+   * Answers the input string with every occurrence of an 'encodable'
+   * character replaced by its URL encoding.
+   * <p>
+   * The input is encoded in a single left-to-right pass, so text produced by
+   * encoding one character is never itself encoded again, and the result does
+   * not depend on the order of the characters in {@code encodable}. For
+   * example {@code urlEncode("a b%", "% +")} answers {@code "a+b%25"}.
+   *
+   * @param s
+   *          the string to encode; if null or empty it is returned unchanged
+   * @param encodable
+   *          the characters to be encoded; if null or empty, {@code s} is
+   *          returned unchanged
+   * @return the encoded string
+   */
+  public static String urlEncode(String s, String encodable)
+  {
+    if (s == null || s.isEmpty() || encodable == null
+            || encodable.isEmpty())
+    {
+      return s;
+    }
+
+    int length = s.length();
+    StringBuilder sb = new StringBuilder(length + 16);
+    for (int i = 0; i < length; i++)
+    {
+      char c = s.charAt(i);
+      if (encodable.indexOf(c) == -1)
+      {
+        sb.append(c);
+      }
+      else
+      {
+        // the encoding may be the character itself (e.g. for '-' or '.')
+        sb.append(getUrlEncoding(c));
+      }
+    }
+    return sb.toString();
+  }
+
+  /**
+   * Replaces every occurrence of the character {@code c} in {@code s} with
+   * its URL encoding. The input string itself is answered when it does not
+   * contain {@code c}, or when the encoding of {@code c} is just {@code c}.
+   *
+   * @param s
+   *          the string to encode (must not be null)
+   * @param c
+   *          the character to replace
+   * @return the string with {@code c} encoded
+   */
+  static String urlEncode(String s, char c)
+  {
+    final String plain = String.valueOf(c);
+    if (!s.contains(plain))
+    {
+      // nothing to encode, so no lookup is needed either
+      return s;
+    }
+
+    final String encoded = getUrlEncoding(c);
+    return encoded.equals(plain) ? s : s.replace(plain, encoded);
+  }
+
+  /**
+   * Answers the input string with any URL-encoded occurrences of the
+   * specified (unencoded) characters replaced by the characters themselves.
+   * <p>
+   * Example: {@code urlDecode("a%3Db%3Bc", "-;=,")} should answer
+   * {@code "a=b;c"}.
+   * <p>
+   * The input is decoded in a single left-to-right pass, so text produced by
+   * decoding one sequence is never decoded a second time. For example
+   * {@code urlDecode("%253D", "%=")} answers {@code "%3D"}, not {@code "="}.
+   *
+   * @param s
+   *          the string to decode; if null or empty it is returned unchanged
+   * @param encodable
+   *          the characters whose encodings should be decoded; if null or
+   *          empty, {@code s} is returned unchanged
+   * @return the decoded string
+   */
+  public static String urlDecode(String s, String encodable)
+  {
+    if (s == null || s.isEmpty() || encodable == null
+            || encodable.isEmpty())
+    {
+      return s;
+    }
+
+    /*
+     * look up the encoding of each candidate character once
+     */
+    char[] plain = encodable.toCharArray();
+    String[] encoded = new String[plain.length];
+    for (int k = 0; k < plain.length; k++)
+    {
+      encoded[k] = getUrlEncoding(plain[k]);
+    }
+
+    int length = s.length();
+    StringBuilder sb = new StringBuilder(length);
+    int i = 0;
+    while (i < length)
+    {
+      int match = -1;
+      for (int k = 0; k < encoded.length; k++)
+      {
+        if (s.startsWith(encoded[k], i))
+        {
+          match = k;
+          break;
+        }
+      }
+      if (match == -1)
+      {
+        sb.append(s.charAt(i));
+        i++;
+      }
+      else
+      {
+        // emit the decoded character and step over its whole encoding
+        sb.append(plain[match]);
+        i += encoded[match].length();
+      }
+    }
+    return sb.toString();
+  }
+
+  /**
+   * Answers the URL encoding (UTF-8) of the given character. The encoding is
+   * computed on first request and stored in the {@code urlEncodings} table for
+   * later lookups. Characters whose value lies outside the table are answered
+   * unencoded.
+   *
+   * @param c
+   *          the character to encode
+   * @return the encoded form of {@code c}, or {@code c} itself as a string
+   */
+  private static String getUrlEncoding(char c)
+  {
+    // a char is never negative, so only the upper bound needs checking
+    if (c >= urlEncodings.length)
+    {
+      return String.valueOf(c);
+    }
+
+    String cached = urlEncodings[c];
+    if (cached != null)
+    {
+      return cached;
+    }
+
+    String plain = String.valueOf(c);
+    String computed;
+    try
+    {
+      computed = URLEncoder.encode(plain, "UTF-8");
+    } catch (UnsupportedEncodingException e)
+    {
+      // fall back to leaving the character unencoded
+      computed = plain;
+    }
+    urlEncodings[c] = computed;
+    return computed;
+  }
+
+  /**
+   * Answers the lowest index in {@code text} at which any of the characters
+   * in {@code chars} occurs, ignoring case, or -1 if none of them occurs.
+   * Both arguments are lower-cased using {@code Locale.ROOT} before they are
+   * compared.
+   *
+   * @param text
+   *          the string to search; if null, -1 is returned
+   * @param chars
+   *          the characters to look for; if null, -1 is returned
+   * @return the index of the first matching character, or -1 if there is none
+   */
+  public static int firstCharPosIgnoreCase(String text, String chars)
+  {
+    if (text == null || chars == null)
+    {
+      return -1;
+    }
+
+    // lower-case the text once, rather than once per candidate character
+    final String lowerText = text.toLowerCase(Locale.ROOT);
+    int first = -1;
+    for (char c : chars.toLowerCase(Locale.ROOT).toCharArray())
+    {
+      int pos = lowerText.indexOf(c);
+      if (pos != -1 && (first == -1 || pos < first))
+      {
+        first = pos;
+        if (first == 0)
+        {
+          // cannot do better than the first position
+          break;
+        }
+      }
+    }
+    return first;
+  }
+
+  /**
+   * Answers true if the two strings are equal after both are lower-cased
+   * using {@code Locale.ROOT}. Two nulls are considered equal; a null and a
+   * non-null string are not.
+   *
+   * @param s1
+   *          first string, may be null
+   * @param s2
+   *          second string, may be null
+   * @return true if both are null, or both are non-null and match ignoring
+   *         case
+   */
+  public static boolean equalsIgnoreCase(String s1, String s2)
+  {
+    if (s1 == null)
+    {
+      return s2 == null;
+    }
+    if (s2 == null)
+    {
+      return false;
+    }
+    String lower1 = s1.toLowerCase(Locale.ROOT);
+    String lower2 = s2.toLowerCase(Locale.ROOT);
+    return lower1.equals(lower2);
+  }
+
+  /**
+   * Answers the index of the first whitespace character (as determined by
+   * {@code Character.isWhitespace}) in the given text, or -1 if the text is
+   * null or contains no whitespace. Deliberately avoids regular expressions
+   * (for JalviewJS).
+   *
+   * @param text
+   *          the string to scan, may be null
+   * @return index of the first whitespace character, or -1
+   */
+  public static int indexOfFirstWhitespace(String text)
+  {
+    if (text != null)
+    {
+      final int length = text.length();
+      int pos = 0;
+      while (pos < length)
+      {
+        if (Character.isWhitespace(text.charAt(pos)))
+        {
+          return pos;
+        }
+        pos++;
+      }
+    }
+    return -1;
+  }
+
+  /**
+   * Replaces only the last occurrence of {@code toReplace} in {@code string}
+   * with {@code replacement}. If {@code toReplace} does not occur, the
+   * original string is answered unchanged.
+   *
+   * @param string
+   *          the string to search
+   * @param toReplace
+   *          the text whose last occurrence is replaced
+   * @param replacement
+   *          the text to substitute
+   * @return the string with the last occurrence replaced
+   */
+  public static String replaceLast(String string, String toReplace,
+          String replacement)
+  {
+    final int at = string.lastIndexOf(toReplace);
+    if (at < 0)
+    {
+      return string;
+    }
+
+    String head = string.substring(0, at);
+    String tail = string.substring(at + toReplace.length());
+    return head + replacement + tail;
+  }
+
+  /**
+   * Answers the length of the longest string in the list. Null entries are
+   * ignored; an empty list (or one holding only nulls) gives 0.
+   *
+   * @param l
+   *          the strings to measure
+   * @return the maximum string length found
+   */
+  public static int maxLength(List<String> l)
+  {
+    int longest = 0;
+    for (String item : l)
+    {
+      if (item != null)
+      {
+        longest = Math.max(longest, item.length());
+      }
+    }
+    return longest;
+  }
}