2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
23 import java.util.Locale;
25 import java.io.UnsupportedEncodingException;
26 import java.net.URLEncoder;
27 import java.util.ArrayList;
28 import java.util.List;
29 import java.util.regex.Pattern;
31 public class StringUtils
33 private static final Pattern DELIMITERS_PATTERN = Pattern
34 .compile(".*='[^']*(?!')");
36 private static final char PERCENT = '%';
38 private static final boolean DEBUG = false;
41 * URL encoded characters, indexed by char value
42 * e.g. urlEncodings['='] = urlEncodings[61] = "%3D"
44 private static String[] urlEncodings = new String[255];
47 * Returns a new character array, after inserting characters into the given
51 * the character array to insert into
53 * the 0-based position for insertion
55 * the number of characters to insert
57 * the character to insert
59 public static final char[] insertCharAt(char[] in, int position,
62 char[] tmp = new char[in.length + count];
64 if (position >= in.length)
66 System.arraycopy(in, 0, tmp, 0, in.length);
71 System.arraycopy(in, 0, tmp, 0, position);
81 if (position < in.length)
83 System.arraycopy(in, position, tmp, index, in.length - position);
97 public static final char[] deleteChars(char[] in, int from, int to)
99 if (from >= in.length || from < 0)
108 tmp = new char[from];
109 System.arraycopy(in, 0, tmp, 0, from);
114 tmp = new char[in.length - to + from];
115 System.arraycopy(in, 0, tmp, 0, from);
116 System.arraycopy(in, to, tmp, from, in.length - to);
122 * Returns the last part of 'input' after the last occurrence of 'token'. For
123 * example to extract only the filename from a full path or URL.
127 * a delimiter which must be in regular expression format
130 public static String getLastToken(String input, String token)
140 String[] st = input.split(token);
141 return st[st.length - 1];
145 * Parses the input string into components separated by the delimiter. Unlike
146 * String.split(), this method will ignore occurrences of the delimiter which
147 * are nested within single quotes in name-value pair values, e.g. a='b,c'.
151 * @return elements separated by separator
153 public static String[] separatorListToArray(String input,
156 int seplen = delimiter.length();
157 if (input == null || input.equals("") || input.equals(delimiter))
161 List<String> jv = new ArrayList<>();
162 int cp = 0, pos, escape;
163 boolean wasescaped = false, wasquoted = false;
164 String lstitem = null;
165 while ((pos = input.indexOf(delimiter, cp)) >= cp)
167 escape = (pos > 0 && input.charAt(pos - 1) == '\\') ? -1 : 0;
168 if (wasescaped || wasquoted)
170 // append to previous pos
171 jv.set(jv.size() - 1, lstitem = lstitem + delimiter
172 + input.substring(cp, pos + escape));
176 jv.add(lstitem = input.substring(cp, pos + escape));
179 wasescaped = escape == -1;
180 // last separator may be in an unmatched quote
181 wasquoted = DELIMITERS_PATTERN.matcher(lstitem).matches();
183 if (cp < input.length())
185 String c = input.substring(cp);
186 if (wasescaped || wasquoted)
188 // append final separator
189 jv.set(jv.size() - 1, lstitem + delimiter + c);
193 if (!c.equals(delimiter))
201 String[] v = jv.toArray(new String[jv.size()]);
205 System.err.println("Array from '" + delimiter
206 + "' separated List:\n" + v.length);
207 for (int i = 0; i < v.length; i++)
209 System.err.println("item " + i + " '" + v[i] + "'");
217 "Empty Array from '" + delimiter + "' separated List");
223 * Returns a string which contains the list elements delimited by the
224 * separator. Null items are ignored. If the input is null or has length zero,
225 * a single delimiter is returned.
229 * @return concatenated string
231 public static String arrayToSeparatorList(String[] list, String separator)
233 StringBuffer v = new StringBuffer();
234 if (list != null && list.length > 0)
236 for (int i = 0, iSize = list.length; i < iSize; i++)
244 // TODO - escape any separator values in list[i]
251 .println("Returning '" + separator + "' separated List:\n");
252 System.err.println(v);
259 "Returning empty '" + separator + "' separated List\n");
261 return "" + separator;
265 * Converts a list to a string with a delimiter before each term except the
266 * first. Returns an empty string given a null or zero-length argument. This
267 * can be replaced with StringJoiner in Java 8.
273 public static String listToDelimitedString(List<String> terms,
276 StringBuilder sb = new StringBuilder(32);
277 if (terms != null && !terms.isEmpty())
279 boolean appended = false;
280 for (String term : terms)
290 return sb.toString();
294 * Convenience method to parse a string to an integer, returning 0 if the
295 * input is null or not a valid integer
300 public static int parseInt(String s)
303 if (s != null && s.length() > 0)
307 result = Integer.parseInt(s);
308 } catch (NumberFormatException ex)
316 * Compares two versions formatted as e.g. "3.4.5" and returns -1, 0 or 1 as
317 * the first version precedes, is equal to, or follows the second
323 public static int compareVersions(String v1, String v2)
325 return compareVersions(v1, v2, null);
329 * Compares two versions formatted as e.g. "3.4.5b1" and returns -1, 0 or 1 as
330 * the first version precedes, is equal to, or follows the second
334 * @param pointSeparator
335 * a string used to delimit point increments in sub-tokens of the
339 public static int compareVersions(String v1, String v2,
340 String pointSeparator)
342 if (v1 == null || v2 == null)
346 String[] toks1 = v1.split("\\.");
347 String[] toks2 = v2.split("\\.");
349 for (; i < toks1.length; i++)
351 if (i >= toks2.length)
358 String tok1 = toks1[i];
359 String tok2 = toks2[i];
360 if (pointSeparator != null)
363 * convert e.g. 5b2 into decimal 5.2 for comparison purposes
365 tok1 = tok1.replace(pointSeparator, ".");
366 tok2 = tok2.replace(pointSeparator, ".");
370 float f1 = Float.valueOf(tok1);
371 float f2 = Float.valueOf(tok2);
372 int comp = Float.compare(f1, f2);
377 } catch (NumberFormatException e)
380 .println("Invalid version format found: " + e.getMessage());
385 if (i < toks2.length)
394 * same length, all tokens match
400 * Converts the string to all lower-case except the first character which is
406 public static String toSentenceCase(String s)
414 return s.toUpperCase(Locale.ROOT);
416 return s.substring(0, 1).toUpperCase(Locale.ROOT) + s.substring(1).toLowerCase(Locale.ROOT);
420 * A helper method that strips off any leading or trailing html and body tags.
421 * If no html tag is found, then also html-encodes angle bracket characters.
426 public static String stripHtmlTags(String text)
432 String tmp2up = text.toUpperCase(Locale.ROOT);
433 int startTag = tmp2up.indexOf("<HTML>");
436 text = text.substring(startTag + 6);
437 tmp2up = tmp2up.substring(startTag + 6);
439 // is omission of "<BODY>" intentional here??
440 int endTag = tmp2up.indexOf("</BODY>");
443 text = text.substring(0, endTag);
444 tmp2up = tmp2up.substring(0, endTag);
446 endTag = tmp2up.indexOf("</HTML>");
449 text = text.substring(0, endTag);
452 if (startTag == -1 && (text.contains("<") || text.contains(">")))
454 text = text.replaceAll("<", "<");
455 text = text.replaceAll(">", ">");
461 * Answers the input string with any occurrences of the 'encodeable'
462 * characters replaced by their URL encoding
468 public static String urlEncode(String s, String encodable)
470 if (s == null || s.isEmpty())
476 * do % encoding first, as otherwise it may double-encode!
478 if (encodable.indexOf(PERCENT) != -1)
480 s = urlEncode(s, PERCENT);
483 for (char c : encodable.toCharArray())
494 * Answers the input string with any occurrences of {@code c} replaced with
495 * their url encoding. Answers the input string if it is unchanged.
501 static String urlEncode(String s, char c)
503 String decoded = String.valueOf(c);
504 if (s.indexOf(decoded) != -1)
506 String encoded = getUrlEncoding(c);
507 if (!encoded.equals(decoded))
509 s = s.replace(decoded, encoded);
516 * Answers the input string with any occurrences of the specified (unencoded)
517 * characters replaced by their URL decoding.
519 * Example: {@code urlDecode("a%3Db%3Bc", "-;=,")} should answer
526 public static String urlDecode(String s, String encodable)
528 if (s == null || s.isEmpty())
533 for (char c : encodable.toCharArray())
535 String encoded = getUrlEncoding(c);
536 if (s.indexOf(encoded) != -1)
538 String decoded = String.valueOf(c);
539 s = s.replace(encoded, decoded);
546 * Does a lazy lookup of the url encoding of the given character, saving the
547 * value for repeat lookups
552 private static String getUrlEncoding(char c)
554 if (c < 0 || c >= urlEncodings.length)
556 return String.valueOf(c);
559 String enc = urlEncodings[c];
564 enc = urlEncodings[c] = URLEncoder.encode(String.valueOf(c),
566 } catch (UnsupportedEncodingException e)
568 enc = urlEncodings[c] = String.valueOf(c);
574 public static int firstCharPosIgnoreCase(String text, String chars)
576 int min = text.length() + 1;
577 for (char c : chars.toLowerCase().toCharArray())
579 int i = text.toLowerCase().indexOf(c);
580 if (0 <= i && i < min)
585 return min < text.length() + 1 ? min : -1;