getdown/src/getdown/core/src/main/java/jalview/util/StringUtils.java

   1 /*
   2  * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
   3  * Copyright (C) $$Year-Rel$$ The Jalview Authors
   4  *
   5  * This file is part of Jalview.
   6  *
   7  * Jalview is free software: you can redistribute it and/or
   8  * modify it under the terms of the GNU General Public License
   9  * as published by the Free Software Foundation, either version 3
  10  * of the License, or (at your option) any later version.
  11  *
  12  * Jalview is distributed in the hope that it will be useful, but
  13  * WITHOUT ANY WARRANTY; without even the implied warranty
  14  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
  15  * PURPOSE.  See the GNU General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU General Public License
  18  * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
  19  * The Jalview Authors are detailed in the 'AUTHORS' file.
  20  */
  21 package jalview.util;
  22
  23 import java.io.UnsupportedEncodingException;
  24 import java.net.URLEncoder;
  25 import java.util.ArrayList;
  26 import java.util.List;
  27 import java.util.Locale;
  28 import java.util.regex.Pattern;
  29
  30 public class StringUtils
  31 {
  32   private static final Pattern DELIMITERS_PATTERN = Pattern
  33           .compile(".*='[^']*(?!')");
  34
  35   private static final char PERCENT = '%';
  36
  37   private static final boolean DEBUG = false;
  38
  39   /*
  40    * URL encoded characters, indexed by char value
  41    * e.g. urlEncodings['='] = urlEncodings[61] = "%3D"
  42    */
  43   private static String[] urlEncodings = new String[255];
  44
  45   /**
  46    * Returns a new character array, after inserting characters into the given
  47    * character array.
  48    *
  49    * @param in
  50    *          the character array to insert into
  51    * @param position
  52    *          the 0-based position for insertion
  53    * @param count
  54    *          the number of characters to insert
  55    * @param ch
  56    *          the character to insert
  57    */
  58   public static final char[] insertCharAt(char[] in, int position,
  59           int count, char ch)
  60   {
  61     char[] tmp = new char[in.length + count];
  62
  63     if (position >= in.length)
  64     {
  65       System.arraycopy(in, 0, tmp, 0, in.length);
  66       position = in.length;
  67     }
  68     else
  69     {
  70       System.arraycopy(in, 0, tmp, 0, position);
  71     }
  72
  73     int index = position;
  74     while (count > 0)
  75     {
  76       tmp[index++] = ch;
  77       count--;
  78     }
  79
  80     if (position < in.length)
  81     {
  82       System.arraycopy(in, position, tmp, index, in.length - position);
  83     }
  84
  85     return tmp;
  86   }
  87
  88   /**
  89    * Delete
  90    *
  91    * @param in
  92    * @param from
  93    * @param to
  94    * @return
  95    */
  96   public static final char[] deleteChars(char[] in, int from, int to)
  97   {
  98     if (from >= in.length || from < 0)
  99     {
 100       return in;
 101     }
 102
 103     char[] tmp;
 104
 105     if (to >= in.length)
 106     {
 107       tmp = new char[from];
 108       System.arraycopy(in, 0, tmp, 0, from);
 109       to = in.length;
 110     }
 111     else
 112     {
 113       tmp = new char[in.length - to + from];
 114       System.arraycopy(in, 0, tmp, 0, from);
 115       System.arraycopy(in, to, tmp, from, in.length - to);
 116     }
 117     return tmp;
 118   }
 119
 120   /**
 121    * Returns the last part of 'input' after the last occurrence of 'token'. For
 122    * example to extract only the filename from a full path or URL.
 123    *
 124    * @param input
 125    * @param token
 126    *          a delimiter which must be in regular expression format
 127    * @return
 128    */
 129   public static String getLastToken(String input, String token)
 130   {
 131     if (input == null)
 132     {
 133       return null;
 134     }
 135     if (token == null)
 136     {
 137       return input;
 138     }
 139     String[] st = input.split(token);
 140     return st[st.length - 1];
 141   }
 142
 143   /**
 144    * Parses the input string into components separated by the delimiter. Unlike
 145    * String.split(), this method will ignore occurrences of the delimiter which
 146    * are nested within single quotes in name-value pair values, e.g. a='b,c'.
 147    *
 148    * @param input
 149    * @param delimiter
 150    * @return elements separated by separator
 151    */
 152   public static String[] separatorListToArray(String input,
 153           String delimiter)
 154   {
 155     int seplen = delimiter.length();
 156     if (input == null || input.equals("") || input.equals(delimiter))
 157     {
 158       return null;
 159     }
 160     List<String> jv = new ArrayList<>();
 161     int cp = 0, pos, escape;
 162     boolean wasescaped = false, wasquoted = false;
 163     String lstitem = null;
 164     while ((pos = input.indexOf(delimiter, cp)) >= cp)
 165     {
 166       escape = (pos > 0 && input.charAt(pos - 1) == '\\') ? -1 : 0;
 167       if (wasescaped || wasquoted)
 168       {
 169         // append to previous pos
 170         jv.set(jv.size() - 1, lstitem = lstitem + delimiter
 171                 + input.substring(cp, pos + escape));
 172       }
 173       else
 174       {
 175         jv.add(lstitem = input.substring(cp, pos + escape));
 176       }
 177       cp = pos + seplen;
 178       wasescaped = escape == -1;
 179       // last separator may be in an unmatched quote
 180       wasquoted = DELIMITERS_PATTERN.matcher(lstitem).matches();
 181     }
 182     if (cp < input.length())
 183     {
 184       String c = input.substring(cp);
 185       if (wasescaped || wasquoted)
 186       {
 187         // append final separator
 188         jv.set(jv.size() - 1, lstitem + delimiter + c);
 189       }
 190       else
 191       {
 192         if (!c.equals(delimiter))
 193         {
 194           jv.add(c);
 195         }
 196       }
 197     }
 198     if (jv.size() > 0)
 199     {
 200       String[] v = jv.toArray(new String[jv.size()]);
 201       jv.clear();
 202       if (DEBUG)
 203       {
 204         System.err.println("Array from '" + delimiter
 205                 + "' separated List:\n" + v.length);
 206         for (int i = 0; i < v.length; i++)
 207         {
 208           System.err.println("item " + i + " '" + v[i] + "'");
 209         }
 210       }
 211       return v;
 212     }
 213     if (DEBUG)
 214     {
 215       System.err.println(
 216               "Empty Array from '" + delimiter + "' separated List");
 217     }
 218     return null;
 219   }
 220
 221   /**
 222    * Returns a string which contains the list elements delimited by the
 223    * separator. Null items are ignored. If the input is null or has length zero,
 224    * a single delimiter is returned.
 225    *
 226    * @param list
 227    * @param separator
 228    * @return concatenated string
 229    */
 230   public static String arrayToSeparatorList(String[] list, String separator)
 231   {
 232     StringBuffer v = new StringBuffer();
 233     if (list != null && list.length > 0)
 234     {
 235       for (int i = 0, iSize = list.length; i < iSize; i++)
 236       {
 237         if (list[i] != null)
 238         {
 239           if (v.length() > 0)
 240           {
 241             v.append(separator);
 242           }
 243           // TODO - escape any separator values in list[i]
 244           v.append(list[i]);
 245         }
 246       }
 247       if (DEBUG)
 248       {
 249         System.err
 250                 .println("Returning '" + separator + "' separated List:\n");
 251         System.err.println(v);
 252       }
 253       return v.toString();
 254     }
 255     if (DEBUG)
 256     {
 257       System.err.println(
 258               "Returning empty '" + separator + "' separated List\n");
 259     }
 260     return "" + separator;
 261   }
 262
 263   /**
 264    * Converts a list to a string with a delimiter before each term except the
 265    * first. Returns an empty string given a null or zero-length argument. This
 266    * can be replaced with StringJoiner in Java 8.
 267    *
 268    * @param terms
 269    * @param delim
 270    * @return
 271    */
 272   public static String listToDelimitedString(List<String> terms,
 273           String delim)
 274   {
 275     StringBuilder sb = new StringBuilder(32);
 276     if (terms != null && !terms.isEmpty())
 277     {
 278       boolean appended = false;
 279       for (String term : terms)
 280       {
 281         if (appended)
 282         {
 283           sb.append(delim);
 284         }
 285         appended = true;
 286         sb.append(term);
 287       }
 288     }
 289     return sb.toString();
 290   }
 291
 292   /**
 293    * Convenience method to parse a string to an integer, returning 0 if the
 294    * input is null or not a valid integer
 295    *
 296    * @param s
 297    * @return
 298    */
 299   public static int parseInt(String s)
 300   {
 301     int result = 0;
 302     if (s != null && s.length() > 0)
 303     {
 304       try
 305       {
 306         result = Integer.parseInt(s);
 307       } catch (NumberFormatException ex)
 308       {
 309       }
 310     }
 311     return result;
 312   }
 313
 314   /**
 315    * Compares two versions formatted as e.g. "3.4.5" and returns -1, 0 or 1 as
 316    * the first version precedes, is equal to, or follows the second
 317    *
 318    * @param v1
 319    * @param v2
 320    * @return
 321    */
 322   public static int compareVersions(String v1, String v2)
 323   {
 324     return compareVersions(v1, v2, null);
 325   }
 326
 327   /**
 328    * Compares two versions formatted as e.g. "3.4.5b1" and returns -1, 0 or 1 as
 329    * the first version precedes, is equal to, or follows the second
 330    *
 331    * @param v1
 332    * @param v2
 333    * @param pointSeparator
 334    *          a string used to delimit point increments in sub-tokens of the
 335    *          version
 336    * @return
 337    */
 338   public static int compareVersions(String v1, String v2,
 339           String pointSeparator)
 340   {
 341     if (v1 == null || v2 == null)
 342     {
 343       return 0;
 344     }
 345     String[] toks1 = v1.split("\\.");
 346     String[] toks2 = v2.split("\\.");
 347     int i = 0;
 348     for (; i < toks1.length; i++)
 349     {
 350       if (i >= toks2.length)
 351       {
 352         /*
 353          * extra tokens in v1
 354          */
 355         return 1;
 356       }
 357       String tok1 = toks1[i];
 358       String tok2 = toks2[i];
 359       if (pointSeparator != null)
 360       {
 361         /*
 362          * convert e.g. 5b2 into decimal 5.2 for comparison purposes
 363          */
 364         tok1 = tok1.replace(pointSeparator, ".");
 365         tok2 = tok2.replace(pointSeparator, ".");
 366       }
 367       try
 368       {
 369         float f1 = Float.valueOf(tok1);
 370         float f2 = Float.valueOf(tok2);
 371         int comp = Float.compare(f1, f2);
 372         if (comp != 0)
 373         {
 374           return comp;
 375         }
 376       } catch (NumberFormatException e)
 377       {
 378         System.err
 379                 .println("Invalid version format found: " + e.getMessage());
 380         return 0;
 381       }
 382     }
 383
 384     if (i < toks2.length)
 385     {
 386       /*
 387        * extra tokens in v2
 388        */
 389       return -1;
 390     }
 391
 392     /*
 393      * same length, all tokens match
 394      */
 395     return 0;
 396   }
 397
 398   /**
 399    * Converts the string to all lower-case except the first character which is
 400    * upper-cased
 401    *
 402    * @param s
 403    * @return
 404    */
 405   public static String toSentenceCase(String s)
 406   {
 407     if (s == null)
 408     {
 409       return s;
 410     }
 411     if (s.length() <= 1)
 412     {
 413       return s.toUpperCase(Locale.ROOT);
 414     }
 415     return s.substring(0, 1).toUpperCase(Locale.ROOT) + s.substring(1).toLowerCase(Locale.ROOT);
 416   }
 417
 418   /**
 419    * A helper method that strips off any leading or trailing html and body tags.
 420    * If no html tag is found, then also html-encodes angle bracket characters.
 421    *
 422    * @param text
 423    * @return
 424    */
 425   public static String stripHtmlTags(String text)
 426   {
 427     if (text == null)
 428     {
 429       return null;
 430     }
 431     String tmp2up = text.toUpperCase(Locale.ROOT);
 432     int startTag = tmp2up.indexOf("<HTML>");
 433     if (startTag > -1)
 434     {
 435       text = text.substring(startTag + 6);
 436       tmp2up = tmp2up.substring(startTag + 6);
 437     }
 438     // is omission of "<BODY>" intentional here??
 439     int endTag = tmp2up.indexOf("</BODY>");
 440     if (endTag > -1)
 441     {
 442       text = text.substring(0, endTag);
 443       tmp2up = tmp2up.substring(0, endTag);
 444     }
 445     endTag = tmp2up.indexOf("</HTML>");
 446     if (endTag > -1)
 447     {
 448       text = text.substring(0, endTag);
 449     }
 450
 451     if (startTag == -1 && (text.contains("<") || text.contains(">")))
 452     {
 453       text = text.replaceAll("<", "&lt;");
 454       text = text.replaceAll(">", "&gt;");
 455     }
 456     return text;
 457   }
 458
 459   /**
 460    * Answers the input string with any occurrences of the 'encodeable'
 461    * characters replaced by their URL encoding
 462    *
 463    * @param s
 464    * @param encodable
 465    * @return
 466    */
 467   public static String urlEncode(String s, String encodable)
 468   {
 469     if (s == null || s.isEmpty())
 470     {
 471       return s;
 472     }
 473
 474     /*
 475      * do % encoding first, as otherwise it may double-encode!
 476      */
 477     if (encodable.indexOf(PERCENT) != -1)
 478     {
 479       s = urlEncode(s, PERCENT);
 480     }
 481
 482     for (char c : encodable.toCharArray())
 483     {
 484       if (c != PERCENT)
 485       {
 486         s = urlEncode(s, c);
 487       }
 488     }
 489     return s;
 490   }
 491
 492   /**
 493    * Answers the input string with any occurrences of {@code c} replaced with
 494    * their url encoding. Answers the input string if it is unchanged.
 495    *
 496    * @param s
 497    * @param c
 498    * @return
 499    */
 500   static String urlEncode(String s, char c)
 501   {
 502     String decoded = String.valueOf(c);
 503     if (s.indexOf(decoded) != -1)
 504     {
 505       String encoded = getUrlEncoding(c);
 506       if (!encoded.equals(decoded))
 507       {
 508         s = s.replace(decoded, encoded);
 509       }
 510     }
 511     return s;
 512   }
 513
 514   /**
 515    * Answers the input string with any occurrences of the specified (unencoded)
 516    * characters replaced by their URL decoding.
 517    * <p>
 518    * Example: {@code urlDecode("a%3Db%3Bc", "-;=,")} should answer
 519    * {@code "a=b;c"}.
 520    *
 521    * @param s
 522    * @param encodable
 523    * @return
 524    */
 525   public static String urlDecode(String s, String encodable)
 526   {
 527     if (s == null || s.isEmpty())
 528     {
 529       return s;
 530     }
 531
 532     for (char c : encodable.toCharArray())
 533     {
 534       String encoded = getUrlEncoding(c);
 535       if (s.indexOf(encoded) != -1)
 536       {
 537         String decoded = String.valueOf(c);
 538         s = s.replace(encoded, decoded);
 539       }
 540     }
 541     return s;
 542   }
 543
 544   /**
 545    * Does a lazy lookup of the url encoding of the given character, saving the
 546    * value for repeat lookups
 547    *
 548    * @param c
 549    * @return
 550    */
 551   private static String getUrlEncoding(char c)
 552   {
 553     if (c < 0 || c >= urlEncodings.length)
 554     {
 555       return String.valueOf(c);
 556     }
 557
 558     String enc = urlEncodings[c];
 559     if (enc == null)
 560     {
 561       try
 562       {
 563         enc = urlEncodings[c] = URLEncoder.encode(String.valueOf(c),
 564                 "UTF-8");
 565       } catch (UnsupportedEncodingException e)
 566       {
 567         enc = urlEncodings[c] = String.valueOf(c);
 568       }
 569     }
 570     return enc;
 571   }
 572
 573   public static int firstCharPosIgnoreCase(String text, String chars)
 574   {
 575     int min = text.length() + 1;
 576     for (char c : chars.toLowerCase(Locale.ROOT).toCharArray())
 577     {
 578       int i = text.toLowerCase(Locale.ROOT).indexOf(c);
 579       if (0 <= i && i < min)
 580       {
 581         min = i;
 582       }
 583     }
 584     return min < text.length() + 1 ? min : -1;
 585   }
 586 }