src/jalview/util/StringUtils.java

   1 /*
   2  * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
   3  * Copyright (C) $$Year-Rel$$ The Jalview Authors
   4  *
   5  * This file is part of Jalview.
   6  *
   7  * Jalview is free software: you can redistribute it and/or
   8  * modify it under the terms of the GNU General Public License
   9  * as published by the Free Software Foundation, either version 3
  10  * of the License, or (at your option) any later version.
  11  *
  12  * Jalview is distributed in the hope that it will be useful, but
  13  * WITHOUT ANY WARRANTY; without even the implied warranty
  14  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
  15  * PURPOSE.  See the GNU General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU General Public License
  18  * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
  19  * The Jalview Authors are detailed in the 'AUTHORS' file.
  20  */
  21 package jalview.util;
  22
  23 import java.util.Locale;
  24
  25 import java.io.UnsupportedEncodingException;
  26 import java.net.URLEncoder;
  27 import java.util.ArrayList;
  28 import java.util.List;
  29 import java.util.regex.Pattern;
  30
  31 public class StringUtils
  32 {
  33   private static final Pattern DELIMITERS_PATTERN = Pattern
  34           .compile(".*='[^']*(?!')");
  35
  36   private static final char PERCENT = '%';
  37
  38   private static final boolean DEBUG = false;
  39
  40   /*
  41    * URL encoded characters, indexed by char value
  42    * e.g. urlEncodings['='] = urlEncodings[61] = "%3D"
  43    */
  44   private static String[] urlEncodings = new String[255];
  45
  46   /**
  47    * Returns a new character array, after inserting characters into the given
  48    * character array.
  49    *
  50    * @param in
  51    *          the character array to insert into
  52    * @param position
  53    *          the 0-based position for insertion
  54    * @param count
  55    *          the number of characters to insert
  56    * @param ch
  57    *          the character to insert
  58    */
  59   public static final char[] insertCharAt(char[] in, int position,
  60           int count, char ch)
  61   {
  62     char[] tmp = new char[in.length + count];
  63
  64     if (position >= in.length)
  65     {
  66       System.arraycopy(in, 0, tmp, 0, in.length);
  67       position = in.length;
  68     }
  69     else
  70     {
  71       System.arraycopy(in, 0, tmp, 0, position);
  72     }
  73
  74     int index = position;
  75     while (count > 0)
  76     {
  77       tmp[index++] = ch;
  78       count--;
  79     }
  80
  81     if (position < in.length)
  82     {
  83       System.arraycopy(in, position, tmp, index, in.length - position);
  84     }
  85
  86     return tmp;
  87   }
  88
  89   /**
  90    * Delete
  91    *
  92    * @param in
  93    * @param from
  94    * @param to
  95    * @return
  96    */
  97   public static final char[] deleteChars(char[] in, int from, int to)
  98   {
  99     if (from >= in.length || from < 0)
 100     {
 101       return in;
 102     }
 103
 104     char[] tmp;
 105
 106     if (to >= in.length)
 107     {
 108       tmp = new char[from];
 109       System.arraycopy(in, 0, tmp, 0, from);
 110       to = in.length;
 111     }
 112     else
 113     {
 114       tmp = new char[in.length - to + from];
 115       System.arraycopy(in, 0, tmp, 0, from);
 116       System.arraycopy(in, to, tmp, from, in.length - to);
 117     }
 118     return tmp;
 119   }
 120
 121   /**
 122    * Returns the last part of 'input' after the last occurrence of 'token'. For
 123    * example to extract only the filename from a full path or URL.
 124    *
 125    * @param input
 126    * @param token
 127    *          a delimiter which must be in regular expression format
 128    * @return
 129    */
 130   public static String getLastToken(String input, String token)
 131   {
 132     if (input == null)
 133     {
 134       return null;
 135     }
 136     if (token == null)
 137     {
 138       return input;
 139     }
 140     String[] st = input.split(token);
 141     return st[st.length - 1];
 142   }
 143
 144   /**
 145    * Parses the input string into components separated by the delimiter. Unlike
 146    * String.split(), this method will ignore occurrences of the delimiter which
 147    * are nested within single quotes in name-value pair values, e.g. a='b,c'.
 148    *
 149    * @param input
 150    * @param delimiter
 151    * @return elements separated by separator
 152    */
 153   public static String[] separatorListToArray(String input,
 154           String delimiter)
 155   {
 156     int seplen = delimiter.length();
 157     if (input == null || input.equals("") || input.equals(delimiter))
 158     {
 159       return null;
 160     }
 161     List<String> jv = new ArrayList<>();
 162     int cp = 0, pos, escape;
 163     boolean wasescaped = false, wasquoted = false;
 164     String lstitem = null;
 165     while ((pos = input.indexOf(delimiter, cp)) >= cp)
 166     {
 167       escape = (pos > 0 && input.charAt(pos - 1) == '\\') ? -1 : 0;
 168       if (wasescaped || wasquoted)
 169       {
 170         // append to previous pos
 171         jv.set(jv.size() - 1, lstitem = lstitem + delimiter
 172                 + input.substring(cp, pos + escape));
 173       }
 174       else
 175       {
 176         jv.add(lstitem = input.substring(cp, pos + escape));
 177       }
 178       cp = pos + seplen;
 179       wasescaped = escape == -1;
 180       // last separator may be in an unmatched quote
 181       wasquoted = DELIMITERS_PATTERN.matcher(lstitem).matches();
 182     }
 183     if (cp < input.length())
 184     {
 185       String c = input.substring(cp);
 186       if (wasescaped || wasquoted)
 187       {
 188         // append final separator
 189         jv.set(jv.size() - 1, lstitem + delimiter + c);
 190       }
 191       else
 192       {
 193         if (!c.equals(delimiter))
 194         {
 195           jv.add(c);
 196         }
 197       }
 198     }
 199     if (jv.size() > 0)
 200     {
 201       String[] v = jv.toArray(new String[jv.size()]);
 202       jv.clear();
 203       if (DEBUG)
 204       {
 205         System.err.println("Array from '" + delimiter
 206                 + "' separated List:\n" + v.length);
 207         for (int i = 0; i < v.length; i++)
 208         {
 209           System.err.println("item " + i + " '" + v[i] + "'");
 210         }
 211       }
 212       return v;
 213     }
 214     if (DEBUG)
 215     {
 216       System.err.println(
 217               "Empty Array from '" + delimiter + "' separated List");
 218     }
 219     return null;
 220   }
 221
 222   /**
 223    * Returns a string which contains the list elements delimited by the
 224    * separator. Null items are ignored. If the input is null or has length zero,
 225    * a single delimiter is returned.
 226    *
 227    * @param list
 228    * @param separator
 229    * @return concatenated string
 230    */
 231   public static String arrayToSeparatorList(String[] list, String separator)
 232   {
 233     StringBuffer v = new StringBuffer();
 234     if (list != null && list.length > 0)
 235     {
 236       for (int i = 0, iSize = list.length; i < iSize; i++)
 237       {
 238         if (list[i] != null)
 239         {
 240           if (v.length() > 0)
 241           {
 242             v.append(separator);
 243           }
 244           // TODO - escape any separator values in list[i]
 245           v.append(list[i]);
 246         }
 247       }
 248       if (DEBUG)
 249       {
 250         System.err
 251                 .println("Returning '" + separator + "' separated List:\n");
 252         System.err.println(v);
 253       }
 254       return v.toString();
 255     }
 256     if (DEBUG)
 257     {
 258       System.err.println(
 259               "Returning empty '" + separator + "' separated List\n");
 260     }
 261     return "" + separator;
 262   }
 263
 264   /**
 265    * Converts a list to a string with a delimiter before each term except the
 266    * first. Returns an empty string given a null or zero-length argument. This
 267    * can be replaced with StringJoiner in Java 8.
 268    *
 269    * @param terms
 270    * @param delim
 271    * @return
 272    */
 273   public static String listToDelimitedString(List<String> terms,
 274           String delim)
 275   {
 276     StringBuilder sb = new StringBuilder(32);
 277     if (terms != null && !terms.isEmpty())
 278     {
 279       boolean appended = false;
 280       for (String term : terms)
 281       {
 282         if (appended)
 283         {
 284           sb.append(delim);
 285         }
 286         appended = true;
 287         sb.append(term);
 288       }
 289     }
 290     return sb.toString();
 291   }
 292
 293   /**
 294    * Convenience method to parse a string to an integer, returning 0 if the
 295    * input is null or not a valid integer
 296    *
 297    * @param s
 298    * @return
 299    */
 300   public static int parseInt(String s)
 301   {
 302     int result = 0;
 303     if (s != null && s.length() > 0)
 304     {
 305       try
 306       {
 307         result = Integer.parseInt(s);
 308       } catch (NumberFormatException ex)
 309       {
 310       }
 311     }
 312     return result;
 313   }
 314
 315   /**
 316    * Compares two versions formatted as e.g. "3.4.5" and returns -1, 0 or 1 as
 317    * the first version precedes, is equal to, or follows the second
 318    *
 319    * @param v1
 320    * @param v2
 321    * @return
 322    */
 323   public static int compareVersions(String v1, String v2)
 324   {
 325     return compareVersions(v1, v2, null);
 326   }
 327
 328   /**
 329    * Compares two versions formatted as e.g. "3.4.5b1" and returns -1, 0 or 1 as
 330    * the first version precedes, is equal to, or follows the second
 331    *
 332    * @param v1
 333    * @param v2
 334    * @param pointSeparator
 335    *          a string used to delimit point increments in sub-tokens of the
 336    *          version
 337    * @return
 338    */
 339   public static int compareVersions(String v1, String v2,
 340           String pointSeparator)
 341   {
 342     if (v1 == null || v2 == null)
 343     {
 344       return 0;
 345     }
 346     String[] toks1 = v1.split("\\.");
 347     String[] toks2 = v2.split("\\.");
 348     int i = 0;
 349     for (; i < toks1.length; i++)
 350     {
 351       if (i >= toks2.length)
 352       {
 353         /*
 354          * extra tokens in v1
 355          */
 356         return 1;
 357       }
 358       String tok1 = toks1[i];
 359       String tok2 = toks2[i];
 360       if (pointSeparator != null)
 361       {
 362         /*
 363          * convert e.g. 5b2 into decimal 5.2 for comparison purposes
 364          */
 365         tok1 = tok1.replace(pointSeparator, ".");
 366         tok2 = tok2.replace(pointSeparator, ".");
 367       }
 368       try
 369       {
 370         float f1 = Float.valueOf(tok1);
 371         float f2 = Float.valueOf(tok2);
 372         int comp = Float.compare(f1, f2);
 373         if (comp != 0)
 374         {
 375           return comp;
 376         }
 377       } catch (NumberFormatException e)
 378       {
 379         System.err
 380                 .println("Invalid version format found: " + e.getMessage());
 381         return 0;
 382       }
 383     }
 384
 385     if (i < toks2.length)
 386     {
 387       /*
 388        * extra tokens in v2
 389        */
 390       return -1;
 391     }
 392
 393     /*
 394      * same length, all tokens match
 395      */
 396     return 0;
 397   }
 398
 399   /**
 400    * Converts the string to all lower-case except the first character which is
 401    * upper-cased
 402    *
 403    * @param s
 404    * @return
 405    */
 406   public static String toSentenceCase(String s)
 407   {
 408     if (s == null)
 409     {
 410       return s;
 411     }
 412     if (s.length() <= 1)
 413     {
 414       return s.toUpperCase(Locale.ROOT);
 415     }
 416     return s.substring(0, 1).toUpperCase(Locale.ROOT) + s.substring(1).toLowerCase(Locale.ROOT);
 417   }
 418
 419   /**
 420    * A helper method that strips off any leading or trailing html and body tags.
 421    * If no html tag is found, then also html-encodes angle bracket characters.
 422    *
 423    * @param text
 424    * @return
 425    */
 426   public static String stripHtmlTags(String text)
 427   {
 428     if (text == null)
 429     {
 430       return null;
 431     }
 432     String tmp2up = text.toUpperCase(Locale.ROOT);
 433     int startTag = tmp2up.indexOf("<HTML>");
 434     if (startTag > -1)
 435     {
 436       text = text.substring(startTag + 6);
 437       tmp2up = tmp2up.substring(startTag + 6);
 438     }
 439     // is omission of "<BODY>" intentional here??
 440     int endTag = tmp2up.indexOf("</BODY>");
 441     if (endTag > -1)
 442     {
 443       text = text.substring(0, endTag);
 444       tmp2up = tmp2up.substring(0, endTag);
 445     }
 446     endTag = tmp2up.indexOf("</HTML>");
 447     if (endTag > -1)
 448     {
 449       text = text.substring(0, endTag);
 450     }
 451
 452     if (startTag == -1 && (text.contains("<") || text.contains(">")))
 453     {
 454       text = text.replaceAll("<", "&lt;");
 455       text = text.replaceAll(">", "&gt;");
 456     }
 457     return text;
 458   }
 459
 460   /**
 461    * Answers the input string with any occurrences of the 'encodeable'
 462    * characters replaced by their URL encoding
 463    *
 464    * @param s
 465    * @param encodable
 466    * @return
 467    */
 468   public static String urlEncode(String s, String encodable)
 469   {
 470     if (s == null || s.isEmpty())
 471     {
 472       return s;
 473     }
 474
 475     /*
 476      * do % encoding first, as otherwise it may double-encode!
 477      */
 478     if (encodable.indexOf(PERCENT) != -1)
 479     {
 480       s = urlEncode(s, PERCENT);
 481     }
 482
 483     for (char c : encodable.toCharArray())
 484     {
 485       if (c != PERCENT)
 486       {
 487         s = urlEncode(s, c);
 488       }
 489     }
 490     return s;
 491   }
 492
 493   /**
 494    * Answers the input string with any occurrences of {@code c} replaced with
 495    * their url encoding. Answers the input string if it is unchanged.
 496    *
 497    * @param s
 498    * @param c
 499    * @return
 500    */
 501   static String urlEncode(String s, char c)
 502   {
 503     String decoded = String.valueOf(c);
 504     if (s.indexOf(decoded) != -1)
 505     {
 506       String encoded = getUrlEncoding(c);
 507       if (!encoded.equals(decoded))
 508       {
 509         s = s.replace(decoded, encoded);
 510       }
 511     }
 512     return s;
 513   }
 514
 515   /**
 516    * Answers the input string with any occurrences of the specified (unencoded)
 517    * characters replaced by their URL decoding.
 518    * <p>
 519    * Example: {@code urlDecode("a%3Db%3Bc", "-;=,")} should answer
 520    * {@code "a=b;c"}.
 521    *
 522    * @param s
 523    * @param encodable
 524    * @return
 525    */
 526   public static String urlDecode(String s, String encodable)
 527   {
 528     if (s == null || s.isEmpty())
 529     {
 530       return s;
 531     }
 532
 533     for (char c : encodable.toCharArray())
 534     {
 535       String encoded = getUrlEncoding(c);
 536       if (s.indexOf(encoded) != -1)
 537       {
 538         String decoded = String.valueOf(c);
 539         s = s.replace(encoded, decoded);
 540       }
 541     }
 542     return s;
 543   }
 544
 545   /**
 546    * Does a lazy lookup of the url encoding of the given character, saving the
 547    * value for repeat lookups
 548    *
 549    * @param c
 550    * @return
 551    */
 552   private static String getUrlEncoding(char c)
 553   {
 554     if (c < 0 || c >= urlEncodings.length)
 555     {
 556       return String.valueOf(c);
 557     }
 558
 559     String enc = urlEncodings[c];
 560     if (enc == null)
 561     {
 562       try
 563       {
 564         enc = urlEncodings[c] = URLEncoder.encode(String.valueOf(c),
 565                 "UTF-8");
 566       } catch (UnsupportedEncodingException e)
 567       {
 568         enc = urlEncodings[c] = String.valueOf(c);
 569       }
 570     }
 571     return enc;
 572   }
 573
 574   public static int firstCharPosIgnoreCase(String text, String chars)
 575   {
 576     int min = text.length() + 1;
 577     for (char c : chars.toLowerCase().toCharArray())
 578     {
 579       int i = text.toLowerCase().indexOf(c);
 580       if (0 <= i && i < min)
 581       {
 582         min = i;
 583       }
 584     }
 585     return min < text.length() + 1 ? min : -1;
 586   }
 587 }