Merge branch 'develop' into features/mchmmer

[jalview.git] / src / jalview / util / StringUtils.java
diff --git a/src/jalview/util/StringUtils.java b/src/jalview/util/StringUtils.java

index b3456aa..3c5ba92 100644 (file)
--- a/src/jalview/util/StringUtils.java
+++ b/src/jalview/util/StringUtils.java
@@ -107,29 +107,6 @@ public class StringUtils
    }
  
    /**
-   * Returns the last part of 'input' after the last occurrence of 'token'. For
-   * example to extract only the filename from a full path or URL.
-   * 
-   * @param input
-   * @param token
-   *          a delimiter which must be in regular expression format
-   * @return
-   */
-  public static String getLastToken(String input, String token)
-  {
-    if (input == null)
-    {
-      return null;
-    }
-    if (token == null)
-    {
-      return input;
-    }
-    String[] st = input.split(token);
-    return st[st.length - 1];
-  }
-
-  /**
     * Parses the input string into components separated by the delimiter. Unlike
     * String.split(), this method will ignore occurrences of the delimiter which
     * are nested within single quotes in name-value pair values, e.g. a='b,c'.
@@ -146,7 +123,7 @@ public class StringUtils
      {
        return null;
      }
-    List<String> jv = new ArrayList<String>();
+    List<String> jv = new ArrayList<>();
      int cp = 0, pos, escape;
      boolean wasescaped = false, wasquoted = false;
      String lstitem = null;
@@ -403,4 +380,45 @@ public class StringUtils
      }
      return s.substring(0, 1).toUpperCase() + s.substring(1).toLowerCase();
    }
+
+  /**
+   * Removes everything up to and including the first {@code <html>} tag, and everything from the
+   * first {@code </body>} tag (and then the first {@code </html>} tag) onwards; case is ignored.
+   * If no {@code <html>} tag was found, all '<' and '>' left are html-encoded as {@code &lt;} / {@code &gt;}.
+   * @param text the string to process; may be null
+   * @return the processed string, or null if {@code text} is null
+   */
+  public static String stripHtmlTags(String text)
+  {
+    if (text == null)
+    {
+      return null;
+    }
+    String tmp2up = text.toUpperCase(); // upper-cased copy, searched so that tag matching ignores case
+    int startTag = tmp2up.indexOf("<HTML>"); // NOTE(review): indices found in tmp2up are reused on text, but toUpperCase() can change length (e.g. German sharp s -> "SS") -- confirm inputs
+    if (startTag > -1)
+    {
+      text = text.substring(startTag + 6); // 6 == "<HTML>".length()
+      tmp2up = tmp2up.substring(startTag + 6); // keep the upper-cased copy aligned with text
+    }
+    // NOTE(review): only the closing "</BODY>" is handled; an opening "<BODY>" tag is left in the text -- is that intentional?
+    int endTag = tmp2up.indexOf("</BODY>");
+    if (endTag > -1)
+    {
+      text = text.substring(0, endTag); // drop "</BODY>" and everything after it
+      tmp2up = tmp2up.substring(0, endTag);
+    }
+    endTag = tmp2up.indexOf("</HTML>"); // searched in what remains after any "</BODY>" truncation
+    if (endTag > -1)
+    {
+      text = text.substring(0, endTag);
+    }
+    // no "<HTML>" tag was found: html-encode any angle brackets that remain
+    if (startTag == -1 && (text.contains("<") || text.contains(">")))
+    {
+      text = text.replaceAll("<", "&lt;");
+      text = text.replaceAll(">", "&gt;");
+    }
+    return text;
+  }
  }