X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Forg%2Fjson%2FXML.java;fp=src%2Forg%2Fjson%2FXML.java;h=55362b2749d07e120ada98a17db1e9000741aad4;hb=ec8f3cedf60fb1feed6d34de6b49f6bfa78b9dd8;hp=0000000000000000000000000000000000000000;hpb=056dad85a910551cc95e44d451a61f6b8c4dd35d;p=jalview.git diff --git a/src/org/json/XML.java b/src/org/json/XML.java new file mode 100644 index 0000000..55362b2 --- /dev/null +++ b/src/org/json/XML.java @@ -0,0 +1,683 @@ +package org.json; + +/* +Copyright (c) 2015 JSON.org + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +The Software shall be used for Good, not Evil. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +*/ + +import java.io.Reader; +import java.io.StringReader; +import java.util.Iterator; + +/** + * This provides static methods to convert an XML text into a JSONObject, and to + * covert a JSONObject into an XML text. + * + * @author JSON.org + * @version 2016-08-10 + */ +@SuppressWarnings("boxing") +public class XML { + /** The Character '&'. */ + public static final Character AMP = '&'; + + /** The Character '''. */ + public static final Character APOS = '\''; + + /** The Character '!'. */ + public static final Character BANG = '!'; + + /** The Character '='. */ + public static final Character EQ = '='; + + /** The Character '>'. */ + public static final Character GT = '>'; + + /** The Character '<'. */ + public static final Character LT = '<'; + + /** The Character '?'. */ + public static final Character QUEST = '?'; + + /** The Character '"'. */ + public static final Character QUOT = '"'; + + /** The Character '/'. */ + public static final Character SLASH = '/'; + + /** + * Creates an iterator for navigating Code Points in a string instead of + * characters. Once Java7 support is dropped, this can be replaced with + * + * string.codePoints() + * + * which is available in Java8 and above. + * + * @see http://stackoverflow.com/a/21791059/6030888 + */ + private static Iterable codePointIterator(final String string) { + return new Iterable() { + @Override + public Iterator iterator() { + return new Iterator() { + private int nextIndex = 0; + private int length = string.length(); + + @Override + public boolean hasNext() { + return this.nextIndex < this.length; + } + + @Override + public Integer next() { + int result = string.codePointAt(this.nextIndex); + this.nextIndex += Character.charCount(result); + return result; + } + + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + }; + } + }; + } + + /** + * Replace special characters with XML escapes: + * + *
+     * & (ampersand) is replaced by &amp;
+     * < (less than) is replaced by &lt;
+     * > (greater than) is replaced by &gt;
+     * " (double quote) is replaced by &quot;
+     * ' (single quote / apostrophe) is replaced by &apos;
+     * 
+ * + * @param string + * The string to be escaped. + * @return The escaped string. + */ + public static String escape(String string) { + StringBuilder sb = new StringBuilder(string.length()); + for (final int cp : codePointIterator(string)) { + switch (cp) { + case '&': + sb.append("&"); + break; + case '<': + sb.append("<"); + break; + case '>': + sb.append(">"); + break; + case '"': + sb.append("""); + break; + case '\'': + sb.append("'"); + break; + default: + if (mustEscape(cp)) { + sb.append("&#x"); + sb.append(Integer.toHexString(cp)); + sb.append(';'); + } else { + sb.appendCodePoint(cp); + } + } + } + return sb.toString(); + } + + /** + * @param cp code point to test + * @return true if the code point is not valid for an XML + */ + private static boolean mustEscape(int cp) { + /* Valid range from https://www.w3.org/TR/REC-xml/#charsets + * + * #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] + * + * any Unicode character, excluding the surrogate blocks, FFFE, and FFFF. + */ + // isISOControl is true when (cp >= 0 && cp <= 0x1F) || (cp >= 0x7F && cp <= 0x9F) + // all ISO control characters are out of range except tabs and new lines + return (Character.isISOControl(cp) + && cp != 0x9 + && cp != 0xA + && cp != 0xD + ) || !( + // valid the range of acceptable characters that aren't control + (cp >= 0x20 && cp <= 0xD7FF) + || (cp >= 0xE000 && cp <= 0xFFFD) + || (cp >= 0x10000 && cp <= 0x10FFFF) + ) + ; + } + + /** + * Removes XML escapes from the string. + * + * @param string + * string to remove escapes from + * @return string with converted entities + */ + public static String unescape(String string) { + StringBuilder sb = new StringBuilder(string.length()); + for (int i = 0, length = string.length(); i < length; i++) { + char c = string.charAt(i); + if (c == '&') { + final int semic = string.indexOf(';', i); + if (semic > i) { + final String entity = string.substring(i + 1, semic); + sb.append(XMLTokener.unescapeEntity(entity)); + // skip past the entity we just parsed. + i += entity.length() + 1; + } else { + // this shouldn't happen in most cases since the parser + // errors on unclosed entries. + sb.append(c); + } + } else { + // not part of an entity + sb.append(c); + } + } + return sb.toString(); + } + + /** + * Throw an exception if the string contains whitespace. Whitespace is not + * allowed in tagNames and attributes. + * + * @param string + * A string. + * @throws JSONException Thrown if the string contains whitespace or is empty. + */ + public static void noSpace(String string) throws JSONException { + int i, length = string.length(); + if (length == 0) { + throw new JSONException("Empty string."); + } + for (i = 0; i < length; i += 1) { + if (Character.isWhitespace(string.charAt(i))) { + throw new JSONException("'" + string + + "' contains a space character."); + } + } + } + + /** + * Scan the content following the named tag, attaching it to the context. + * + * @param x + * The XMLTokener containing the source string. + * @param context + * The JSONObject that will include the new material. + * @param name + * The tag name. + * @return true if the close tag is processed. + * @throws JSONException + */ + private static boolean parse(XMLTokener x, JSONObject context, String name, boolean keepStrings) + throws JSONException { + char c; + int i; + JSONObject jsonobject = null; + String string; + String tagName; + Object token; + + // Test for and skip past these forms: + // + // + // + // + // Report errors for these forms: + // <> + // <= + // << + + token = x.nextToken(); + + // "); + return false; + } + x.back(); + } else if (c == '[') { + token = x.nextToken(); + if ("CDATA".equals(token)) { + if (x.next() == '[') { + string = x.nextCDATA(); + if (string.length() > 0) { + context.accumulate("content", string); + } + return false; + } + } + throw x.syntaxError("Expected 'CDATA['"); + } + i = 1; + do { + token = x.nextMeta(); + if (token == null) { + throw x.syntaxError("Missing '>' after ' 0); + return false; + } else if (token == QUEST) { + + // "); + return false; + } else if (token == SLASH) { + + // Close tag + if (x.nextToken() != GT) { + throw x.syntaxError("Misshaped tag"); + } + if (jsonobject.length() > 0) { + context.accumulate(tagName, jsonobject); + } else { + context.accumulate(tagName, ""); + } + return false; + + } else if (token == GT) { + // Content, between <...> and + for (;;) { + token = x.nextContent(); + if (token == null) { + if (tagName != null) { + throw x.syntaxError("Unclosed tag " + tagName); + } + return false; + } else if (token instanceof String) { + string = (String) token; + if (string.length() > 0) { + jsonobject.accumulate("content", + keepStrings ? string : stringToValue(string)); + } + + } else if (token == LT) { + // Nested element + if (parse(x, jsonobject, tagName,keepStrings)) { + if (jsonobject.length() == 0) { + context.accumulate(tagName, ""); + } else if (jsonobject.length() == 1 + && jsonobject.opt("content") != null) { + context.accumulate(tagName, + jsonobject.opt("content")); + } else { + context.accumulate(tagName, jsonobject); + } + return false; + } + } + } + } else { + throw x.syntaxError("Misshaped tag"); + } + } + } + } + + /** + * This method is the same as {@link JSONObject#stringToValue(String)}. + * + * @param string String to convert + * @return JSON value of this string or the string + */ + // To maintain compatibility with the Android API, this method is a direct copy of + // the one in JSONObject. Changes made here should be reflected there. + public static Object stringToValue(String string) { + if (string.equals("")) { + return string; + } + if (string.equalsIgnoreCase("true")) { + return Boolean.TRUE; + } + if (string.equalsIgnoreCase("false")) { + return Boolean.FALSE; + } + if (string.equalsIgnoreCase("null")) { + return JSONObject.NULL; + } + + /* + * If it might be a number, try converting it. If a number cannot be + * produced, then the value will just be a string. + */ + + char initial = string.charAt(0); + if ((initial >= '0' && initial <= '9') || initial == '-') { + try { + // if we want full Big Number support this block can be replaced with: + // return stringToNumber(string); + if (string.indexOf('.') > -1 || string.indexOf('e') > -1 + || string.indexOf('E') > -1 || "-0".equals(string)) { + Double d = Double.valueOf(string); + if (!d.isInfinite() && !d.isNaN()) { + return d; + } + } else { + Long myLong = Long.valueOf(string); + if (string.equals(myLong.toString())) { + if (myLong.longValue() == myLong.intValue()) { + return Integer.valueOf(myLong.intValue()); + } + return myLong; + } + } + } catch (Exception ignore) { + } + } + return string; + } + + /** + * Convert a well-formed (but not necessarily valid) XML string into a + * JSONObject. Some information may be lost in this transformation because + * JSON is a data format and XML is a document format. XML uses elements, + * attributes, and content text, while JSON uses unordered collections of + * name/value pairs and arrays of values. JSON does not does not like to + * distinguish between elements and attributes. Sequences of similar + * elements are represented as JSONArrays. Content text may be placed in a + * "content" member. Comments, prologs, DTDs, and <[ [ ]]> + * are ignored. + * + * @param string + * The source string. + * @return A JSONObject containing the structured data from the XML string. + * @throws JSONException Thrown if there is an errors while parsing the string + */ + public static JSONObject toJSONObject(String string) throws JSONException { + return toJSONObject(string, false); + } + + /** + * Convert a well-formed (but not necessarily valid) XML into a + * JSONObject. Some information may be lost in this transformation because + * JSON is a data format and XML is a document format. XML uses elements, + * attributes, and content text, while JSON uses unordered collections of + * name/value pairs and arrays of values. JSON does not does not like to + * distinguish between elements and attributes. Sequences of similar + * elements are represented as JSONArrays. Content text may be placed in a + * "content" member. Comments, prologs, DTDs, and <[ [ ]]> + * are ignored. + * + * @param reader The XML source reader. + * @return A JSONObject containing the structured data from the XML string. + * @throws JSONException Thrown if there is an errors while parsing the string + */ + public static JSONObject toJSONObject(Reader reader) throws JSONException { + return toJSONObject(reader, false); + } + + /** + * Convert a well-formed (but not necessarily valid) XML into a + * JSONObject. Some information may be lost in this transformation because + * JSON is a data format and XML is a document format. XML uses elements, + * attributes, and content text, while JSON uses unordered collections of + * name/value pairs and arrays of values. JSON does not does not like to + * distinguish between elements and attributes. Sequences of similar + * elements are represented as JSONArrays. Content text may be placed in a + * "content" member. Comments, prologs, DTDs, and <[ [ ]]> + * are ignored. + * + * All values are converted as strings, for 1, 01, 29.0 will not be coerced to + * numbers but will instead be the exact value as seen in the XML document. + * + * @param reader The XML source reader. + * @param keepStrings If true, then values will not be coerced into boolean + * or numeric values and will instead be left as strings + * @return A JSONObject containing the structured data from the XML string. + * @throws JSONException Thrown if there is an errors while parsing the string + */ + public static JSONObject toJSONObject(Reader reader, boolean keepStrings) throws JSONException { + JSONObject jo = new JSONObject(); + XMLTokener x = new XMLTokener(reader); + while (x.more()) { + x.skipPast("<"); + if(x.more()) { + parse(x, jo, null, keepStrings); + } + } + return jo; + } + + /** + * Convert a well-formed (but not necessarily valid) XML string into a + * JSONObject. Some information may be lost in this transformation because + * JSON is a data format and XML is a document format. XML uses elements, + * attributes, and content text, while JSON uses unordered collections of + * name/value pairs and arrays of values. JSON does not does not like to + * distinguish between elements and attributes. Sequences of similar + * elements are represented as JSONArrays. Content text may be placed in a + * "content" member. Comments, prologs, DTDs, and <[ [ ]]> + * are ignored. + * + * All values are converted as strings, for 1, 01, 29.0 will not be coerced to + * numbers but will instead be the exact value as seen in the XML document. + * + * @param string + * The source string. + * @param keepStrings If true, then values will not be coerced into boolean + * or numeric values and will instead be left as strings + * @return A JSONObject containing the structured data from the XML string. + * @throws JSONException Thrown if there is an errors while parsing the string + */ + public static JSONObject toJSONObject(String string, boolean keepStrings) throws JSONException { + return toJSONObject(new StringReader(string), keepStrings); + } + + /** + * Convert a JSONObject into a well-formed, element-normal XML string. + * + * @param object + * A JSONObject. + * @return A string. + * @throws JSONException Thrown if there is an error parsing the string + */ + public static String toString(Object object) throws JSONException { + return toString(object, null); + } + + /** + * Convert a JSONObject into a well-formed, element-normal XML string. + * + * @param object + * A JSONObject. + * @param tagName + * The optional name of the enclosing tag. + * @return A string. + * @throws JSONException Thrown if there is an error parsing the string + */ + public static String toString(final Object object, final String tagName) + throws JSONException { + StringBuilder sb = new StringBuilder(); + JSONArray ja; + JSONObject jo; + String string; + + if (object instanceof JSONObject) { + + // Emit + if (tagName != null) { + sb.append('<'); + sb.append(tagName); + sb.append('>'); + } + + // Loop thru the keys. + // don't use the new entrySet accessor to maintain Android Support + jo = (JSONObject) object; + for (final String key : jo.keySet()) { + Object value = jo.opt(key); + if (value == null) { + value = ""; + } else if (value.getClass().isArray()) { + value = new JSONArray(value); + } + + // Emit content in body + if ("content".equals(key)) { + if (value instanceof JSONArray) { + ja = (JSONArray) value; + int jaLength = ja.length(); + // don't use the new iterator API to maintain support for Android + for (int i = 0; i < jaLength; i++) { + if (i > 0) { + sb.append('\n'); + } + Object val = ja.opt(i); + sb.append(escape(val.toString())); + } + } else { + sb.append(escape(value.toString())); + } + + // Emit an array of similar keys + + } else if (value instanceof JSONArray) { + ja = (JSONArray) value; + int jaLength = ja.length(); + // don't use the new iterator API to maintain support for Android + for (int i = 0; i < jaLength; i++) { + Object val = ja.opt(i); + if (val instanceof JSONArray) { + sb.append('<'); + sb.append(key); + sb.append('>'); + sb.append(toString(val)); + sb.append("'); + } else { + sb.append(toString(val, key)); + } + } + } else if ("".equals(value)) { + sb.append('<'); + sb.append(key); + sb.append("/>"); + + // Emit a new tag + + } else { + sb.append(toString(value, key)); + } + } + if (tagName != null) { + + // Emit the close tag + sb.append("'); + } + return sb.toString(); + + } + + if (object != null && (object instanceof JSONArray || object.getClass().isArray())) { + if(object.getClass().isArray()) { + ja = new JSONArray(object); + } else { + ja = (JSONArray) object; + } + int jaLength = ja.length(); + // don't use the new iterator API to maintain support for Android + for (int i = 0; i < jaLength; i++) { + Object val = ja.opt(i); + // XML does not have good support for arrays. If an array + // appears in a place where XML is lacking, synthesize an + // element. + sb.append(toString(val, tagName == null ? "array" : tagName)); + } + return sb.toString(); + } + + string = (object == null) ? "null" : escape(object.toString()); + return (tagName == null) ? "\"" + string + "\"" + : (string.length() == 0) ? "<" + tagName + "/>" : "<" + tagName + + ">" + string + ""; + + } +}