- return new ChunkHolder(value, watcher.getCursorPosition());\r
- }\r
-\r
- /**\r
- * This method ensures that the output String has only valid XML unicode\r
- * characters as specified by the XML 1.0 standard. For reference, please\r
- * see the standard.\r
- * \r
- * @param The\r
- * String whose non-valid characters we want to remove.\r
- * \r
- * @return The in String, stripped of non-valid characters.\r
- */\r
- static String removeInvalidXMLCharacters(String str) {\r
- assert str != null;\r
-\r
- StringBuilder out = new StringBuilder(); // Used to hold the output.\r
- int codePoint; // Used to reference the current character.\r
-\r
- // For test\r
- // String ss = "\ud801\udc00"; // This is actualy one unicode character,\r
- // represented by two code units!!!.\r
- // System.out.println(ss.codePointCount(0, ss.length()));// See: 1\r
- int i = 0;\r
- String value = null;\r
- try {\r
- // make sure the string contain only UTF-8 characters\r
- value = new String(str.getBytes("UTF-8"), "UTF-8");\r
- } catch (UnsupportedEncodingException e) {\r
- // will not happen\r
- throw new AssertionError("UTF-8 charset is not supported!!!");\r
+\r
+ /**\r
+ * This method ensures that the output String has only valid XML unicode\r
+ * characters as specified by the XML 1.0 standard. For reference, please\r
+ * see the Char production in section 2.2 of the XML 1.0 specification.\r
+ * \r
+ * @param str\r
+ * The String whose non-valid characters we want to remove.\r
+ * \r
+ * @return The input String, stripped of non-valid characters.\r
+ */\r
+ static String removeInvalidXMLCharacters(String str) {\r
+ assert str != null;\r
+\r
+ StringBuilder out = new StringBuilder(); // Used to hold the output.\r
+ int codePoint; // Used to reference the current character.\r
+\r
+ // For test\r
+ // String ss = "\ud801\udc00"; // This is actually one Unicode character,\r
+ // represented by two code units.\r
+ // System.out.println(ss.codePointCount(0, ss.length()));// See: 1\r
+ int i = 0;\r
+ String value = null;\r
+ try {\r
+ // make sure the string contains only UTF-8 characters\r
+ value = new String(str.getBytes("UTF-8"), "UTF-8");\r
+ } catch (UnsupportedEncodingException e) {\r
+ // will not happen\r
+ throw new AssertionError("UTF-8 charset is not supported!!!");\r
+ }\r
+ while (i < value.length()) {\r
+ codePoint = value.codePointAt(i); // This is the unicode code of the\r
+ // character.\r
+ if ((codePoint == 0x9)\r
+ || // Consider testing larger ranges first to\r
+ // improve speed.\r
+ (codePoint == 0xA) || (codePoint == 0xD)\r
+ || ((codePoint >= 0x20) && (codePoint <= 0xD7FF))\r
+ || ((codePoint >= 0xE000) && (codePoint <= 0xFFFD))\r
+ || ((codePoint >= 0x10000) && (codePoint <= 0x10FFFF))) {\r
+\r
+ out.append(Character.toChars(codePoint));\r
+ }\r
+\r
+ i += Character.charCount(codePoint);\r
+ /*\r
+ * Increment with the number of code units(java chars) needed to\r
+ * represent a Unicode char.\r
+ */\r
+ }\r
+ return out.toString();\r