--- /dev/null
+/*\r
+ * Cay S. Horstmann & Gary Cornell, Core Java\r
+ * Published By Sun Microsystems Press/Prentice-Hall\r
+ * Copyright (C) 1997 Sun Microsystems Inc.\r
+ * All Rights Reserved.\r
+ *\r
+ * Permission to use, copy, modify, and distribute this \r
+ * software and its documentation for NON-COMMERCIAL purposes\r
+ * and without fee is hereby granted provided that this \r
+ * copyright notice appears in all copies. \r
+ * \r
+ * THE AUTHORS AND PUBLISHER MAKE NO REPRESENTATIONS OR \r
+ * WARRANTIES ABOUT THE SUITABILITY OF THE SOFTWARE, EITHER \r
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE \r
+ * IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A \r
+ * PARTICULAR PURPOSE, OR NON-INFRINGEMENT. THE AUTHORS\r
+ * AND PUBLISHER SHALL NOT BE LIABLE FOR ANY DAMAGES SUFFERED \r
+ * BY LICENSEE AS A RESULT OF USING, MODIFYING OR DISTRIBUTING \r
+ * THIS SOFTWARE OR ITS DERIVATIVES.\r
+ */\r
+\r
+/**\r
+ * A class for formatting numbers that follows printf conventions.\r
+ * Also implements C-like atoi, atol and atof functions.\r
+ * @version 1.03 25 Oct 1997\r
+ * @author Cay Horstmann\r
+ */\r
+\r
+package org.vamsas.objects.utils;\r
+\r
+import java.io.*;\r
+\r
+public class Format { /**\r
+ * Formats the number following printf conventions.\r
+ * Main limitation: can only handle one format parameter at a time.\r
+ * Use multiple Format objects to format more than one number.\r
+ * @param s the format string following printf conventions\r
+ * The string has a prefix, a format code and a suffix. The prefix and suffix\r
+ * become part of the formatted output. The format code directs the\r
+ * formatting of the (single) parameter to be formatted. The code has the\r
+ * following structure\r
+ * <ul>\r
+ * <li> a % (required)\r
+ * <li> a modifier (optional)\r
+ * <dl>\r
+ * <dt> + <dd> forces display of + for positive numbers\r
+ * <dt> 0 <dd> show leading zeroes\r
+ * <dt> - <dd> align left in the field\r
+ * <dt> space <dd> prepend a space in front of positive numbers\r
+ * <dt> # <dd> use "alternate" format. Add 0 or 0x for octal or hexadecimal numbers. Don't suppress trailing zeroes in general floating point format.\r
+ * </dl>\r
+ * <li> an integer denoting field width (optional)\r
+ * <li> a period followed by an integer denoting precision (optional)\r
+ * <li> a format descriptor (required)\r
+ * <dl>\r
+ * <dt>f <dd> floating point number in fixed format\r
+ * <dt>e, E <dd> floating point number in exponential notation (scientific format). The E format results in an uppercase E for the exponent (1.14130E+003), the e format in a lowercase e.\r
+ * <dt>g, G <dd> floating point number in general format (fixed format for small numbers, exponential format for large numbers). Trailing zeroes are suppressed. The G format results in an uppercase E for the exponent (if any), the g format in a lowercase e.\r
+ * <dt>d, i <dd> integer in decimal\r
+ * <dt>x <dd> integer in hexadecimal\r
+ * <dt>o <dd> integer in octal\r
+ * <dt>s <dd> string\r
+ * <dt>c <dd> character\r
+ * </dl>\r
+ * </ul>\r
+ * @exception IllegalArgumentException if bad format\r
+ */\r
+\r
+ public Format(String s) {\r
+ // Start from neutral defaults; a precision of -1 means "not given".\r
+ width = 0;\r
+ precision = -1;\r
+ pre = "";\r
+ post = "";\r
+ leading_zeroes = false;\r
+ show_plus = false;\r
+ alternate = false;\r
+ show_space = false;\r
+ left_align = false;\r
+ fmt = ' ';\r
+\r
+ int state = 0; // NOTE(review): never read anywhere in this constructor\r
+ int length = s.length();\r
+ int parse_state = 0;\r
+ // 0 = prefix, 1 = flags, 2 = width, 3 = precision,\r
+ // 4 = format, 5 = end\r
+ int i = 0;\r
+\r
+ // State 0: copy literal text into the prefix until a lone '%' is found.\r
+ // "%%" contributes a single '%' to the prefix; a '%' that is the very\r
+ // last character is rejected.\r
+ while (parse_state == 0) {\r
+ if (i >= length)\r
+ parse_state = 5;\r
+ else if (s.charAt(i) == '%') {\r
+ if (i < length - 1) {\r
+ if (s.charAt(i + 1) == '%') {\r
+ pre = pre + '%';\r
+ i++;\r
+ } else\r
+ parse_state = 1;\r
+ } else\r
+ throw new java.lang.IllegalArgumentException();\r
+ } else\r
+ pre = pre + s.charAt(i);\r
+ i++;\r
+ }\r
+ // State 1: consume any run of flag characters (' ', '-', '+', '0', '#').\r
+ while (parse_state == 1) {\r
+ if (i >= length)\r
+ parse_state = 5;\r
+ else if (s.charAt(i) == ' ')\r
+ show_space = true;\r
+ else if (s.charAt(i) == '-')\r
+ left_align = true;\r
+ else if (s.charAt(i) == '+')\r
+ show_plus = true;\r
+ else if (s.charAt(i) == '0')\r
+ leading_zeroes = true;\r
+ else if (s.charAt(i) == '#')\r
+ alternate = true;\r
+ else {\r
+ parse_state = 2;\r
+ // cancel the unconditional i++ below so the non-flag character\r
+ // is looked at again by the width state\r
+ i--;\r
+ }\r
+ i++;\r
+ }\r
+ // State 2: accumulate the decimal field width; a '.' starts the\r
+ // precision (which is then reset from -1 to 0).\r
+ while (parse_state == 2) {\r
+ if (i >= length)\r
+ parse_state = 5;\r
+ else if ('0' <= s.charAt(i) && s.charAt(i) <= '9') {\r
+ width = width * 10 + s.charAt(i) - '0';\r
+ i++;\r
+ } else if (s.charAt(i) == '.') {\r
+ parse_state = 3;\r
+ precision = 0;\r
+ i++;\r
+ } else\r
+ parse_state = 4;\r
+ }\r
+ // State 3: accumulate the decimal precision.\r
+ while (parse_state == 3) {\r
+ if (i >= length)\r
+ parse_state = 5;\r
+ else if ('0' <= s.charAt(i) && s.charAt(i) <= '9') {\r
+ precision = precision * 10 + s.charAt(i) - '0';\r
+ i++;\r
+ } else\r
+ parse_state = 4;\r
+ }\r
+ // State 4: the next character is stored as the conversion character.\r
+ // It is not validated here; the form(...) methods reject unsupported ones.\r
+ if (parse_state == 4) {\r
+ if (i >= length)\r
+ parse_state = 5;\r
+ else\r
+ fmt = s.charAt(i);\r
+ i++;\r
+ }\r
+ // Whatever follows the conversion character is kept verbatim as the\r
+ // suffix (no "%%" unescaping is applied to it).\r
+ if (i < length)\r
+ post = s.substring(i, length);\r
+ }\r
+\r
+ /**\r
+  * Formats a double according to a printf-style format string and prints\r
+  * the result.\r
+  * @param s the PrintStream to print to\r
+  * @param fmt the format string\r
+  * @param x the double to print\r
+  */\r
+ public static void print(java.io.PrintStream s, String fmt, double x) {\r
+     Format formatter = new Format(fmt);\r
+     String text = formatter.form(x);\r
+     s.print(text);\r
+ }\r
+\r
+ /**\r
+  * Formats a long according to a printf-style format string and prints\r
+  * the result.\r
+  * @param s the PrintStream to print to\r
+  * @param fmt the format string\r
+  * @param x the long to print\r
+  */\r
+ public static void print(java.io.PrintStream s, String fmt, long x) {\r
+     Format formatter = new Format(fmt);\r
+     String text = formatter.form(x);\r
+     s.print(text);\r
+ }\r
+\r
+ /**\r
+  * Formats a character according to a printf-style format string and\r
+  * prints the result.\r
+  * @param s the PrintStream to print to\r
+  * @param fmt the format string\r
+  * @param x the character to print\r
+  */\r
+ public static void print(java.io.PrintStream s, String fmt, char x) {\r
+     Format formatter = new Format(fmt);\r
+     String text = formatter.form(x);\r
+     s.print(text);\r
+ }\r
+\r
+ /**\r
+  * Formats a string according to a printf-style format string and prints\r
+  * the result.\r
+  * @param s the PrintStream to print to\r
+  * @param fmt the format string\r
+  * @param x the string to print\r
+  */\r
+ public static void print(java.io.PrintStream s, String fmt, String x) {\r
+     Format formatter = new Format(fmt);\r
+     String text = formatter.form(x);\r
+     s.print(text);\r
+ }\r
+\r
+ /**\r
+  * Converts the leading digits of a string (decimal, octal or hex) to an\r
+  * int. The work is done by atol; the long result is narrowed with a\r
+  * plain (int) cast.\r
+  * @param s a string\r
+  * @return the numeric value of the numeric prefix of s, cast to int\r
+  */\r
+ public static int atoi(String s) {\r
+     long wide = atol(s);\r
+     return (int) wide;\r
+ }\r
+\r
+ /**\r
+  * Converts the leading digits of a string to a long integer.\r
+  * <p>\r
+  * After optional white space and an optional sign, a leading "0x"/"0X"\r
+  * selects hexadecimal, any other leading '0' selects octal, and anything\r
+  * else is read as decimal. Parsing stops at the first character that is\r
+  * not a digit of the selected base.\r
+  * <p>\r
+  * The sign is looked at before the radix prefix, so "-0x10" yields -16\r
+  * and "-017" yields -15 (previously a signed value was always read as\r
+  * decimal, which made "-0x10" evaluate to 0).\r
+  * @param s a string\r
+  * @return the numeric value of the numeric prefix of s\r
+  */\r
+ public static long atol(String s) {\r
+     int n = s.length();\r
+     int i = 0;\r
+     while (i < n && Character.isWhitespace(s.charAt(i)))\r
+         i++;\r
+\r
+     // Peek past an optional sign so that it cannot hide a 0 / 0x prefix.\r
+     int j = i;\r
+     boolean negative = false;\r
+     if (j < n && (s.charAt(j) == '-' || s.charAt(j) == '+')) {\r
+         negative = s.charAt(j) == '-';\r
+         j++;\r
+     }\r
+\r
+     if (j < n && s.charAt(j) == '0') {\r
+         long magnitude;\r
+         if (j + 1 < n && (s.charAt(j + 1) == 'x' || s.charAt(j + 1) == 'X'))\r
+             magnitude = parseLong(s.substring(j + 2), 16);\r
+         else\r
+             magnitude = parseLong(s.substring(j), 8);\r
+         return negative ? -magnitude : magnitude;\r
+     }\r
+\r
+     // Plain decimal: parseLong handles the white space and sign itself.\r
+     return parseLong(s, 10);\r
+ }\r
+\r
+ /**\r
+  * Parses the longest numeric prefix of s in the given base.\r
+  * Leading white space and one optional '+' or '-' are accepted; parsing\r
+  * stops at the first character that is not a digit of the base. No\r
+  * overflow check is made.\r
+  * <p>\r
+  * Decimal digit characters are limited to '0'..'9' (or fewer for bases\r
+  * below 10). The previous test {@code ch < '0' + base} let the characters\r
+  * ':' ';' '<' '=' '>' '?' through as digits 10..15 when base was 16.\r
+  * @param s the text to parse\r
+  * @param base the radix; callers in this class pass 8, 10 or 16\r
+  * @return the signed value of the prefix, 0 if there are no digits\r
+  */\r
+ private static long parseLong(String s, int base) {\r
+     int n = s.length();\r
+     int i = 0;\r
+     int sign = 1;\r
+     long r = 0;\r
+\r
+     while (i < n && Character.isWhitespace(s.charAt(i)))\r
+         i++;\r
+     if (i < n && s.charAt(i) == '-') {\r
+         sign = -1;\r
+         i++;\r
+     } else if (i < n && s.charAt(i) == '+') {\r
+         i++;\r
+     }\r
+\r
+     // Number of values that are written with the characters '0'..'9'.\r
+     int decimalDigits = Math.min(base, 10);\r
+     while (i < n) {\r
+         char ch = s.charAt(i);\r
+         int digit;\r
+         if ('0' <= ch && ch < '0' + decimalDigits)\r
+             digit = ch - '0';\r
+         else if ('A' <= ch && ch < 'A' + base - 10)\r
+             digit = ch - 'A' + 10;\r
+         else if ('a' <= ch && ch < 'a' + base - 10)\r
+             digit = ch - 'a' + 10;\r
+         else\r
+             break; // first non-digit ends the number\r
+         r = r * base + digit;\r
+         i++;\r
+     }\r
+     return r * sign;\r
+ }\r
+\r
+ /**\r
+  * Converts the leading part of a string to a double.\r
+  * Accepts optional white space, an optional sign, digits with at most\r
+  * one decimal point, and an optional 'e'/'E' followed by a base 10\r
+  * integer exponent. Parsing stops at the first character that does not\r
+  * fit this pattern.\r
+  * @param s a string\r
+  * @return the value of the numeric prefix of s\r
+  */\r
+ public static double atof(String s) {\r
+     final int n = s.length();\r
+     int pos = 0;\r
+     while (pos < n && Character.isWhitespace(s.charAt(pos)))\r
+         pos++;\r
+\r
+     int sign = 1;\r
+     if (pos < n) {\r
+         char first = s.charAt(pos);\r
+         if (first == '-') {\r
+             sign = -1;\r
+             pos++;\r
+         } else if (first == '+') {\r
+             pos++;\r
+         }\r
+     }\r
+\r
+     double r = 0;               // value accumulated so far\r
+     double p = 1;               // weight of the last fractional digit\r
+     boolean inFraction = false; // true once the decimal point was seen\r
+     for (; pos < n; pos++) {\r
+         char ch = s.charAt(pos);\r
+         if (ch == 'e' || ch == 'E') {\r
+             long e = (int) parseLong(s.substring(pos + 1), 10);\r
+             return sign * r * Math.pow(10, e);\r
+         }\r
+         if (ch == '.') {\r
+             if (inFraction)\r
+                 break; // a second '.' ends the number\r
+             inFraction = true;\r
+             continue;\r
+         }\r
+         if (ch < '0' || ch > '9')\r
+             break;\r
+         if (inFraction) {\r
+             p = p / 10;\r
+             r = r + p * (ch - '0');\r
+         } else {\r
+             r = r * 10 + ch - '0';\r
+         }\r
+     }\r
+     return sign * r;\r
+ }\r
+\r
+ /**\r
+  * Formats a double into a string (like sprintf in C).\r
+  * <p>\r
+  * Supported conversions are f, e, E, g and G. When the format string\r
+  * gave no precision, 6 is used (and remembered in this object).\r
+  * Infinite values are rendered as "inf" and NaN as "nan" ("INF"/"NAN"\r
+  * for the E and G conversions), with the usual sign, '+' and space\r
+  * handling and field padding but without zero filling. Previously an\r
+  * infinite argument never returned and NaN was printed as zero.\r
+  * @param x the number to format\r
+  * @return the formatted string\r
+  * @exception IllegalArgumentException if the conversion character of\r
+  * this Format is not one of f, e, E, g, G\r
+  */\r
+ public String form(double x) {\r
+     boolean fixed = fmt == 'f';\r
+     boolean exponential = fmt == 'e' || fmt == 'E' || fmt == 'g' || fmt == 'G';\r
+     // Validate before touching any state, so that a rejected call does\r
+     // not leave a changed precision behind.\r
+     if (!fixed && !exponential)\r
+         throw new java.lang.IllegalArgumentException();\r
+\r
+     if (Double.isNaN(x) || Double.isInfinite(x)) {\r
+         boolean upper = fmt == 'E' || fmt == 'G';\r
+         String text;\r
+         if (Double.isNaN(x))\r
+             text = upper ? "NAN" : "nan";\r
+         else\r
+             text = upper ? "INF" : "inf";\r
+         String prefix = "";\r
+         if (x < 0)\r
+             prefix = "-";\r
+         else if (show_plus)\r
+             prefix = "+";\r
+         else if (show_space)\r
+             prefix = " ";\r
+         return pad(prefix + text);\r
+     }\r
+\r
+     if (precision < 0)\r
+         precision = 6;\r
+     int s = 1;\r
+     if (x < 0) {\r
+         x = -x;\r
+         s = -1;\r
+     }\r
+     String r = fixed ? fixed_format(x) : exp_format(x);\r
+     return pad(sign(s, r));\r
+ }\r
+\r
+ /**\r
+  * Formats a long integer into a string (like sprintf in C).\r
+  * Conversions d and i print signed decimal; o, x and X print the bit\r
+  * pattern of the value in octal or hexadecimal.\r
+  * @param x the number to format\r
+  * @return the formatted string\r
+  * @exception IllegalArgumentException if the conversion character of\r
+  * this Format is not one of d, i, o, x, X\r
+  */\r
+ public String form(long x) {\r
+     String digits;\r
+     int signum = 0; // stays 0 for o/x/X so that sign() applies the '#' prefix\r
+     switch (fmt) {\r
+     case 'd':\r
+     case 'i': {\r
+         String decimal = "" + x;\r
+         if (x < 0) {\r
+             // strip the '-' from the text; this also copes with Long.MIN_VALUE\r
+             digits = decimal.substring(1);\r
+             signum = -1;\r
+         } else {\r
+             digits = decimal;\r
+             signum = 1;\r
+         }\r
+         break;\r
+     }\r
+     case 'o':\r
+         digits = convert(x, 3, 7, "01234567");\r
+         break;\r
+     case 'x':\r
+         digits = convert(x, 4, 15, "0123456789abcdef");\r
+         break;\r
+     case 'X':\r
+         digits = convert(x, 4, 15, "0123456789ABCDEF");\r
+         break;\r
+     default:\r
+         throw new java.lang.IllegalArgumentException();\r
+     }\r
+     return pad(sign(signum, digits));\r
+ }\r
+\r
+ /**\r
+  * Formats a character into a string (like sprintf in C).\r
+  * @param c the character to format\r
+  * @return the formatted string\r
+  * @exception IllegalArgumentException if the conversion character of\r
+  * this Format is not c\r
+  */\r
+ public String form(char c) {\r
+     if (fmt == 'c')\r
+         return pad("" + c);\r
+     throw new java.lang.IllegalArgumentException();\r
+ }\r
+\r
+ /**\r
+  * Formats a string into a larger string (like sprintf in C).\r
+  * <p>\r
+  * If a precision was given, at most that many leading characters of s\r
+  * are used. A precision larger than the string length leaves the string\r
+  * untouched, as in C; previously this case threw\r
+  * StringIndexOutOfBoundsException.\r
+  * @param s the value to format\r
+  * @return the formatted string\r
+  * @exception IllegalArgumentException if the conversion character of\r
+  * this Format is not s\r
+  */\r
+ public String form(String s) {\r
+     if (fmt != 's')\r
+         throw new java.lang.IllegalArgumentException();\r
+     if (precision >= 0 && precision < s.length())\r
+         s = s.substring(0, precision);\r
+     return pad(s);\r
+ }\r
+\r
+\r
+ /**\r
+ * A test stub for the Format class: prints a fixed set of sample values\r
+ * through a variety of format strings to System.out. The output is not\r
+ * checked by the program; it is meant to be inspected by eye.\r
+ * @param a command line arguments (not used)\r
+ */\r
+\r
+ public static void main(String[] a) {\r
+ double x = 1.23456789012;\r
+ double y = 123;\r
+ double z = 1.2345e30;\r
+ double w = 1.02;\r
+ double u = 1.234e-5;\r
+ int d = 0xCAFE;\r
+ // fixed format (%f) with various flags, widths and precisions\r
+ Format.print(System.out, "x = |%f|\n", x);\r
+ Format.print(System.out, "u = |%20f|\n", u);\r
+ Format.print(System.out, "x = |% .5f|\n", x);\r
+ Format.print(System.out, "w = |%20.5f|\n", w);\r
+ Format.print(System.out, "x = |%020.5f|\n", x);\r
+ Format.print(System.out, "x = |%+20.5f|\n", x);\r
+ Format.print(System.out, "x = |%+020.5f|\n", x);\r
+ Format.print(System.out, "x = |% 020.5f|\n", x);\r
+ Format.print(System.out, "y = |%#+20.5f|\n", y);\r
+ Format.print(System.out, "y = |%-+20.5f|\n", y);\r
+ Format.print(System.out, "z = |%20.5f|\n", z);\r
+\r
+ // exponential format (%e)\r
+ Format.print(System.out, "x = |%e|\n", x);\r
+ Format.print(System.out, "u = |%20e|\n", u);\r
+ Format.print(System.out, "x = |% .5e|\n", x);\r
+ Format.print(System.out, "w = |%20.5e|\n", w);\r
+ Format.print(System.out, "x = |%020.5e|\n", x);\r
+ Format.print(System.out, "x = |%+20.5e|\n", x);\r
+ Format.print(System.out, "x = |%+020.5e|\n", x);\r
+ Format.print(System.out, "x = |% 020.5e|\n", x);\r
+ Format.print(System.out, "y = |%#+20.5e|\n", y);\r
+ Format.print(System.out, "y = |%-+20.5e|\n", y);\r
+\r
+ // general format (%g)\r
+ Format.print(System.out, "x = |%g|\n", x);\r
+ Format.print(System.out, "z = |%g|\n", z);\r
+ Format.print(System.out, "w = |%g|\n", w);\r
+ Format.print(System.out, "u = |%g|\n", u);\r
+ Format.print(System.out, "y = |%.2g|\n", y);\r
+ Format.print(System.out, "y = |%#.2g|\n", y);\r
+\r
+ // integer conversions (%d, %x, %X, %o); the int argument selects the\r
+ // long overload of print\r
+ Format.print(System.out, "d = |%d|\n", d);\r
+ Format.print(System.out, "d = |%20d|\n", d);\r
+ Format.print(System.out, "d = |%020d|\n", d);\r
+ Format.print(System.out, "d = |%+20d|\n", d);\r
+ Format.print(System.out, "d = |% 020d|\n", d);\r
+ Format.print(System.out, "d = |%-20d|\n", d);\r
+ Format.print(System.out, "d = |%20.8d|\n", d);\r
+ Format.print(System.out, "d = |%x|\n", d);\r
+ Format.print(System.out, "d = |%20X|\n", d);\r
+ Format.print(System.out, "d = |%#20x|\n", d);\r
+ Format.print(System.out, "d = |%020X|\n", d);\r
+ Format.print(System.out, "d = |%20.8x|\n", d);\r
+ Format.print(System.out, "d = |%o|\n", d);\r
+ Format.print(System.out, "d = |%020o|\n", d);\r
+ Format.print(System.out, "d = |%#20o|\n", d);\r
+ Format.print(System.out, "d = |%#020o|\n", d);\r
+ Format.print(System.out, "d = |%20.12o|\n", d);\r
+\r
+ // string and character conversions\r
+ Format.print(System.out, "s = |%-20s|\n", "Hello");\r
+ Format.print(System.out, "s = |%-20c|\n", '!');\r
+\r
+ // regression test to confirm fix of reported bugs\r
+\r
+ Format.print(System.out, "|%i|\n", Long.MIN_VALUE);\r
+\r
+ Format.print(System.out, "|%6.2e|\n", 0.0);\r
+ Format.print(System.out, "|%6.2g|\n", 0.0);\r
+\r
+ Format.print(System.out, "|%6.2f|\n", 9.99);\r
+ Format.print(System.out, "|%6.2f|\n", 9.999);\r
+\r
+ Format.print(System.out, "|%6.0f|\n", 9.999);\r
+ }\r
+\r
+ /**\r
+  * Builds a string consisting of n copies of c.\r
+  * @param c the character to repeat\r
+  * @param n the number of copies; zero or negative yields ""\r
+  * @return the repeated character string\r
+  */\r
+ private static String repeat(char c, int n) {\r
+     if (n <= 0)\r
+         return "";\r
+     char[] run = new char[n];\r
+     for (int k = 0; k < run.length; k++)\r
+         run[k] = c;\r
+     return new String(run);\r
+ }\r
+\r
+ /**\r
+  * Renders the bit pattern of x in a power-of-two base.\r
+  * The value is consumed n bits at a time with an unsigned shift, so\r
+  * negative numbers come out as their two's complement bit pattern.\r
+  * @param x the value to convert\r
+  * @param n the number of bits per digit (3 for octal, 4 for hex)\r
+  * @param m the mask selecting one digit (7 for octal, 15 for hex)\r
+  * @param d the digit characters, indexed by digit value\r
+  * @return the digits, most significant first; "0" when x is 0\r
+  */\r
+ private static String convert(long x, int n, int m, String d) {\r
+     if (x == 0)\r
+         return "0";\r
+     StringBuffer digits = new StringBuffer();\r
+     for (long rest = x; rest != 0; rest = rest >>> n) {\r
+         // the lowest remaining digit goes in front of those already emitted\r
+         digits.insert(0, d.charAt((int) (rest & m)));\r
+     }\r
+     return digits.toString();\r
+ }\r
+\r
+ /**\r
+  * Pads r with spaces up to the field width and wraps it in the prefix\r
+  * and suffix taken from the format string. The spaces go after r when\r
+  * the '-' flag was given and before it otherwise.\r
+  * @param r the already formatted value\r
+  * @return prefix + padded value + suffix\r
+  */\r
+ private String pad(String r) {\r
+     String filler = repeat(' ', width - r.length());\r
+     String field = left_align ? r + filler : filler + r;\r
+     return pre + field + post;\r
+ }\r
+\r
+ /**\r
+  * Prepends the sign or radix prefix and any zero fill to a digit string.\r
+  * @param s negative for a minus sign, positive for a value that may get\r
+  * '+' or ' ' according to the flags, 0 for the unsigned conversions\r
+  * (where the '#' flag selects a "0", "0x" or "0X" prefix)\r
+  * @param r the digits without any sign\r
+  * @return prefix, zero fill and digits concatenated\r
+  */\r
+ private String sign(int s, String r) {\r
+     String prefix = "";\r
+     if (s < 0) {\r
+         prefix = "-";\r
+     } else if (s > 0) {\r
+         if (show_plus)\r
+             prefix = "+";\r
+         else if (show_space)\r
+             prefix = " ";\r
+     } else if (alternate) {\r
+         if (fmt == 'o') {\r
+             // octal gets its leading 0 only if the digits do not start with one\r
+             if (r.length() > 0 && r.charAt(0) != '0')\r
+                 prefix = "0";\r
+         } else if (fmt == 'x') {\r
+             prefix = "0x";\r
+         } else if (fmt == 'X') {\r
+             prefix = "0X";\r
+         }\r
+     }\r
+\r
+     // Total length to reach with zero fill: the field width under the '0'\r
+     // flag, otherwise the precision for the integer conversions.\r
+     boolean integral = fmt == 'd' || fmt == 'i' || fmt == 'x' || fmt == 'X' || fmt == 'o';\r
+     int target = 0;\r
+     if (leading_zeroes)\r
+         target = width;\r
+     else if (integral && precision > 0)\r
+         target = precision;\r
+\r
+     String zeroes = repeat('0', target - prefix.length() - r.length());\r
+     return prefix + zeroes + r;\r
+ }\r
+\r
+ /**\r
+ * Renders a non-negative double in fixed notation with `precision`\r
+ * digits after the decimal point. Values that do not fit a long are\r
+ * handed to exp_format instead.\r
+ * @param d the value to format; form(double) passes the absolute value\r
+ * @return the digits, without sign or padding\r
+ */\r
+ private String fixed_format(double d) {\r
+ boolean removeTrailing\r
+ = (fmt == 'G' || fmt == 'g') && !alternate;\r
+ // remove trailing zeroes and decimal point\r
+\r
+ // too large to split into a long whole part: use exponential notation\r
+ if (d > 0x7FFFFFFFFFFFFFFFL)\r
+ return exp_format(d);\r
+ // precision 0: round to the nearest integer. Outside g/G (or with '#')\r
+ // a trailing "." is appended.\r
+ // NOTE(review): C printf omits the "." for %.0f unless '#' is given;\r
+ // confirm whether this difference is intended.\r
+ if (precision == 0)\r
+ return (long)(d + 0.5) + (removeTrailing ? "" : ".");\r
+\r
+ long whole = (long)d;\r
+ double fr = d - whole; // fractional part\r
+ // a fraction outside [0, 1) means the split above did not work\r
+ if (fr >= 1 || fr < 0)\r
+ return exp_format(d);\r
+\r
+ // factor = 10^precision (capped so it stays within long range);\r
+ // the local leading_zeroes string (which shadows the field of the same\r
+ // name) holds one "0" per digit for left-filling the fraction.\r
+ double factor = 1;\r
+ String leading_zeroes = "";\r
+ for (int i = 1; i <= precision && factor <= 0x7FFFFFFFFFFFFFFFL; i++) {\r
+ factor *= 10;\r
+ leading_zeroes = leading_zeroes + "0";\r
+ }\r
+ // scale the fraction to an integer, rounding half up\r
+ long l = (long) (factor * fr + 0.5);\r
+ // rounding carried into the whole part (e.g. 9.999 with precision 2)\r
+ if (l >= factor) {\r
+ l = 0;\r
+ whole++;\r
+ } // CSH 10-25-97\r
+\r
+ // left-fill with zeroes, then keep exactly `precision` digits\r
+ String z = leading_zeroes + l;\r
+ z = "." + z.substring(z.length() - precision, z.length());\r
+\r
+ if (removeTrailing) {\r
+ // strip trailing zeroes, and the "." too if nothing is left after it\r
+ int t = z.length() - 1;\r
+ while (t >= 0 && z.charAt(t) == '0')\r
+ t--;\r
+ if (t >= 0 && z.charAt(t) == '.')\r
+ t--;\r
+ z = z.substring(0, t + 1);\r
+ }\r
+\r
+ return whole + z;\r
+ }\r
+\r
+ /**\r
+  * Renders a non-negative double in exponential notation: a mantissa\r
+  * formatted by fixed_format, then 'e' or 'E', a sign and a three digit\r
+  * exponent. For the g/G conversions values whose exponent lies in\r
+  * [-4, precision) are rendered by fixed_format instead.\r
+  * <p>\r
+  * Fixes over the previous version: a value of exactly 10 (or a power\r
+  * of ten reduced to 10) is normalised to mantissa 1; a mantissa that\r
+  * rounds up to 10 at the requested precision is renormalised; NaN and\r
+  * infinity are returned as text instead of looping forever.\r
+  * @param d the value to format; form(double) passes the absolute value\r
+  * @return the digits, without sign or padding\r
+  */\r
+ private String exp_format(double d) {\r
+     boolean upper = fmt == 'E' || fmt == 'G';\r
+     if (Double.isNaN(d))\r
+         return upper ? "NAN" : "nan";\r
+     if (Double.isInfinite(d))\r
+         return upper ? "INF" : "inf"; // dividing by 10 would never end\r
+\r
+     int e = 0;\r
+     double dd = d;\r
+     double factor = 1;\r
+     if (d != 0) {\r
+         // bring dd into [1, 10), tracking the decimal exponent\r
+         while (dd >= 10) {\r
+             e++;\r
+             factor /= 10;\r
+             dd = dd / 10;\r
+         }\r
+         while (dd < 1) {\r
+             e--;\r
+             factor *= 10;\r
+             dd = dd * 10;\r
+         }\r
+     }\r
+     if ((fmt == 'g' || fmt == 'G') && e >= -4 && e < precision)\r
+         return fixed_format(d);\r
+\r
+     // For extremely small values the scale factor itself overflows; the\r
+     // step-wise normalised dd is used as the mantissa in that case.\r
+     double mantissa = Double.isInfinite(factor) ? dd : d * factor;\r
+     String f = fixed_format(mantissa);\r
+     if (f.startsWith("10")) {\r
+         // rounding pushed the mantissa up to 10: shift one more decade\r
+         e++;\r
+         f = fixed_format(mantissa / 10);\r
+     }\r
+\r
+     if (fmt == 'e' || fmt == 'g')\r
+         f = f + "e";\r
+     else\r
+         f = f + "E";\r
+\r
+     // exponent: sign followed by at least three digits\r
+     String p = "000";\r
+     if (e >= 0) {\r
+         f = f + "+";\r
+         p = p + e;\r
+     } else {\r
+         f = f + "-";\r
+         p = p + (-e);\r
+     }\r
+\r
+     return f + p.substring(p.length() - 3, p.length());\r
+ }\r
+\r
+ // minimum field width; 0 when the format string gives none\r
+ private int width;\r
+ // precision; -1 until one is parsed (form(double) then replaces it by 6)\r
+ private int precision;\r
+ // literal text before the format code\r
+ private String pre;\r
+ // literal text after the format code\r
+ private String post;\r
+ // '0' flag: zero fill up to the field width\r
+ private boolean leading_zeroes;\r
+ // '+' flag: show a plus sign on positive numbers\r
+ private boolean show_plus;\r
+ // '#' flag: alternate form\r
+ private boolean alternate;\r
+ // ' ' flag: put a space in front of positive numbers\r
+ private boolean show_space;\r
+ // '-' flag: left-align within the field\r
+ private boolean left_align;\r
+ private char fmt; // conversion character, one of c d e E f g G i o s x X\r
+}\r
+\r
+\r
+\r
+\r
+\r
--- /dev/null
+/*
+ * Created on 17-May-2005
+ *
+ * TODO To change the template for this generated file go to
+ * Window - Preferences - Java - Code Style - Code Templates
+ */
+package org.vamsas.objects.utils;
+
+import java.io.BufferedOutputStream;
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.util.Hashtable;
+import java.util.StringTokenizer;
+import java.util.Vector;
+import java.util.regex.Pattern;
+
+import vamsas.objects.simple.Alignment;
+import vamsas.objects.simple.Sequence;
+import vamsas.objects.simple.SequenceSet;
+
+/**
+ * @author jimp
+ *
+ * TODO To change the template for this generated type comment go to
+ * Window - Preferences - Java - Code Style - Code Templates
+ */
+public class SeqAln extends vamsas.objects.simple.Alignment {
+
+ /**
+  * Reads the sequences of a CLUSTAL-format alignment from a stream.
+  * <p>
+  * Lines are only interpreted once a line whose first token is "CLUSTAL"
+  * has been seen. Lines that begin with a space are skipped. For every
+  * other line the first space-separated token is the sequence id and the
+  * second token is appended to the residues collected so far for that id.
+  * Lines that carry an id but no second token are ignored (previously
+  * they caused a NoSuchElementException).
+  *
+  * @param os the stream to read from
+  * @return the sequences in order of first appearance, or null when no
+  *         "CLUSTAL" header line was found
+  * @throws Exception wrapping any IOException raised while reading
+  */
+ public static Sequence[] ReadClustalFile(InputStream os) throws Exception {
+     boolean flag = false;              // set once the CLUSTAL header was seen
+     Vector headers = new Vector();     // ids in order of first appearance
+     Hashtable seqhash = new Hashtable(); // id -> StringBuffer of residues
+     String line;
+
+     try {
+         BufferedReader ins = new BufferedReader(new InputStreamReader(os));
+         while ((line = ins.readLine()) != null) {
+             if (line.indexOf(" ") == 0) {
+                 continue;
+             }
+             StringTokenizer str = new StringTokenizer(line, " ");
+             if (!str.hasMoreTokens()) {
+                 continue;
+             }
+             String id = str.nextToken();
+             if (id.equals("CLUSTAL")) {
+                 flag = true;
+                 continue;
+             }
+             if (!flag || !str.hasMoreTokens()) {
+                 // before the header, or an id with nothing after it
+                 continue;
+             }
+
+             StringBuffer tempseq = (StringBuffer) seqhash.get(id);
+             if (tempseq == null) {
+                 tempseq = new StringBuffer();
+                 seqhash.put(id, tempseq);
+                 headers.addElement(id);
+             }
+             tempseq.append(str.nextToken());
+         }
+     } catch (IOException e) {
+         throw(new Exception("Exception parsing clustal file ",e));
+     }
+
+     if (!flag) {
+         return null;
+     }
+
+     Sequence[] seqs = new Sequence[headers.size()];
+     for (int i = 0; i < seqs.length; i++) {
+         Object id = headers.elementAt(i);
+         Object residues = seqhash.get(id);
+         if (residues == null) {
+             throw(new Exception("Bizarreness! Can't find sequence for " + id));
+         }
+         seqs[i] = new Sequence(id.toString(), residues.toString());
+     }
+     return seqs;
+ }
+
+ /**
+  * Writes an alignment in CLUSTAL format: a "CLUSTAL" header followed by
+  * blocks of up to 60 residues per sequence, each line starting with the
+  * sequence id left-aligned in a column at least 16 characters wide.
+  * Only the sequences before the first null entry are written.
+  * <p>
+  * Fixes over the previous version: the buffered output is flushed before
+  * returning (it was silently lost otherwise); no empty trailing block is
+  * produced when the longest sequence is a multiple of 60 long; and a
+  * sequence with no residues in a block is skipped instead of leaving its
+  * id on a line without a line break.
+  *
+  * @param os the stream to write to; it is flushed but not closed
+  * @param seqAl the alignment to write
+  * @throws IOException if writing fails
+  */
+ public static void WriteClustalWAlignment(java.io.OutputStream os, Alignment seqAl) throws IOException {
+     Sequence[] s = seqAl.getSeqs().getSeqs();
+
+     java.io.BufferedWriter out = new BufferedWriter(new java.io.OutputStreamWriter(os));
+
+     out.write("CLUSTAL\n\n");
+
+     // Find the longest sequence and the longest id among the leading
+     // non-null entries.
+     int max = 0;
+     int maxid = 0;
+     int nseq = 0;
+     while (nseq < s.length && s[nseq] != null) {
+         max = Math.max(max, s[nseq].getSeq().length());
+         maxid = Math.max(maxid, s[nseq].getId().length());
+         nseq++;
+     }
+
+     if (maxid < 15) {
+         maxid = 15;
+     }
+     maxid++;
+
+     final int len = 60;
+     int nochunks = (max + len - 1) / len; // number of blocks, rounded up
+     Format idFormat = new Format("%-" + maxid + "s");
+
+     for (int i = 0; i < nochunks; i++) {
+         int start = i * len;
+         int end = start + len;
+         for (int j = 0; j < nseq; j++) {
+             String residues = s[j].getSeq();
+             if (start >= residues.length()) {
+                 continue; // this sequence has nothing in the current block
+             }
+             out.write(idFormat.form(s[j].getId() + " "));
+             out.write(residues.substring(start, Math.min(end, residues.length())) + "\n");
+         }
+         out.write("\n");
+     }
+     out.flush();
+ }
+
+
+ /**
+  * Builds an Alignment from a set of sequences after checking that they
+  * all have the same length and use one single gap character.
+  * <p>
+  * Every character not matched by [A-Z*0-9] (case-insensitive) counts as
+  * a gap. The first gap character found is remembered and every later
+  * gap, in any sequence, must equal it. Previously the remembered
+  * character was reset for each sequence, so different gap characters in
+  * different sequences went undetected. '-' is used when no sequence
+  * contains a gap.
+  *
+  * @param origin the action recorded as the origin of the alignment
+  * @param seqs the aligned sequences
+  * @param source passed through to the Alignment constructor
+  * @return the new Alignment
+  * @throws Exception if a sequence differs in length from the first one;
+  *         an IOException if the gap characters are inconsistent
+  */
+ public static Alignment make_Alignment(vamsas.objects.simple.Action origin, Sequence[] seqs, String[] source) throws Exception {
+     Pattern nonGap = Pattern.compile("[A-Z*0-9]", Pattern.CASE_INSENSITIVE);
+     boolean gapsset = false;
+     char gapchar='-';
+     int seqLength = 0;
+
+     for (int i=0, nseq=seqs.length; i<nseq; i++) {
+         String seq = seqs[i].getSeq();
+         // what is left after removing all residue symbols are the gaps
+         String gaps = nonGap.matcher(seq).replaceAll("");
+         if (seqLength==0) {
+             seqLength=seq.length();
+         } else if (seqLength!=seq.length()) {
+             throw(new Exception(i+"th Sequence (>"+seqs[i].getId()+") is not aligned.\n"));//TODO: move this to assertions part of Alignment
+         }
+
+         // common check for any sequence...
+         if (gaps.length()>0) {
+             if (!gapsset) {
+                 gapchar = gaps.charAt(0);
+                 gapsset = true; // keep this character for all later sequences
+             }
+             for (int c=0, gc=gaps.length(); c<gc; c++) {
+                 if (gapchar!=gaps.charAt(c)) {
+                     throw(new IOException("Inconsistent gap characters in sequence "+i+": '"+seq+"'"));
+                 }
+             }
+         }
+     }
+
+     return new Alignment(origin, new SequenceSet(seqs), source, ""+gapchar);
+ }
+
+ /**
+  * Reads a FASTA stream and turns its sequences into an Alignment.
+  * Any failure while reading, including a stream without sequences, is
+  * reported as an "Invalid fasta alignment" exception carrying the cause.
+  *
+  * @param os the stream to read from
+  * @param source passed to make_Alignment; its first element is used to
+  *        create the origin Action
+  * @return the alignment built by make_Alignment
+  * @throws Exception if reading fails or the sequences are not aligned
+  */
+ public static Alignment read_FastaAlignment(InputStream os, String[] source) throws Exception {
+     final Sequence[] seqs;
+     try {
+         Sequence[] parsed = SeqSet.read_SeqFasta(os);
+         if (parsed == null) {
+             throw(new Exception("Empty alignment stream!\n"));
+         }
+         seqs = parsed;
+     } catch (Exception e) {
+         throw new Exception("Invalid fasta alignment\n",e);
+     }
+
+     vamsas.objects.simple.Action origin = new vamsas.objects.simple.Action(source[0]);
+     return make_Alignment(origin, seqs, source);
+ }
+ /**
+  * Reads a CLUSTAL stream and turns its sequences into an Alignment.
+  * Any failure while reading, including a stream without a CLUSTAL
+  * header, is reported as an "Invalid clustal alignment" exception
+  * carrying the cause (the message previously said "fasta").
+  *
+  * @param os the stream to read from
+  * @param source passed to make_Alignment; its first element is used to
+  *        create the origin Action
+  * @return the alignment built by make_Alignment
+  * @throws Exception if reading fails or the sequences are not aligned
+  */
+ public static Alignment read_ClustalAlignment(InputStream os, String[] source) throws Exception {
+     Sequence[] seqs;
+     try {
+         seqs = SeqAln.ReadClustalFile(os);
+         if (seqs==null)
+             throw(new Exception("Empty alignment stream!\n"));
+     } catch (Exception e) {
+         throw new Exception("Invalid clustal alignment\n",e);
+     }
+
+     return make_Alignment(new vamsas.objects.simple.Action(source[0]), seqs, source);
+ }
+}
--- /dev/null
+/*
+ * Created on 17-May-2005
+ *
+ * TODO To change the template for this generated file go to
+ * Window - Preferences - Java - Code Style - Code Templates
+ */
+package org.vamsas.objects.utils;
+
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.OutputStream;
+import java.io.OutputStreamWriter;
+import java.util.Hashtable;
+import java.util.Vector;
+import java.util.regex.Pattern;
+
+import vamsas.objects.simple.Sequence;
+
+/**
+ * @author jimp
+ *
+ * TODO To change the template for this generated type comment go to
+ * Window - Preferences - Java - Code Style - Code Templates
+ */
+public class SeqSet extends vamsas.objects.simple.SequenceSet {
+
+ /**
+  * Writes sequences in FASTA format with 80 residues per line.
+  *
+  * @param os the stream to write to
+  * @param seqs the sequences to write
+  * @throws IOException if writing fails
+  */
+ public static void write_Fasta(OutputStream os, Sequence[] seqs) throws IOException {
+     final int lineWidth = 80;
+     write_Fasta(os, seqs, lineWidth);
+ }
+
+ /**
+  * Writes sequences in FASTA format, either wrapped at 80 residues per
+  * line or with each sequence on a single line.
+  *
+  * @param os the stream to write to
+  * @param seqs the sequences to write
+  * @param width80 true to wrap at 80 residues, false for no wrapping
+  * @throws IOException if writing fails
+  */
+ public static void write_Fasta(OutputStream os, Sequence[] seqs, boolean width80) throws IOException {
+     int lineWidth;
+     if (width80) {
+         lineWidth = 80;
+     } else {
+         lineWidth = 0; // 0 means "do not wrap"
+     }
+     write_Fasta(os, seqs, lineWidth);
+ }
+
+ /**
+  * Writes sequences in FASTA format: a ">id" line followed by the
+  * residues, wrapped every width characters.
+  *
+  * @param os the stream to write to; it is flushed but not closed
+  * @param seqs the sequences to write
+  * @param width residues per line; zero or negative writes each sequence
+  *        on one line
+  * @throws IOException if writing fails
+  */
+ public static void write_Fasta(OutputStream os, Sequence[] seqs, int width) throws IOException {
+     final int total = seqs.length;
+     BufferedWriter fasta_out = new BufferedWriter(new OutputStreamWriter(os));
+     for (int idx = 0; idx < total; idx++) {
+         Sequence entry = seqs[idx];
+         fasta_out.write(">"+entry.getId()+"\n");
+         if (width<=0) {
+             fasta_out.write(entry.getSeq()+"\n");
+             continue;
+         }
+         String residues = entry.getSeq();
+         for (int from = 0, len = residues.length(); from < len; from += width) {
+             // the last line of a sequence may be shorter than width
+             fasta_out.write(residues, from, Math.min(width, len - from));
+             fasta_out.write("\n");
+         }
+     }
+     fasta_out.flush();
+ }
+ /**
+  * Reads a set of sequences in FASTA format from a stream.
+  * <p>
+  * Each line starting with '>' begins a new sequence; the rest of that
+  * line is its name. All following lines up to the next header are
+  * concatenated, with white space removed, to form the sequence string.
+  * Lines before the first header are ignored. No filtering of residue
+  * symbols is done.
+  * <p>
+  * Fixes over the previous version: the white space pattern was written
+  * as "//s+" and never matched; text before the first header ended up in
+  * a sequence starting with "null" and then raised
+  * StringIndexOutOfBoundsException.
+  * <p>
+  * TODO: introduce a dictionary parameter for qualified sequence symbols
+  * TODO: decide on where this routine should live... current best guess is vamsas.objects.io
+  *
+  * @param os the stream to read from
+  * @return the sequences in file order, or null if the stream contains
+  *         no header line
+  * @throws IOException if reading fails
+  */
+ public static Sequence[] read_SeqFasta(InputStream os) throws IOException {
+     Vector seqs = new Vector();
+     BufferedReader infasta = new BufferedReader(new InputStreamReader(os));
+     Pattern whitespace = Pattern.compile("\\s+");
+
+     String sname = null;       // name of the sequence being collected
+     StringBuffer seqstr = null; // its residues; null before the first header
+     String line;
+     while ((line = infasta.readLine()) != null) {
+         if (line.startsWith(">")) {
+             if (sname != null) {
+                 seqs.add(new Sequence(sname, seqstr.toString()));
+             }
+             sname = line.substring(1); // remove >
+             seqstr = new StringBuffer();
+         } else if (seqstr != null) {
+             seqstr.append(whitespace.matcher(line).replaceAll(""));
+         }
+     }
+     if (sname != null) {
+         seqs.add(new Sequence(sname, seqstr.toString()));
+     }
+
+     int nseq = seqs.size();
+     if (nseq == 0) {
+         return null;
+     }
+     // TODO:POSS: should really return a sequence if there's only one in the file.
+     Sequence[] seqset = new Sequence[nseq];
+     for (int i = 0; i < nseq; i++) {
+         seqset[i] = (Sequence) seqs.elementAt(i);
+     }
+     return seqset;
+ }
+
+ /**
+  * Renames every sequence to "Sequence" followed by its array index and
+  * returns a table from which the original ids can be recovered.
+  *
+  * @param sequences the sequences to rename in place
+  * @return a table mapping each generated name to the id it replaced
+  */
+ public static Hashtable uniquify(Sequence[] sequences) {
+     Hashtable map = new Hashtable();
+     int index = 0;
+     while (index < sequences.length) {
+         Sequence current = sequences[index];
+         String safename = "Sequence" + index;
+         map.put(safename, current.getId());
+         current.setId(safename);
+         index++;
+     }
+     return map;
+ }
+
+ /**
+  * Restores the ids recorded in a table produced by uniquify.
+  * Sequences whose current id is not a key of the table are left as
+  * they are.
+  *
+  * @param map a table mapping current ids to the ids to restore
+  * @param sequences the sequences to rename in place
+  * @return true if every sequence id was found in the table
+  */
+ public static boolean deuniquify(Hashtable map, Sequence[] sequences) {
+     int missing = 0;
+     for (int i = 0; i < sequences.length; i++) {
+         Sequence current = sequences[i];
+         if (!map.containsKey(current.getId())) {
+             missing++;
+             continue;
+         }
+         String unsafename = (String) map.get(current.getId());
+         current.setId(unsafename);
+     }
+     return missing == 0;
+ }
+
+}