From 96c48c81c184ee5b2f0b75412624c3e371caa49a Mon Sep 17 00:00:00 2001 From: jprocter Date: Fri, 13 Jan 2006 15:04:06 +0000 Subject: [PATCH] methods for making and accessing objects in org.vamsa.objects.core git-svn-id: https://svn.lifesci.dundee.ac.uk/svn/repository/trunk@111 be28352e-c001-0410-b1a7-c7978e42abec --- src/org/vamsas/objects/utils/Format.java | 612 ++++++++++++++++++++++++++++++ src/org/vamsas/objects/utils/Seq.java | 97 +++++ src/org/vamsas/objects/utils/SeqAln.java | 212 +++++++++++ src/org/vamsas/objects/utils/SeqSet.java | 130 +++++++ 4 files changed, 1051 insertions(+) create mode 100644 src/org/vamsas/objects/utils/Format.java create mode 100644 src/org/vamsas/objects/utils/Seq.java create mode 100644 src/org/vamsas/objects/utils/SeqAln.java create mode 100644 src/org/vamsas/objects/utils/SeqSet.java diff --git a/src/org/vamsas/objects/utils/Format.java b/src/org/vamsas/objects/utils/Format.java new file mode 100644 index 0000000..43db402 --- /dev/null +++ b/src/org/vamsas/objects/utils/Format.java @@ -0,0 +1,612 @@ +/* + * Cay S. Horstmann & Gary Cornell, Core Java + * Published By Sun Microsystems Press/Prentice-Hall + * Copyright (C) 1997 Sun Microsystems Inc. + * All Rights Reserved. + * + * Permission to use, copy, modify, and distribute this + * software and its documentation for NON-COMMERCIAL purposes + * and without fee is hereby granted provided that this + * copyright notice appears in all copies. + * + * THE AUTHORS AND PUBLISHER MAKE NO REPRESENTATIONS OR + * WARRANTIES ABOUT THE SUITABILITY OF THE SOFTWARE, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE + * IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A + * PARTICULAR PURPOSE, OR NON-INFRINGEMENT. THE AUTHORS + * AND PUBLISHER SHALL NOT BE LIABLE FOR ANY DAMAGES SUFFERED + * BY LICENSEE AS A RESULT OF USING, MODIFYING OR DISTRIBUTING + * THIS SOFTWARE OR ITS DERIVATIVES. + */ + +/** + * A class for formatting numbers that follows printf conventions. 
+ * Also implements C-like atoi and atof functions + * @version 1.03 25 Oct 1997 + * @author Cay Horstmann + */ + +package org.vamsas.objects.utils; + +import java.io.*; + +public class Format { /** + * Formats the number following printf conventions. + * Main limitation: Can only handle one format parameter at a time + * Use multiple Format objects to format more than one number + * @param s the format string following printf conventions + * The string has a prefix, a format code and a suffix. The prefix and suffix + * become part of the formatted output. The format code directs the + * formatting of the (single) parameter to be formatted. The code has the + * following structure + * + * @exception IllegalArgumentException if bad format + */ + + public Format(String s) { + width = 0; + precision = -1; + pre = ""; + post = ""; + leading_zeroes = false; + show_plus = false; + alternate = false; + show_space = false; + left_align = false; + fmt = ' '; + + int state = 0; + int length = s.length(); + int parse_state = 0; + // 0 = prefix, 1 = flags, 2 = width, 3 = precision, + // 4 = format, 5 = end + int i = 0; + + while (parse_state == 0) { + if (i >= length) + parse_state = 5; + else if (s.charAt(i) == '%') { + if (i < length - 1) { + if (s.charAt(i + 1) == '%') { + pre = pre + '%'; + i++; + } else + parse_state = 1; + } else + throw new java.lang.IllegalArgumentException(); + } else + pre = pre + s.charAt(i); + i++; + } + while (parse_state == 1) { + if (i >= length) + parse_state = 5; + else if (s.charAt(i) == ' ') + show_space = true; + else if (s.charAt(i) == '-') + left_align = true; + else if (s.charAt(i) == '+') + show_plus = true; + else if (s.charAt(i) == '0') + leading_zeroes = true; + else if (s.charAt(i) == '#') + alternate = true; + else { + parse_state = 2; + i--; + } + i++; + } + while (parse_state == 2) { + if (i >= length) + parse_state = 5; + else if ('0' <= s.charAt(i) && s.charAt(i) <= '9') { + width = width * 10 + s.charAt(i) - '0'; + i++; + } 
else if (s.charAt(i) == '.') { + parse_state = 3; + precision = 0; + i++; + } else + parse_state = 4; + } + while (parse_state == 3) { + if (i >= length) + parse_state = 5; + else if ('0' <= s.charAt(i) && s.charAt(i) <= '9') { + precision = precision * 10 + s.charAt(i) - '0'; + i++; + } else + parse_state = 4; + } + if (parse_state == 4) { + if (i >= length) + parse_state = 5; + else + fmt = s.charAt(i); + i++; + } + if (i < length) + post = s.substring(i, length); + } + + /** + * prints a formatted number following printf conventions + * @param s a PrintStream + * @param fmt the format string + * @param x the double to print + */ + + public static void print(java.io.PrintStream s, String fmt, double x) { + s.print(new Format(fmt).form(x)); + } + + /** + * prints a formatted number following printf conventions + * @param s a PrintStream + * @param fmt the format string + * @param x the long to print + */ + public static void print(java.io.PrintStream s, String fmt, long x) { + s.print(new Format(fmt).form(x)); + } + + /** + * prints a formatted number following printf conventions + * @param s a PrintStream + * @param fmt the format string + * @param x the character to + */ + + public static void print(java.io.PrintStream s, String fmt, char x) { + s.print(new Format(fmt).form(x)); + } + + /** + * prints a formatted number following printf conventions + * @param s a PrintStream, fmt the format string + * @param x a string that represents the digits to print + */ + + public static void print(java.io.PrintStream s, String fmt, String x) { + s.print(new Format(fmt).form(x)); + } + + /** + * Converts a string of digits (decimal, octal or hex) to an integer + * @param s a string + * @return the numeric value of the prefix of s representing a base 10 integer + */ + + public static int atoi(String s) { + return (int)atol(s); + } + + /** + * Converts a string of digits (decimal, octal or hex) to a long integer + * @param s a string + * @return the numeric value of the 
prefix of s representing a base 10 integer + */ + + public static long atol(String s) { + int i = 0; + + while (i < s.length() && Character.isWhitespace(s.charAt(i))) + i++; + if (i < s.length() && s.charAt(i) == '0') { + if (i + 1 < s.length() && (s.charAt(i + 1) == 'x' || s.charAt(i + 1) == 'X')) + return parseLong(s.substring(i + 2), 16); + else + return parseLong(s, 8); + } else + return parseLong(s, 10); + } + + private static long parseLong(String s, int base) { + int i = 0; + int sign = 1; + long r = 0; + + while (i < s.length() && Character.isWhitespace(s.charAt(i))) + i++; + if (i < s.length() && s.charAt(i) == '-') { + sign = -1; + i++; + } else if (i < s.length() && s.charAt(i) == '+') { + i++; + } + while (i < s.length()) { + char ch = s.charAt(i); + if ('0' <= ch && ch < '0' + base) + r = r * base + ch - '0'; + else if ('A' <= ch && ch < 'A' + base - 10) + r = r * base + ch - 'A' + 10 ; + else if ('a' <= ch && ch < 'a' + base - 10) + r = r * base + ch - 'a' + 10 ; + else + return r * sign; + i++; + } + return r * sign; + } + + /** + * Converts a string of digits to an double + * @param s a string + */ + + public static double atof(String s) { + int i = 0; + int sign = 1; + double r = 0; // integer part + double f = 0; // fractional part + double p = 1; // exponent of fractional part + int state = 0; // 0 = int part, 1 = frac part + + while (i < s.length() && Character.isWhitespace(s.charAt(i))) + i++; + if (i < s.length() && s.charAt(i) == '-') { + sign = -1; + i++; + } else if (i < s.length() && s.charAt(i) == '+') { + i++; + } + while (i < s.length()) { + char ch = s.charAt(i); + if ('0' <= ch && ch <= '9') { + if (state == 0) + r = r * 10 + ch - '0'; + else if (state == 1) { + p = p / 10; + r = r + p * (ch - '0'); + } + } else if (ch == '.') { + if (state == 0) + state = 1; + else + return sign * r; + } else if (ch == 'e' || ch == 'E') { + long e = (int)parseLong(s.substring(i + 1), 10); + return sign * r * Math.pow(10, e); + } else + return sign * 
r;
+      i++;
+    }
+    return sign * r;
+  }
+
+  /**
+   * Formats a double into a string (like sprintf in C).
+   * If no precision was given in the format string, this call sets the
+   * precision of this Format object to the printf default of 6.
+   * @param x the number to format
+   * @return the formatted string, including the prefix and suffix of the format
+   * @exception IllegalArgumentException if the format code is not one of f, e, E, g, G
+   */
+
+  public String form(double x) {
+    String r;
+    if (precision < 0)
+      precision = 6;
+    int s = 1;
+    if (x < 0) {
+      x = -x;
+      s = -1;
+    }
+    if (fmt == 'f')
+      r = fixed_format(x);
+    else if (fmt == 'e' || fmt == 'E' || fmt == 'g' || fmt == 'G')
+      r = exp_format(x);
+    else
+      throw new java.lang.IllegalArgumentException();
+
+    return pad(sign(s, r));
+  }
+
+  /**
+   * Formats a long integer into a string (like sprintf in C).
+   * For the d and i codes the value is printed as a signed decimal; for
+   * o, x and X the bit pattern of the value is printed, so negative
+   * numbers appear in two's complement form.
+   * @param x the number to format
+   * @return the formatted string, including the prefix and suffix of the format
+   * @exception IllegalArgumentException if the format code is not one of d, i, o, x, X
+   */
+
+  public String form(long x) {
+    String r;
+    // s stays 0 for o/x/X: sign() uses that to apply the '#' prefix
+    // instead of a sign character
+    int s = 0;
+    if (fmt == 'd' || fmt == 'i') {
+      if (x < 0) {
+        // strip the '-' textually rather than negating, so that
+        // Long.MIN_VALUE (which has no positive counterpart) still works
+        r = ("" + x).substring(1);
+        s = -1;
+      } else {
+        r = "" + x;
+        s = 1;
+      }
+    } else if (fmt == 'o')
+      r = convert(x, 3, 7, "01234567");
+    else if (fmt == 'x')
+      r = convert(x, 4, 15, "0123456789abcdef");
+    else if (fmt == 'X')
+      r = convert(x, 4, 15, "0123456789ABCDEF");
+    else
+      throw new java.lang.IllegalArgumentException();
+
+    return pad(sign(s, r));
+  }
+
+  /**
+   * Formats a character into a string (like sprintf in C)
+   * @param c the character to format
+   * @return the formatted string, including the prefix and suffix of the format
+   * @exception IllegalArgumentException if the format code is not c
+   */
+
+  public String form(char c) {
+    if (fmt != 'c')
+      throw new java.lang.IllegalArgumentException();
+
+    String r = "" + c;
+    return pad(r);
+  }
+
+  /**
+   * Formats a string into a larger string (like sprintf in C).
+   * A precision, if given, is the maximum number of characters taken
+   * from the string; shorter strings are used unchanged.
+   * @param s the string to format
+   * @return the formatted string, including the prefix and suffix of the format
+   * @exception IllegalArgumentException if the format code is not s
+   */
+
+  public String form(String s) {
+    if (fmt != 's')
+      throw new java.lang.IllegalArgumentException();
+    // only truncate when the string really is longer than the precision;
+    // an unconditional substring() throws for e.g. "%.10s" and "Hello"
+    if (precision >= 0 && precision < s.length())
+      s = s.substring(0, precision);
+    return pad(s);
+  }
+
+
+  /**
+   * a test stub for the format class
+   */
+
+  public static void main(String[] a) {
+    double x = 1.23456789012;
+    double y = 123;
+    double z = 1.2345e30;
+    double w = 1.02;
+    double u = 1.234e-5; 
+ int d = 0xCAFE; + Format.print(System.out, "x = |%f|\n", x); + Format.print(System.out, "u = |%20f|\n", u); + Format.print(System.out, "x = |% .5f|\n", x); + Format.print(System.out, "w = |%20.5f|\n", w); + Format.print(System.out, "x = |%020.5f|\n", x); + Format.print(System.out, "x = |%+20.5f|\n", x); + Format.print(System.out, "x = |%+020.5f|\n", x); + Format.print(System.out, "x = |% 020.5f|\n", x); + Format.print(System.out, "y = |%#+20.5f|\n", y); + Format.print(System.out, "y = |%-+20.5f|\n", y); + Format.print(System.out, "z = |%20.5f|\n", z); + + Format.print(System.out, "x = |%e|\n", x); + Format.print(System.out, "u = |%20e|\n", u); + Format.print(System.out, "x = |% .5e|\n", x); + Format.print(System.out, "w = |%20.5e|\n", w); + Format.print(System.out, "x = |%020.5e|\n", x); + Format.print(System.out, "x = |%+20.5e|\n", x); + Format.print(System.out, "x = |%+020.5e|\n", x); + Format.print(System.out, "x = |% 020.5e|\n", x); + Format.print(System.out, "y = |%#+20.5e|\n", y); + Format.print(System.out, "y = |%-+20.5e|\n", y); + + Format.print(System.out, "x = |%g|\n", x); + Format.print(System.out, "z = |%g|\n", z); + Format.print(System.out, "w = |%g|\n", w); + Format.print(System.out, "u = |%g|\n", u); + Format.print(System.out, "y = |%.2g|\n", y); + Format.print(System.out, "y = |%#.2g|\n", y); + + Format.print(System.out, "d = |%d|\n", d); + Format.print(System.out, "d = |%20d|\n", d); + Format.print(System.out, "d = |%020d|\n", d); + Format.print(System.out, "d = |%+20d|\n", d); + Format.print(System.out, "d = |% 020d|\n", d); + Format.print(System.out, "d = |%-20d|\n", d); + Format.print(System.out, "d = |%20.8d|\n", d); + Format.print(System.out, "d = |%x|\n", d); + Format.print(System.out, "d = |%20X|\n", d); + Format.print(System.out, "d = |%#20x|\n", d); + Format.print(System.out, "d = |%020X|\n", d); + Format.print(System.out, "d = |%20.8x|\n", d); + Format.print(System.out, "d = |%o|\n", d); + Format.print(System.out, "d = |%020o|\n", d); + 
Format.print(System.out, "d = |%#20o|\n", d); + Format.print(System.out, "d = |%#020o|\n", d); + Format.print(System.out, "d = |%20.12o|\n", d); + + Format.print(System.out, "s = |%-20s|\n", "Hello"); + Format.print(System.out, "s = |%-20c|\n", '!'); + + // regression test to confirm fix of reported bugs + + Format.print(System.out, "|%i|\n", Long.MIN_VALUE); + + Format.print(System.out, "|%6.2e|\n", 0.0); + Format.print(System.out, "|%6.2g|\n", 0.0); + + Format.print(System.out, "|%6.2f|\n", 9.99); + Format.print(System.out, "|%6.2f|\n", 9.999); + + Format.print(System.out, "|%6.0f|\n", 9.999); + } + + private static String repeat(char c, int n) { + if (n <= 0) + return ""; + StringBuffer s = new StringBuffer(n); + for (int i = 0; i < n; i++) + s.append(c); + return s.toString(); + } + + private static String convert(long x, int n, int m, String d) { + if (x == 0) + return "0"; + String r = ""; + while (x != 0) { + r = d.charAt((int)(x & m)) + r; + x = x >>> n; + } + return r; + } + + private String pad(String r) { + String p = repeat(' ', width - r.length()); + if (left_align) + return pre + r + p + post; + else + return pre + p + r + post; + } + + private String sign(int s, String r) { + String p = ""; + if (s < 0) + p = "-"; + else if (s > 0) { + if (show_plus) + p = "+"; + else if (show_space) + p = " "; + } else { + if (fmt == 'o' && alternate && r.length() > 0 && r.charAt(0) != '0') + p = "0"; + else if (fmt == 'x' && alternate) + p = "0x"; + else if (fmt == 'X' && alternate) + p = "0X"; + } + int w = 0; + if (leading_zeroes) + w = width; + else if ((fmt == 'd' || fmt == 'i' || fmt == 'x' || fmt == 'X' || fmt == 'o') + && precision > 0) + w = precision; + + return p + repeat('0', w - p.length() - r.length()) + r; + } + + private String fixed_format(double d) { + boolean removeTrailing + = (fmt == 'G' || fmt == 'g') && !alternate; + // remove trailing zeroes and decimal point + + if (d > 0x7FFFFFFFFFFFFFFFL) + return exp_format(d); + if (precision == 0) + 
return (long)(d + 0.5) + (removeTrailing ? "" : ".");
+
+    long whole = (long)d;
+    double fr = d - whole; // fractional part
+    if (fr >= 1 || fr < 0)
+      return exp_format(d);
+
+    double factor = 1;
+    String leading_zeroes = "";
+    for (int i = 1; i <= precision && factor <= 0x7FFFFFFFFFFFFFFFL; i++) {
+      factor *= 10;
+      leading_zeroes = leading_zeroes + "0";
+    }
+    long l = (long) (factor * fr + 0.5);
+    if (l >= factor) {
+      l = 0;
+      whole++;
+    } // CSH 10-25-97
+
+    String z = leading_zeroes + l;
+    z = "." + z.substring(z.length() - precision, z.length());
+
+    if (removeTrailing) {
+      int t = z.length() - 1;
+      while (t >= 0 && z.charAt(t) == '0')
+        t--;
+      if (t >= 0 && z.charAt(t) == '.')
+        t--;
+      z = z.substring(0, t + 1);
+    }
+
+    return whole + z;
+  }
+
+  /**
+   * Formats a non-negative double in exponential notation: a mantissa
+   * produced by fixed_format, the letter e or E, a sign and a three digit
+   * exponent. For the g and G codes the number is passed to fixed_format
+   * instead when the decimal exponent is at least -4 and below the precision.
+   * @param d the (non-negative) number to format
+   * @return the formatted digits, without sign or padding
+   */
+  private String exp_format(double d) {
+    // the scaling loop below divides until the value drops under 10,
+    // which never happens for infinity; printf prints inf / INF here
+    if (Double.isInfinite(d))
+      return (fmt == 'E' || fmt == 'G') ? "INF" : "inf";
+
+    String f = "";
+    int e = 0;
+    double dd = d;
+    double factor = 1;
+    if (d != 0) {
+      // scale into [1, 10): the test must be >= 10, otherwise exact powers
+      // of ten keep a two digit mantissa (10.0 came out as 10.000000e+000)
+      while (dd >= 10) {
+        e++;
+        factor /= 10;
+        dd = dd / 10;
+      }
+      while (dd < 1) {
+        e--;
+        factor *= 10;
+        dd = dd * 10;
+      }
+    }
+    if ((fmt == 'g' || fmt == 'G') && e >= -4 && e < precision)
+      return fixed_format(d);
+
+    d = d * factor;
+    f = f + fixed_format(d);
+
+    if (fmt == 'e' || fmt == 'g')
+      f = f + "e";
+    else
+      f = f + "E";
+
+    // exponent is always written with a sign and at least three digits
+    String p = "000";
+    if (e >= 0) {
+      f = f + "+";
+      p = p + e;
+    } else {
+      f = f + "-";
+      p = p + (-e);
+    }
+
+    return f + p.substring(p.length() - 3, p.length());
+  }
+
+  private int width;
+  private int precision;
+  private String pre;
+  private String post;
+  private boolean leading_zeroes;
+  private boolean show_plus;
+  private boolean alternate;
+  private boolean show_space;
+  private boolean left_align;
+  private char fmt; // one of cdeEfgGiosxX
+}
+
+
+
+
+
diff --git a/src/org/vamsas/objects/utils/Seq.java b/src/org/vamsas/objects/utils/Seq.java
new file mode 100644
index 0000000..8d9371d
--- /dev/null
+++ b/src/org/vamsas/objects/utils/Seq.java
@@ -0,0 +1,97 @@
+/*
+ * Created on 17-May-2005
+ *
+ * TODO To change the template for this generated file go to 
+ * Window - Preferences - Java - Code Style - Code Templates + */ +package org.vamsas.objects.utils; + +import java.io.BufferedWriter; +import java.io.IOException; +import java.io.OutputStream; +import java.io.OutputStreamWriter; +import java.util.regex.Pattern; + +import org.vamsas.objects.core.AlignmentSequence; +import org.vamsas.objects.core.Sequence; +import org.vamsas.objects.core.SequenceType; + +/** + * @author jimp + * + * TODO To change the template for this generated type comment go to + * Window - Preferences - Java - Code Style - Code Templates + */ +public class Seq { + + public static void write_PirSeq(OutputStream os, SequenceType seq, int wid) throws IOException { + BufferedWriter pir_out = new BufferedWriter(new OutputStreamWriter(os)); + pir_out.write(">P1;"+seq.getName()+"\n"); + int width = (wid<1) ? 80 : wid; + for (int j=0,k=seq.getSequence().length(); j"+seq.getName()+"\n"); + fasta_out.write(seq.getSequence()+"\n"); + fasta_out.flush(); + } + + public static void write_FastaSeq(OutputStream os, SequenceType seq, int wid) throws IOException { + BufferedWriter fasta_out = new BufferedWriter(new OutputStreamWriter(os)); + fasta_out.write(">"+seq.getName()+"\n"); + int width = (wid<1) ? 
80 : wid; + for (int j=0,k=seq.getSequence().length(); j0 + || !q.getDictionary().equals("info:iubmb.org/aminoacid")) + return false; + return valid_aadictionary_string(q.getSequence()); + } + + // follow references + if (s instanceof AlignmentSequence) { + Object w = (((AlignmentSequence) s).getRefid()); + if (w!=null && w!=s && w instanceof SequenceType) + return is_valid_aa_seq((SequenceType) w) + && valid_aadictionary_string(((AlignmentSequence) s).getSequence()); + } + + return false; + } +} diff --git a/src/org/vamsas/objects/utils/SeqAln.java b/src/org/vamsas/objects/utils/SeqAln.java new file mode 100644 index 0000000..dfe5e5b --- /dev/null +++ b/src/org/vamsas/objects/utils/SeqAln.java @@ -0,0 +1,212 @@ +/* + * Created on 17-May-2005 + * + * TODO To change the template for this generated file go to + * Window - Preferences - Java - Code Style - Code Templates + */ +package org.vamsas.objects.utils; + +import java.io.BufferedOutputStream; +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.util.Hashtable; +import java.util.StringTokenizer; +import java.util.Vector; +import java.util.regex.Pattern; + +import vamsas.objects.simple.Alignment; +import vamsas.objects.simple.Sequence; +import vamsas.objects.simple.SequenceSet; + +/** + * @author jimp + * + * TODO To change the template for this generated type comment go to + * Window - Preferences - Java - Code Style - Code Templates + */ +public class SeqAln extends vamsas.objects.simple.Alignment { + + public static Sequence[] ReadClustalFile(InputStream os) throws Exception { + + Pattern nonGap = Pattern.compile("[A-Z*0-9]", Pattern.CASE_INSENSITIVE); + String gapchars = ""; + char gapchar='-'; + + int i = 0; + boolean flag = false; + + Vector headers = new Vector(); + Hashtable seqhash = new Hashtable(); + Sequence[] seqs=null; + int noSeqs = 0; + String line; + + try { + BufferedReader ins = 
new BufferedReader(new InputStreamReader(os)); + while ((line = ins.readLine()) != null) { + if (line.indexOf(" ") != 0) { + java.util.StringTokenizer str = new StringTokenizer(line," "); + String id = ""; + + if (str.hasMoreTokens()) { + id = str.nextToken(); + if (id.equals("CLUSTAL")) { + flag = true; + } else { + if (flag) { + StringBuffer tempseq; + if (seqhash.containsKey(id)) { + tempseq = (StringBuffer)seqhash.get(id); + } else { + tempseq = new StringBuffer(); + seqhash.put(id,tempseq); + } + + if (!(headers.contains(id))) { + headers.addElement(id); + } + + tempseq.append(str.nextToken()); + } + } + } + } + } + + } catch (IOException e) { + throw(new Exception("Exception parsing clustal file ",e)); + } + + if (flag) { + noSeqs = headers.size(); + + //Add sequences to the hash + seqs = new Sequence[headers.size()]; + for (i = 0; i < headers.size(); i++ ) { + if ( seqhash.get(headers.elementAt(i)) != null) { + + Sequence newSeq = new Sequence(headers.elementAt(i).toString(), + seqhash.get(headers.elementAt(i).toString()).toString()); + + seqs[i]=newSeq; + + } else { + throw(new Exception("Bizarreness! 
Can't find sequence for " + headers.elementAt(i)));
+        }
+      }
+    }
+    return seqs;
+  }
+
+  /**
+   * Writes an alignment to a stream in CLUSTAL format: a "CLUSTAL" header
+   * line followed by blocks of at most 60 residues per sequence, each row
+   * prefixed with the sequence id left-aligned in a column of at least
+   * 16 characters. Only the leading run of non-null entries of the
+   * sequence array is written. The stream is flushed but not closed.
+   * @param os the stream to write to
+   * @param seqAl the alignment whose sequences are written
+   * @throws IOException if writing to the stream fails
+   */
+  public static void WriteClustalWAlignment(java.io.OutputStream os, Alignment seqAl) throws IOException {
+    Sequence[] s = seqAl.getSeqs().getSeqs();
+
+    java.io.BufferedWriter out = new BufferedWriter(new java.io.OutputStreamWriter(os));
+
+    out.write("CLUSTAL\n\n");
+
+    int max = 0;
+    int maxid = 0;
+    int nseqs = 0;
+
+    // find the longest sequence and the longest id among the leading
+    // non-null entries
+    while (nseqs < s.length && s[nseqs] != null) {
+      max = Math.max(max, s[nseqs].getSeq().length());
+      maxid = Math.max(maxid, s[nseqs].getId().length());
+      nseqs++;
+    }
+
+    if (maxid < 15) {
+      maxid = 15;
+    }
+    maxid++;
+    int len = 60;
+    // round up: "max / len + 1" produced an extra block holding nothing
+    // but ids whenever max was an exact multiple of len
+    int nochunks = (max + len - 1) / len;
+    Format idFormat = new Format("%-" + maxid + "s");
+
+    for (int i = 0; i < nochunks; i++) {
+      int start = i * len;
+      for (int j = 0; j < nseqs; j++) {
+        String seq = s[j].getSeq();
+        int end = Math.min(start + len, seq.length());
+        out.write(idFormat.form(s[j].getId() + " "));
+        if (start < end) {
+          out.write(seq.substring(start, end));
+        }
+        // always end the row, even when this sequence has no residues
+        // left, so that the next id does not run on to the same line
+        out.write("\n");
+      }
+      out.write("\n");
+    }
+    // the writer is local to this method: without a flush the buffered
+    // text never reaches the caller's stream
+    out.flush();
+  }
+
+
+  public static Alignment make_Alignment(vamsas.objects.simple.Action origin, Sequence[] seqs, String[] source) throws Exception {
+    Pattern nonGap = Pattern.compile("[A-Z*0-9]", Pattern.CASE_INSENSITIVE);
+    boolean gapsset = false;
+    char gapchar='-';
+    int seqLength = 0;
+
+
+    for (int i=0, nseq=seqs.length; i"+seqs[i].getId()+") is not aligned.\n"));//TODO: move this to assertions part of Alignment
+
+      // common check for any sequence... 
+ if (gaps!=null && gaps.length()>0) { + if (!gapsset) + gapchar = gaps.charAt(0); + for (int c=0, gc=gaps.length(); c"+seqs[i].getId()+"\n"); + if (width<=0) { + fasta_out.write(seqs[i].getSeq()+"\n"); + } else { + String tempseq = seqs[i].getSeq(); + int j=0, k=tempseq.length(); + while (j=width) { + fasta_out.write(tempseq, j, width); + } else { + fasta_out.write(tempseq, j, d); + } + fasta_out.write("\n"); + j+=width; + } + } + } + fasta_out.flush(); + } + /** + * TODO: introduce a dictionary parameter for qualified sequence symbols + * Reads a sequence set from a stream - will only read prescribed amino acid + * symbols. + * @param os + * @return + * @throws IOException + */ + public static Sequence[] read_SeqFasta(InputStream os) throws IOException { + Vector seqs = new Vector(); + int nseq = 0; + BufferedReader infasta = new BufferedReader(new InputStreamReader(os)); + + /// TODO: decide on where this routine should live... current best guess is vamsas.objects.io + String line; + Sequence seq = null; + Pattern aaMatch = Pattern.compile("[ARNDCQEGHILKMFPSTUWYV]", Pattern.CASE_INSENSITIVE); + String sname = "", seqstr=null; + do { + line = infasta.readLine(); + if (line==null || line.startsWith(">")) { + if (seqstr!=null) + seqs.add((Object) new Sequence(sname.substring(1), seqstr)); + sname = line; // remove > + seqstr=""; + } else { + String subseq = Pattern.compile("//s+").matcher(line).replaceAll(""); + seqstr += subseq; + } + } while (line!=null); + nseq = seqs.size(); + if (nseq>0) { + // TODO:POSS: should really return a sequence if there's only one in the file. + Sequence[] seqset = new Sequence[nseq]; + for (int i=0; i