From a9e0b2497cae20cee8ec0d8c66d51e0d819b8581 Mon Sep 17 00:00:00 2001 From: jprocter Date: Thu, 14 Dec 2006 18:18:26 +0000 Subject: [PATCH] refactoring org to uk git-svn-id: https://svn.lifesci.dundee.ac.uk/svn/repository/trunk@278 be28352e-c001-0410-b1a7-c7978e42abec --- .../ac/vamsas/objects/utils/AppDataReference.java | 161 +++++ src/uk/ac/vamsas/objects/utils/DocumentStuff.java | 35 ++ src/uk/ac/vamsas/objects/utils/Format.java | 612 ++++++++++++++++++++ .../ac/vamsas/objects/utils/GlyphDictionary.java | 23 + .../ac/vamsas/objects/utils/ProvenanceStuff.java | 46 ++ src/uk/ac/vamsas/objects/utils/Seq.java | 131 +++++ src/uk/ac/vamsas/objects/utils/SeqAln.java | 238 ++++++++ src/uk/ac/vamsas/objects/utils/SeqSet.java | 138 +++++ .../ac/vamsas/objects/utils/SymbolDictionary.java | 19 + .../objects/utils/document/VersionEntries.java | 49 ++ 10 files changed, 1452 insertions(+) create mode 100644 src/uk/ac/vamsas/objects/utils/AppDataReference.java create mode 100644 src/uk/ac/vamsas/objects/utils/DocumentStuff.java create mode 100644 src/uk/ac/vamsas/objects/utils/Format.java create mode 100644 src/uk/ac/vamsas/objects/utils/GlyphDictionary.java create mode 100644 src/uk/ac/vamsas/objects/utils/ProvenanceStuff.java create mode 100644 src/uk/ac/vamsas/objects/utils/Seq.java create mode 100644 src/uk/ac/vamsas/objects/utils/SeqAln.java create mode 100644 src/uk/ac/vamsas/objects/utils/SeqSet.java create mode 100644 src/uk/ac/vamsas/objects/utils/SymbolDictionary.java create mode 100644 src/uk/ac/vamsas/objects/utils/document/VersionEntries.java diff --git a/src/uk/ac/vamsas/objects/utils/AppDataReference.java b/src/uk/ac/vamsas/objects/utils/AppDataReference.java new file mode 100644 index 0000000..d982e1f --- /dev/null +++ b/src/uk/ac/vamsas/objects/utils/AppDataReference.java @@ -0,0 +1,161 @@ +/** + * + */ +package uk.ac.vamsas.objects.utils; +import java.util.Vector; + + +import uk.ac.vamsas.client.ClientHandle; +import uk.ac.vamsas.client.UserHandle; +import uk.ac.vamsas.client.simpleclient.VamsasArchive; +import uk.ac.vamsas.client.simpleclient.VamsasArchiveReader; +import uk.ac.vamsas.objects.core.*; +/** + * Form, accessors and validation for ApplicationData references in + * vamsas document. + * TODO: LATER:extend XML Schema to properly validate against the same forms required by this class + * TODO: VAMSAS: URNS for appDatas are supposed to be unique, aren't they ? + */ +public class AppDataReference { + /** + * search interface for collecting particular types of AppDatas in a vamsas document + * @author jimp + * + */ + interface IAppDSearch { + /** + * process the appData Vobject d + * @param d + * @return true if appData should be collected + */ + public boolean process(AppData d); + } + /** + * collect all appData reference strings in a vamsas document + * @param doc + * @return vector of String objects + */ + static public Vector getAppDataReferences(VamsasDocument doc) { + if ((doc!=null) && (doc.getApplicationDataCount()>0)) { + Vector apdrefs = new Vector(); + ApplicationData[] appdatas = doc.getApplicationData(); + for (int q=0; q0) + return apdrefs; + } + return null; + } + /** + * General search through the set of AppData objects for a particular profile of Client and User handle. + * @param doc + * @param test interface implemented by the filter selecting particular AppDatas. + * @param cascade if true only User objects for ApplicationData objects that test.process returned true will be tested. + * @return set of uk.ac.vamsas.objects.core.AppData objects for which test.process returned true + */ + static public Vector searchAppDatas(VamsasDocument doc, IAppDSearch test, boolean cascade) { + if ((doc!=null) && (doc.getApplicationDataCount()>0)) { + Vector apdrefs = new Vector(); + ApplicationData[] appdatas = doc.getApplicationData(); + for (int q=0; q0) + return apdrefs; + } + return null; + } + static public boolean equals(User p, UserHandle u) { + if (p.getFullname().equals(u.getFullName()) + && p.getOrganization().equals(u.getOrganization())) + return true; + return false; + } + /** + * returns true if Name matches in c and p, and Urn's match (or c.getUrn()==null) and Version's match (or c.getVersion()==null) + * @param p + * @param c + * @return match of p on template c. + */ + static public boolean equals(ApplicationData p, ClientHandle c) { + if ( + //((c.getClientUrn()==null) || p.getUrn().equals(c.getClientUrn())) + //&& + (p.getName().equals(c.getClientName())) + && + ((c.getVersion()==null) || (p.getVersion().equals(c.getVersion()))) + ) + return true; + return false; + } + /** + * Searches document appData structure for particular combinations of client and user data + * @param doc the data + * @param user template user data to match against + * @see AppDataReference.equals(uk.ac.vamsas.objects.core.User, uk.ac.vamsas.client.UserHandle) + * @param app + * @see AppDataReference.equals(uk.ac.vamsas.objects.core.ApplicationData, uk.ac.vamsas.client.ClientHandle) + * @return set of matching client app datas for this client and user combination + */ + static public Vector getUserandApplicationsData(VamsasDocument doc, UserHandle user, ClientHandle app) { + if (doc==null) { + return null; + } + final UserHandle u = user; + final ClientHandle c = app; + + IAppDSearch match = new IAppDSearch() { + public boolean process(AppData p) { + if (p instanceof User) { + if (AppDataReference.equals((User) p, u)) + return true; + } else + if (p instanceof ApplicationData) { + if (AppDataReference.equals((ApplicationData) p, c)) + return true; + } + return false; + } + }; + + return searchAppDatas(doc, match, true); // only return AppDatas belonging to appdata app. + } + /** + * safely creates a new appData reference + * @param dest destination document Vobject + * @param entry base application reference to make unique + */ + public static String uniqueAppDataReference(VamsasDocument dest,String base) { + String urn = new String(base); + + for (int i=0, j=dest.getApplicationDataCount(); i + *
  • a % (required) + *
  • a modifier (optional) + *
    + *
    +
    forces display of + for positive numbers + *
    0
    show leading zeroes + *
    -
    align left in the field + *
    space
    prepend a space in front of positive numbers + *
    #
    use "alternate" format. Add 0 or 0x for octal or hexadecimal numbers. Don't suppress trailing zeroes in general floating point format. + *
    + *
  • an integer denoting field width (optional) + *
  • a period followed by an integer denoting precision (optional) + *
  • a format descriptor (required) + *
    + *
    f
    floating point number in fixed format + *
    e, E
    floating point number in exponential notation (scientific format). The E format results in an uppercase E for the exponent (1.14130E+003), the e format in a lowercase e. + *
    g, G
    floating point number in general format (fixed format for small numbers, exponential format for large numbers). Trailing zeroes are suppressed. The G format results in an uppercase E for the exponent (if any), the g format in a lowercase e. + *
    d, i
    integer in decimal + *
    x
    integer in hexadecimal + *
    o
    integer in octal + *
    s
    string + *
    c
    character + *
    + * + * @exception IllegalArgumentException if bad format + */ + + public Format(String s) { + width = 0; + precision = -1; + pre = ""; + post = ""; + leading_zeroes = false; + show_plus = false; + alternate = false; + show_space = false; + left_align = false; + fmt = ' '; + + int state = 0; + int length = s.length(); + int parse_state = 0; + // 0 = prefix, 1 = flags, 2 = width, 3 = precision, + // 4 = format, 5 = end + int i = 0; + + while (parse_state == 0) { + if (i >= length) + parse_state = 5; + else if (s.charAt(i) == '%') { + if (i < length - 1) { + if (s.charAt(i + 1) == '%') { + pre = pre + '%'; + i++; + } else + parse_state = 1; + } else + throw new java.lang.IllegalArgumentException(); + } else + pre = pre + s.charAt(i); + i++; + } + while (parse_state == 1) { + if (i >= length) + parse_state = 5; + else if (s.charAt(i) == ' ') + show_space = true; + else if (s.charAt(i) == '-') + left_align = true; + else if (s.charAt(i) == '+') + show_plus = true; + else if (s.charAt(i) == '0') + leading_zeroes = true; + else if (s.charAt(i) == '#') + alternate = true; + else { + parse_state = 2; + i--; + } + i++; + } + while (parse_state == 2) { + if (i >= length) + parse_state = 5; + else if ('0' <= s.charAt(i) && s.charAt(i) <= '9') { + width = width * 10 + s.charAt(i) - '0'; + i++; + } else if (s.charAt(i) == '.') { + parse_state = 3; + precision = 0; + i++; + } else + parse_state = 4; + } + while (parse_state == 3) { + if (i >= length) + parse_state = 5; + else if ('0' <= s.charAt(i) && s.charAt(i) <= '9') { + precision = precision * 10 + s.charAt(i) - '0'; + i++; + } else + parse_state = 4; + } + if (parse_state == 4) { + if (i >= length) + parse_state = 5; + else + fmt = s.charAt(i); + i++; + } + if (i < length) + post = s.substring(i, length); + } + + /** + * prints a formatted number following printf conventions + * @param s a PrintStream + * @param fmt the format string + * @param x the double to print + */ + + public static void print(java.io.PrintStream s, String fmt, double x) { + s.print(new Format(fmt).form(x)); + } + + /** + * prints a formatted number following printf conventions + * @param s a PrintStream + * @param fmt the format string + * @param x the long to print + */ + public static void print(java.io.PrintStream s, String fmt, long x) { + s.print(new Format(fmt).form(x)); + } + + /** + * prints a formatted number following printf conventions + * @param s a PrintStream + * @param fmt the format string + * @param x the character to + */ + + public static void print(java.io.PrintStream s, String fmt, char x) { + s.print(new Format(fmt).form(x)); + } + + /** + * prints a formatted number following printf conventions + * @param s a PrintStream, fmt the format string + * @param x a string that represents the digits to print + */ + + public static void print(java.io.PrintStream s, String fmt, String x) { + s.print(new Format(fmt).form(x)); + } + + /** + * Converts a string of digits (decimal, octal or hex) to an integer + * @param s a string + * @return the numeric value of the prefix of s representing a base 10 integer + */ + + public static int atoi(String s) { + return (int)atol(s); + } + + /** + * Converts a string of digits (decimal, octal or hex) to a long integer + * @param s a string + * @return the numeric value of the prefix of s representing a base 10 integer + */ + + public static long atol(String s) { + int i = 0; + + while (i < s.length() && Character.isWhitespace(s.charAt(i))) + i++; + if (i < s.length() && s.charAt(i) == '0') { + if (i + 1 < s.length() && (s.charAt(i + 1) == 'x' || s.charAt(i + 1) == 'X')) + return parseLong(s.substring(i + 2), 16); + else + return parseLong(s, 8); + } else + return parseLong(s, 10); + } + + private static long parseLong(String s, int base) { + int i = 0; + int sign = 1; + long r = 0; + + while (i < s.length() && Character.isWhitespace(s.charAt(i))) + i++; + if (i < s.length() && s.charAt(i) == '-') { + sign = -1; + i++; + } else if (i < s.length() && s.charAt(i) == '+') { + i++; + } + while (i < s.length()) { + char ch = s.charAt(i); + if ('0' <= ch && ch < '0' + base) + r = r * base + ch - '0'; + else if ('A' <= ch && ch < 'A' + base - 10) + r = r * base + ch - 'A' + 10 ; + else if ('a' <= ch && ch < 'a' + base - 10) + r = r * base + ch - 'a' + 10 ; + else + return r * sign; + i++; + } + return r * sign; + } + + /** + * Converts a string of digits to an double + * @param s a string + */ + + public static double atof(String s) { + int i = 0; + int sign = 1; + double r = 0; // integer part + double f = 0; // fractional part + double p = 1; // exponent of fractional part + int state = 0; // 0 = int part, 1 = frac part + + while (i < s.length() && Character.isWhitespace(s.charAt(i))) + i++; + if (i < s.length() && s.charAt(i) == '-') { + sign = -1; + i++; + } else if (i < s.length() && s.charAt(i) == '+') { + i++; + } + while (i < s.length()) { + char ch = s.charAt(i); + if ('0' <= ch && ch <= '9') { + if (state == 0) + r = r * 10 + ch - '0'; + else if (state == 1) { + p = p / 10; + r = r + p * (ch - '0'); + } + } else if (ch == '.') { + if (state == 0) + state = 1; + else + return sign * r; + } else if (ch == 'e' || ch == 'E') { + long e = (int)parseLong(s.substring(i + 1), 10); + return sign * r * Math.pow(10, e); + } else + return sign * r; + i++; + } + return sign * r; + } + + /** + * Formats a double into a string (like sprintf in C) + * @param x the number to format + * @return the formatted string + * @exception IllegalArgumentException if bad argument + */ + + public String form(double x) { + String r; + if (precision < 0) + precision = 6; + int s = 1; + if (x < 0) { + x = -x; + s = -1; + } + if (fmt == 'f') + r = fixed_format(x); + else if (fmt == 'e' || fmt == 'E' || fmt == 'g' || fmt == 'G') + r = exp_format(x); + else + throw new java.lang.IllegalArgumentException(); + + return pad(sign(s, r)); + } + + /** + * Formats a long integer into a string (like sprintf in C) + * @param x the number to format + * @return the formatted string + */ + + public String form(long x) { + String r; + int s = 0; + if (fmt == 'd' || fmt == 'i') { + if (x < 0) { + r = ("" + x).substring(1); + s = -1; + } else { + r = "" + x; + s = 1; + } + } else if (fmt == 'o') + r = convert(x, 3, 7, "01234567"); + else if (fmt == 'x') + r = convert(x, 4, 15, "0123456789abcdef"); + else if (fmt == 'X') + r = convert(x, 4, 15, "0123456789ABCDEF"); + else + throw new java.lang.IllegalArgumentException(); + + return pad(sign(s, r)); + } + + /** + * Formats a character into a string (like sprintf in C) + * @param x the value to format + * @return the formatted string + */ + + public String form(char c) { + if (fmt != 'c') + throw new java.lang.IllegalArgumentException(); + + String r = "" + c; + return pad(r); + } + + /** + * Formats a string into a larger string (like sprintf in C) + * @param x the value to format + * @return the formatted string + */ + + public String form(String s) { + if (fmt != 's') + throw new java.lang.IllegalArgumentException(); + if (precision >= 0) + s = s.substring(0, precision); + return pad(s); + } + + + /** + * a test stub for the format class + */ + + public static void main(String[] a) { + double x = 1.23456789012; + double y = 123; + double z = 1.2345e30; + double w = 1.02; + double u = 1.234e-5; + int d = 0xCAFE; + Format.print(System.out, "x = |%f|\n", x); + Format.print(System.out, "u = |%20f|\n", u); + Format.print(System.out, "x = |% .5f|\n", x); + Format.print(System.out, "w = |%20.5f|\n", w); + Format.print(System.out, "x = |%020.5f|\n", x); + Format.print(System.out, "x = |%+20.5f|\n", x); + Format.print(System.out, "x = |%+020.5f|\n", x); + Format.print(System.out, "x = |% 020.5f|\n", x); + Format.print(System.out, "y = |%#+20.5f|\n", y); + Format.print(System.out, "y = |%-+20.5f|\n", y); + Format.print(System.out, "z = |%20.5f|\n", z); + + Format.print(System.out, "x = |%e|\n", x); + Format.print(System.out, "u = |%20e|\n", u); + Format.print(System.out, "x = |% .5e|\n", x); + Format.print(System.out, "w = |%20.5e|\n", w); + Format.print(System.out, "x = |%020.5e|\n", x); + Format.print(System.out, "x = |%+20.5e|\n", x); + Format.print(System.out, "x = |%+020.5e|\n", x); + Format.print(System.out, "x = |% 020.5e|\n", x); + Format.print(System.out, "y = |%#+20.5e|\n", y); + Format.print(System.out, "y = |%-+20.5e|\n", y); + + Format.print(System.out, "x = |%g|\n", x); + Format.print(System.out, "z = |%g|\n", z); + Format.print(System.out, "w = |%g|\n", w); + Format.print(System.out, "u = |%g|\n", u); + Format.print(System.out, "y = |%.2g|\n", y); + Format.print(System.out, "y = |%#.2g|\n", y); + + Format.print(System.out, "d = |%d|\n", d); + Format.print(System.out, "d = |%20d|\n", d); + Format.print(System.out, "d = |%020d|\n", d); + Format.print(System.out, "d = |%+20d|\n", d); + Format.print(System.out, "d = |% 020d|\n", d); + Format.print(System.out, "d = |%-20d|\n", d); + Format.print(System.out, "d = |%20.8d|\n", d); + Format.print(System.out, "d = |%x|\n", d); + Format.print(System.out, "d = |%20X|\n", d); + Format.print(System.out, "d = |%#20x|\n", d); + Format.print(System.out, "d = |%020X|\n", d); + Format.print(System.out, "d = |%20.8x|\n", d); + Format.print(System.out, "d = |%o|\n", d); + Format.print(System.out, "d = |%020o|\n", d); + Format.print(System.out, "d = |%#20o|\n", d); + Format.print(System.out, "d = |%#020o|\n", d); + Format.print(System.out, "d = |%20.12o|\n", d); + + Format.print(System.out, "s = |%-20s|\n", "Hello"); + Format.print(System.out, "s = |%-20c|\n", '!'); + + // regression test to confirm fix of reported bugs + + Format.print(System.out, "|%i|\n", Long.MIN_VALUE); + + Format.print(System.out, "|%6.2e|\n", 0.0); + Format.print(System.out, "|%6.2g|\n", 0.0); + + Format.print(System.out, "|%6.2f|\n", 9.99); + Format.print(System.out, "|%6.2f|\n", 9.999); + + Format.print(System.out, "|%6.0f|\n", 9.999); + } + + private static String repeat(char c, int n) { + if (n <= 0) + return ""; + StringBuffer s = new StringBuffer(n); + for (int i = 0; i < n; i++) + s.append(c); + return s.toString(); + } + + private static String convert(long x, int n, int m, String d) { + if (x == 0) + return "0"; + String r = ""; + while (x != 0) { + r = d.charAt((int)(x & m)) + r; + x = x >>> n; + } + return r; + } + + private String pad(String r) { + String p = repeat(' ', width - r.length()); + if (left_align) + return pre + r + p + post; + else + return pre + p + r + post; + } + + private String sign(int s, String r) { + String p = ""; + if (s < 0) + p = "-"; + else if (s > 0) { + if (show_plus) + p = "+"; + else if (show_space) + p = " "; + } else { + if (fmt == 'o' && alternate && r.length() > 0 && r.charAt(0) != '0') + p = "0"; + else if (fmt == 'x' && alternate) + p = "0x"; + else if (fmt == 'X' && alternate) + p = "0X"; + } + int w = 0; + if (leading_zeroes) + w = width; + else if ((fmt == 'd' || fmt == 'i' || fmt == 'x' || fmt == 'X' || fmt == 'o') + && precision > 0) + w = precision; + + return p + repeat('0', w - p.length() - r.length()) + r; + } + + private String fixed_format(double d) { + boolean removeTrailing + = (fmt == 'G' || fmt == 'g') && !alternate; + // remove trailing zeroes and decimal point + + if (d > 0x7FFFFFFFFFFFFFFFL) + return exp_format(d); + if (precision == 0) + return (long)(d + 0.5) + (removeTrailing ? "" : "."); + + long whole = (long)d; + double fr = d - whole; // fractional part + if (fr >= 1 || fr < 0) + return exp_format(d); + + double factor = 1; + String leading_zeroes = ""; + for (int i = 1; i <= precision && factor <= 0x7FFFFFFFFFFFFFFFL; i++) { + factor *= 10; + leading_zeroes = leading_zeroes + "0"; + } + long l = (long) (factor * fr + 0.5); + if (l >= factor) { + l = 0; + whole++; + } // CSH 10-25-97 + + String z = leading_zeroes + l; + z = "." + z.substring(z.length() - precision, z.length()); + + if (removeTrailing) { + int t = z.length() - 1; + while (t >= 0 && z.charAt(t) == '0') + t--; + if (t >= 0 && z.charAt(t) == '.') + t--; + z = z.substring(0, t + 1); + } + + return whole + z; + } + + private String exp_format(double d) { + String f = ""; + int e = 0; + double dd = d; + double factor = 1; + if (d != 0) { + while (dd > 10) { + e++; + factor /= 10; + dd = dd / 10; + } + while (dd < 1) { + e--; + factor *= 10; + dd = dd * 10; + } + } + if ((fmt == 'g' || fmt == 'G') && e >= -4 && e < precision) + return fixed_format(d); + + d = d * factor; + f = f + fixed_format(d); + + if (fmt == 'e' || fmt == 'g') + f = f + "e"; + else + f = f + "E"; + + String p = "000"; + if (e >= 0) { + f = f + "+"; + p = p + e; + } else { + f = f + "-"; + p = p + (-e); + } + + return f + p.substring(p.length() - 3, p.length()); + } + + private int width; + private int precision; + private String pre; + private String post; + private boolean leading_zeroes; + private boolean show_plus; + private boolean alternate; + private boolean show_space; + private boolean left_align; + private char fmt; // one of cdeEfgGiosxXos +} + + + + + diff --git a/src/uk/ac/vamsas/objects/utils/GlyphDictionary.java b/src/uk/ac/vamsas/objects/utils/GlyphDictionary.java new file mode 100644 index 0000000..a49f4be --- /dev/null +++ b/src/uk/ac/vamsas/objects/utils/GlyphDictionary.java @@ -0,0 +1,23 @@ +package uk.ac.vamsas.objects.utils; + +/** + * dict attribute values for glyph symbol sets found in uk.ac.vamsas.objects.core.AnnotationElement + * TODO: add validators and multilength symbols. + * @author JimP + * + */ +public class GlyphDictionary { + /** + * standard H, E, or C three state secondary structure assignment. + */ + static final public String PROTEIN_SS_3STATE="aasecstr_3"; // HE, blank or C + /** + * default glyph type attribute indicates a UTF8 character + */ + static final public String DEFAULT="utf8"; + /** + * kyte and doolittle hydrophobicity + * TODO: specify this glyph set. + */ + static final public String PROTEIN_HD_HYDRO="kd_hydrophobicity"; +} diff --git a/src/uk/ac/vamsas/objects/utils/ProvenanceStuff.java b/src/uk/ac/vamsas/objects/utils/ProvenanceStuff.java new file mode 100644 index 0000000..f9cebb4 --- /dev/null +++ b/src/uk/ac/vamsas/objects/utils/ProvenanceStuff.java @@ -0,0 +1,46 @@ +package uk.ac.vamsas.objects.utils; + +import java.util.Date; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +import uk.ac.vamsas.objects.core.Entry; +import uk.ac.vamsas.objects.core.Provenance; + +public class ProvenanceStuff { + + /** + * stuff for making and doing things with provenance objects. + */ + static Log log = LogFactory.getLog(ProvenanceStuff.class); + + /** + * @param app TODO + * @param action + * text for action entry + * @return new Provenance entry for ArchiveWriter created docs. + * TODO: Verify and move to SimpleClient class for provenance handling + */ + public static Entry newProvenanceEntry(String app, String user, String action) { + log.debug("Adding ProvenanceEntry("+user+","+action+")"); + Entry e = new Entry(); + e.setApp(app); + e.setAction(action); + e.setUser(user); + e.setDate(new org.exolab.castor.types.Date(new Date())); + return e; + } + public static Provenance newProvenance(Entry entry) { + Provenance list = new Provenance(); + list.addEntry(entry); + return list; + } + public static Provenance newProvenance(String user, String action) { + return newProvenance(ProvenanceStuff.newProvenanceEntry("vamsasApp:ExampleVamsasClient/alpha", user, action)); + } + public static Provenance newProvenance(String app, String user, String action) { + return newProvenance(ProvenanceStuff.newProvenanceEntry(app, user, action)); + } + +} diff --git a/src/uk/ac/vamsas/objects/utils/Seq.java b/src/uk/ac/vamsas/objects/utils/Seq.java new file mode 100644 index 0000000..8ec3f9c --- /dev/null +++ b/src/uk/ac/vamsas/objects/utils/Seq.java @@ -0,0 +1,131 @@ +/* + * Created on 17-May-2005 + * + * TODO To change the template for this generated file go to + * Window - Preferences - Java - Code Style - Code Templates + */ +package uk.ac.vamsas.objects.utils; + +import java.io.BufferedWriter; +import java.io.IOException; +import java.io.OutputStream; +import java.io.OutputStreamWriter; +import java.util.regex.Pattern; + +import uk.ac.vamsas.objects.core.AlignmentSequence; +import uk.ac.vamsas.objects.core.Sequence; +import uk.ac.vamsas.objects.core.SequenceType; + +/** + * @author jimp + * + * TODO To change the template for this generated type comment go to + * Window - Preferences - Java - Code Style - Code Templates + */ +public class Seq { + + public static void write_PirSeq(OutputStream os, SequenceType seq, int wid) throws IOException { + BufferedWriter pir_out = new BufferedWriter(new OutputStreamWriter(os)); + pir_out.write(">P1;"+seq.getName()+"\n"); + int width = (wid<1) ? 80 : wid; + for (int j=0,k=seq.getSequence().length(); j"+seq.getName()+"\n"); + fasta_out.write(seq.getSequence()+"\n"); + fasta_out.flush(); + } + + public static void write_FastaSeq(OutputStream os, SequenceType seq, int wid) throws IOException { + BufferedWriter fasta_out = new BufferedWriter(new OutputStreamWriter(os)); + fasta_out.write(">"+seq.getName()+"\n"); + int width = (wid<1) ? 80 : wid; + for (int j=0,k=seq.getSequence().length(); jend on Sequence Vobject + if ((start-end)!=Sequence.length()) + seq.setEnd(end+Sequence.length()); + } + return seq; + } + public static AlignmentSequence newAlignmentSequence(String name, String alSequence, Sequence refseq, int start, int end) { + if (refseq!=null) { + AlignmentSequence asq = new AlignmentSequence(); + asq.setName(name); + asq.setSequence(alSequence); + asq.setRefid(refseq); + if (startrefseq.getEnd()) + end = refseq.getEnd(); + asq.setEnd(end); + return asq; + } + return null; + } + public static boolean is_valid_aa_seq(SequenceType s) { + Sequence q; + boolean validref=false; + if (s instanceof Sequence) { + q=(Sequence) s; + if (q.getDictionary()!=null + + && q.getDictionary().length()>0 + || !q.getDictionary().equals(SymbolDictionary.STANDARD_AA)) + return false; + return valid_aadictionary_string(q.getSequence(), SymbolDictionary.STANDARD_AA); + } + + // follow references + if (s instanceof AlignmentSequence) { + Object w = (((AlignmentSequence) s).getRefid()); + if (w!=null && w!=s && w instanceof SequenceType) + return is_valid_aa_seq((SequenceType) w) + && valid_aadictionary_string(((AlignmentSequence) s).getSequence(), SymbolDictionary.STANDARD_AA); + } + + return false; + } +} diff --git a/src/uk/ac/vamsas/objects/utils/SeqAln.java b/src/uk/ac/vamsas/objects/utils/SeqAln.java new file mode 100644 index 0000000..e9f49bd --- /dev/null +++ b/src/uk/ac/vamsas/objects/utils/SeqAln.java @@ -0,0 +1,238 @@ +/* + * Created on 17-May-2005 + * + * TODO To change the template for this generated file go to + * Window - Preferences - Java - Code Style - Code Templates + */ +package uk.ac.vamsas.objects.utils; + +import java.io.BufferedOutputStream; +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.util.Hashtable; +import java.util.StringTokenizer; +import java.util.Vector; +import java.util.regex.Pattern; + +import uk.ac.vamsas.objects.core.*; + +/** + * @author jimp + * + * TODO To change the template for this generated type comment go to Window - + * Preferences - Java - Code Style - Code Templates + */ +public class SeqAln extends uk.ac.vamsas.objects.core.Alignment { + + public static Sequence[] ReadClustalFile(InputStream os) throws Exception { + System.err.println("NOT FULLY IMPLEMENTED!"); // TODO: Finish adapting this method + Pattern nonGap = Pattern.compile("[A-Z*0-9]", Pattern.CASE_INSENSITIVE); + String gapchars = ""; + char gapchar = '-'; + + int i = 0; + boolean flag = false; + + Vector headers = new Vector(); + Hashtable seqhash = new Hashtable(); + Sequence[] seqs = null; + int noSeqs = 0; + String line; + + try { + BufferedReader ins = new BufferedReader(new InputStreamReader(os)); + while ((line = ins.readLine()) != null) { + if (line.indexOf(" ") != 0) { + java.util.StringTokenizer str = new StringTokenizer(line, " "); + String id = ""; + + if (str.hasMoreTokens()) { + id = str.nextToken(); + if (id.equals("CLUSTAL")) { + flag = true; + } else { + if (flag) { + StringBuffer tempseq; + if (seqhash.containsKey(id)) { + tempseq = (StringBuffer) seqhash.get(id); + } else { + tempseq = new StringBuffer(); + seqhash.put(id, tempseq); + } + + if (!(headers.contains(id))) { + headers.addElement(id); + } + + tempseq.append(str.nextToken()); + } + } + } + } + } + + } catch (IOException e) { + throw (new Exception("Exception parsing clustal file ", e)); + } + + if (flag) { + noSeqs = headers.size(); + + // Add sequences to the hash + seqs = new Sequence[headers.size()]; + for (i = 0; i < headers.size(); i++) { + if (seqhash.get(headers.elementAt(i)) != null) { + // TODO: develop automatic dictionary typing for sequences + Sequence newSeq = Seq.newSequence(headers.elementAt(i).toString(), + seqhash.get(headers.elementAt(i).toString()).toString(), + SymbolDictionary.STANDARD_AA,0,0); + + seqs[i] = newSeq; + + } else { + throw (new Exception("Bizarreness! Can't find sequence for " + + headers.elementAt(i))); + } + } + } + return seqs; + } + + public static void WriteClustalWAlignment(java.io.OutputStream os, + Alignment seqAl) throws IOException { + System.err.println("NOT FULLY IMPLEMENTED!"); // TODO: Finish adapting this method + AlignmentSequence[] s = seqAl.getAlignmentSequence(); + + java.io.BufferedWriter out = new BufferedWriter( + new java.io.OutputStreamWriter(os)); + + out.write("CLUSTAL\n\n"); + + int max = 0; + int maxid = 0; + + int i = 0; + + while (i < s.length && s[i] != null) { + String tmp = s[i].getId(); + + if (s[i].getSequence().length() > max) { + max = s[i].getSequence().length(); + } + if (tmp.length() > maxid) { + maxid = tmp.length(); + } + i++; + } + + if (maxid < 15) { + maxid = 15; + } + maxid++; + int len = 60; + int nochunks = max / len + 1; + + for (i = 0; i < nochunks; i++) { + int j = 0; + while (j < s.length && s[j] != null) { + out.write(new Format("%-" + maxid + "s").form(s[j].getId() + " ")); + int start = i * len; + int end = start + len; + + if (end < s[j].getSequence().length() && start < s[j].getSequence().length()) { + out.write(s[j].getSequence().substring(start, end) + "\n"); + } else { + if (start < s[j].getSequence().length()) { + out.write(s[j].getSequence().substring(start) + "\n"); + } + } + j++; + } + out.write("\n"); + + } + } + /** + * manufacture an alignment/dataset from an array of sequences + * @param origin + * @param seqs + * @return + * @throws Exception + */ + public static Alignment make_Alignment(Entry origin, + Sequence[] seqs) throws Exception { + System.err.println("NOT FULLY IMPLEMENTED!"); // TODO: Finish adapting this method + Alignment al = new Alignment(); + al.setProvenance(ProvenanceStuff.newProvenance(origin)); + + Pattern nonGap = Pattern.compile("[A-Z*0-9]", Pattern.CASE_INSENSITIVE); + boolean gapsset = false; + char gapchar = '-'; + int seqLength = 0; + + for (int i = 0, nseq = seqs.length; i < nseq; i++) { + String seq = seqs[i].getSequence(); + String gaps = nonGap.matcher(seq).replaceAll(""); + if (seqLength == 0) { + seqLength = seq.length(); + } else if (seqLength != seq.length()) + throw (new Exception(i + "th Sequence (>" + seqs[i].getId() + + ") is not aligned.\n"));// TODO: move this to assertions part of + // Alignment + + // common check for any sequence... + if (gaps != null && gaps.length() > 0) { + if (!gapsset) + gapchar = gaps.charAt(0); + for (int c = 0, gc = gaps.length(); c < gc; c++) { + if (gapchar != gaps.charAt(c)) { + throw (new IOException("Inconsistent gap characters in sequence " + + i + ": '" + seq + "'")); + } + } + } + AlignmentSequence sq = new AlignmentSequence(); + // TODO: use as basis of default AlignSequence(Sequence) constructor. + sq.setSequence(seq); + sq.setName(seqs[i].getId()); + sq.setRefid(seqs[i].getVorbaId()); + sq.setStart(seqs[i].getStart()); + sq.setEnd(seqs[i].getEnd()); + al.addAlignmentSequence(sq); + } + al.setGapChar(String.valueOf(gapchar)); + return al; + } + + public static Alignment read_FastaAlignment(InputStream os, Entry entry) + throws Exception { + Sequence[] seqs; + System.err.println("NOT FULLY IMPLEMENTED!"); // TODO: Finish adapting this method + try { + seqs = SeqSet.read_SeqFasta(os); + if (seqs == null) + throw (new Exception("Empty alignment stream!\n")); + } catch (Exception e) { + throw new Exception("Invalid fasta alignment\n", e); + } + + return make_Alignment(entry, seqs); + } + + public static Alignment read_ClustalAlignment(InputStream os, Entry entry) + throws Exception { + Sequence[] seqs; + try { + seqs = SeqAln.ReadClustalFile(os); + if (seqs == null) + throw (new Exception("Empty alignment stream!\n")); + } catch (Exception e) { + throw new Exception("Invalid fasta alignment\n", e); + } + System.err.println("NOT FULLY IMPLEMENTED!"); // TODO: Finish adapting this method + return make_Alignment(entry, seqs); + } +} diff --git a/src/uk/ac/vamsas/objects/utils/SeqSet.java b/src/uk/ac/vamsas/objects/utils/SeqSet.java new file mode 100644 index 0000000..e75bd54 --- /dev/null +++ b/src/uk/ac/vamsas/objects/utils/SeqSet.java @@ -0,0 +1,138 @@ +/* + * Created on 17-May-2005 + * Slurped into VamsasClient object set on 12th Jan 2006 + * + * TODO To change the template for this generated file go to + * Window - Preferences - Java - Code Style - Code Templates + */ +package uk.ac.vamsas.objects.utils; + +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.OutputStream; +import java.io.OutputStreamWriter; +import java.util.Hashtable; +import java.util.Vector; +import java.util.regex.Pattern; + +import uk.ac.vamsas.objects.core.*; + +/** + * @author jimp + * + * TODO To change the template for this generated type comment go to + * Window - Preferences - Java - Code Style - Code Templates + */ +public class SeqSet { + + public static void write_Fasta(OutputStream os, SequenceType[] seqs) throws IOException { + write_Fasta(os, seqs, 80); + } + + public static void write_Fasta(OutputStream os, SequenceType[] seqs, boolean width80) throws IOException { + write_Fasta(os, seqs, (width80) ? 80 : 0); + } + + public static void write_Fasta(OutputStream os, SequenceType[] seqs, int width) throws IOException { + int i, nseq = seqs.length; + BufferedWriter fasta_out = new BufferedWriter(new OutputStreamWriter(os)); + System.err.println("NOT FULLY IMPLEMENTED!"); // TODO: Finish adapting this method + for (i=0; i"+seqs[i].getName()+"\n"); + if (width<=0) { + fasta_out.write(seqs[i].getSequence()+"\n"); + } else { + // TODO: adapt to SymbolDictionary labelwidths + String tempseq = seqs[i].getSequence(); + int j=0, k=tempseq.length(); + while (j=width) { + fasta_out.write(tempseq, j, width); + } else { + fasta_out.write(tempseq, j, d); + } + fasta_out.write("\n"); + j+=width; + } + } + } + fasta_out.flush(); + } + /** + * TODO: introduce a dictionary parameter for qualified sequence symbols + * Reads a sequence set from a stream - will only read prescribed amino acid + * symbols. + * @param os + * @return + * @throws IOException + */ + public static Sequence[] read_SeqFasta(InputStream os) throws IOException { + Vector seqs = new Vector(); + int nseq = 0; + BufferedReader infasta = new BufferedReader(new InputStreamReader(os)); + System.err.println("NOT FULLY IMPLEMENTED!"); // TODO: Finish adapting this method + // TODO: decide on return type - SequenceType is a partly complete vamsas Vobject - either for a dataset or alignment sequence + // so could go in either! + String line; + Sequence seq = null; + Pattern aaMatch = Pattern.compile("[ARNDCQEGHILKMFPSTUWYV]", Pattern.CASE_INSENSITIVE); + String sname = "", seqstr=null; + do { + line = infasta.readLine(); + if (line==null || line.startsWith(">")) { + if (seqstr!=null) + seqs.add((Object) Seq.newSequence(sname.substring(1), seqstr, SymbolDictionary.STANDARD_AA, 0,0)); + sname = line; // remove > + seqstr=""; + } else { + String subseq = Pattern.compile("//s+").matcher(line).replaceAll(""); + seqstr += subseq; + } + } while (line!=null); + nseq = seqs.size(); + if (nseq>0) { + // TODO:POSS: should really return a sequence if there's only one in the file. + Sequence[] seqset = new Sequence[nseq]; + for (int i=0; i