From: jprocter Date: Fri, 13 Jan 2006 17:36:29 +0000 (+0000) Subject: refactored and incorporated basic io routines from vamsas web services stack. X-Git-Tag: Release_0.2~396 X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=4fc6d93d0c1a33ab5e0765b87d9263baa1798318;p=vamsas.git refactored and incorporated basic io routines from vamsas web services stack. git-svn-id: https://svn.lifesci.dundee.ac.uk/svn/repository/trunk@124 be28352e-c001-0410-b1a7-c7978e42abec --- diff --git a/src/org/vamsas/objects/utils/ProvenanceStuff.java b/src/org/vamsas/objects/utils/ProvenanceStuff.java new file mode 100644 index 0000000..f5eb18c --- /dev/null +++ b/src/org/vamsas/objects/utils/ProvenanceStuff.java @@ -0,0 +1,40 @@ +package org.vamsas.objects.utils; + +import java.util.Date; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.vamsas.objects.core.Entry; +import org.vamsas.objects.core.Provenance; + +public class ProvenanceStuff { + + /** + * stuff for making and doing things with provenance objects. + */ + static Log log = LogFactory.getLog("org.vamsas.objects.utils.ProvenanceStuff"); + + /** + * @param action + * text for action entry + * @return new Provenance entry for ArchiveWriter created docs. + * TODO: Verify and move to SimpleClient class for provenance handling + */ + public static Entry newProvenanceEntry(String user, String action) { + log.debug("Adding ProvenanceEntry("+user+","+action+")"); + Entry e = new Entry(); + e.setAction(action); + e.setUser(user); + e.setDate(new org.exolab.castor.types.Date(new Date())); + return e; + } + public static Provenance newProvenance(Entry entry) { + Provenance list = new Provenance(); + list.addEntry(entry); + return list; + } + public static Provenance newProvenance(String user, String action) { + return newProvenance(ProvenanceStuff.newProvenanceEntry(user, action)); + } + +} diff --git a/src/org/vamsas/objects/utils/SeqAln.java b/src/org/vamsas/objects/utils/SeqAln.java index dfe5e5b..5e351f9 100644 --- a/src/org/vamsas/objects/utils/SeqAln.java +++ b/src/org/vamsas/objects/utils/SeqAln.java @@ -17,196 +17,222 @@ import java.util.StringTokenizer; import java.util.Vector; import java.util.regex.Pattern; -import vamsas.objects.simple.Alignment; -import vamsas.objects.simple.Sequence; -import vamsas.objects.simple.SequenceSet; +import org.vamsas.objects.core.*; /** * @author jimp - * - * TODO To change the template for this generated type comment go to - * Window - Preferences - Java - Code Style - Code Templates + * + * TODO To change the template for this generated type comment go to Window - + * Preferences - Java - Code Style - Code Templates */ -public class SeqAln extends vamsas.objects.simple.Alignment { - - public static Sequence[] ReadClustalFile(InputStream os) throws Exception { - - Pattern nonGap = Pattern.compile("[A-Z*0-9]", Pattern.CASE_INSENSITIVE); - String gapchars = ""; - char gapchar='-'; - - int i = 0; - boolean flag = false; - - Vector headers = new Vector(); - Hashtable seqhash = new Hashtable(); - Sequence[] seqs=null; - int noSeqs = 0; - String line; - - try { - BufferedReader ins = new BufferedReader(new InputStreamReader(os)); - while ((line = ins.readLine()) != null) { - if (line.indexOf(" ") != 0) { - java.util.StringTokenizer str = new StringTokenizer(line," "); - String id = ""; - - if (str.hasMoreTokens()) { - id = str.nextToken(); - if (id.equals("CLUSTAL")) { - flag = true; - } else { - if (flag) { - StringBuffer tempseq; - if (seqhash.containsKey(id)) { - tempseq = (StringBuffer)seqhash.get(id); - } else { - tempseq = new StringBuffer(); - seqhash.put(id,tempseq); - } - - if (!(headers.contains(id))) { - headers.addElement(id); - } - - tempseq.append(str.nextToken()); - } - } - } - } - } - - } catch (IOException e) { - throw(new Exception("Exception parsing clustal file ",e)); - } - - if (flag) { - noSeqs = headers.size(); - - //Add sequences to the hash - seqs = new Sequence[headers.size()]; - for (i = 0; i < headers.size(); i++ ) { - if ( seqhash.get(headers.elementAt(i)) != null) { - - Sequence newSeq = new Sequence(headers.elementAt(i).toString(), - seqhash.get(headers.elementAt(i).toString()).toString()); - - seqs[i]=newSeq; - - } else { - throw(new Exception("Bizarreness! Can't find sequence for " + headers.elementAt(i))); - } - } - } - return seqs; - } - - public static void WriteClustalWAlignment(java.io.OutputStream os, Alignment seqAl) throws IOException { - Sequence[] s = seqAl.getSeqs().getSeqs(); - - java.io.BufferedWriter out = new BufferedWriter(new java.io.OutputStreamWriter(os)); - - out.write("CLUSTAL\n\n"); - - int max = 0; - int maxid = 0; - - int i = 0; - - while (i < s.length && s[i] != null) { - String tmp = s[i].getId(); - - if (s[i].getSeq().length() > max) { - max = s[i].getSeq().length(); - } - if (tmp.length() > maxid) { - maxid = tmp.length(); - } - i++; - } - - if (maxid < 15) { - maxid = 15; - } - maxid++; - int len = 60; - int nochunks = max / len + 1; - - for (i = 0; i < nochunks; i++) { - int j = 0; - while ( j < s.length && s[j] != null) { - out.write(new Format("%-" + maxid + "s").form(s[j].getId()+" ")); - int start = i*len; - int end = start + len; - - if (end < s[j].getSeq().length() && start < s[j].getSeq().length() ) { - out.write(s[j].getSeq().substring(start,end) + "\n"); - } else { - if (start < s[j].getSeq().length()) { - out.write(s[j].getSeq().substring(start) + "\n"); - } - } - j++; - } - out.write("\n"); - - } - } - - - public static Alignment make_Alignment(vamsas.objects.simple.Action origin, Sequence[] seqs, String[] source) throws Exception { - Pattern nonGap = Pattern.compile("[A-Z*0-9]", Pattern.CASE_INSENSITIVE); - boolean gapsset = false; - char gapchar='-'; - int seqLength = 0; - - - for (int i=0, nseq=seqs.length; i"+seqs[i].getId()+") is not aligned.\n"));//TODO: move this to assertions part of Alignment - - // common check for any sequence... - if (gaps!=null && gaps.length()>0) { - if (!gapsset) - gapchar = gaps.charAt(0); - for (int c=0, gc=gaps.length(); c max) { + max = s[i].getSequence().length(); + } + if (tmp.length() > maxid) { + maxid = tmp.length(); + } + i++; + } + + if (maxid < 15) { + maxid = 15; + } + maxid++; + int len = 60; + int nochunks = max / len + 1; + + for (i = 0; i < nochunks; i++) { + int j = 0; + while (j < s.length && s[j] != null) { + out.write(new Format("%-" + maxid + "s").form(s[j].getId() + " ")); + int start = i * len; + int end = start + len; + + if (end < s[j].getSequence().length() && start < s[j].getSequence().length()) { + out.write(s[j].getSequence().substring(start, end) + "\n"); + } else { + if (start < s[j].getSequence().length()) { + out.write(s[j].getSequence().substring(start) + "\n"); + } + } + j++; + } + out.write("\n"); + + } + } + /** + * manufacture an alignment/dataset from an array of sequences + * @param origin + * @param seqs + * @return + * @throws Exception + */ + public static Alignment make_Alignment(Entry origin, + Sequence[] seqs) throws Exception { + System.err.println("NOT FULLY IMPLEMENTED!"); // TODO: Finish adapting this method + Alignment al = new Alignment(); + al.setProvenance(ProvenanceStuff.newProvenance(origin)); + + Pattern nonGap = Pattern.compile("[A-Z*0-9]", Pattern.CASE_INSENSITIVE); + boolean gapsset = false; + char gapchar = '-'; + int seqLength = 0; + + for (int i = 0, nseq = seqs.length; i < nseq; i++) { + String seq = seqs[i].getSequence(); + String gaps = nonGap.matcher(seq).replaceAll(""); + if (seqLength == 0) { + seqLength = seq.length(); + } else if (seqLength != seq.length()) + throw (new Exception(i + "th Sequence (>" + seqs[i].getId() + + ") is not aligned.\n"));// TODO: move this to assertions part of + // Alignment + + // common check for any sequence... + if (gaps != null && gaps.length() > 0) { + if (!gapsset) + gapchar = gaps.charAt(0); + for (int c = 0, gc = gaps.length(); c < gc; c++) { + if (gapchar != gaps.charAt(c)) { + throw (new IOException("Inconsistent gap characters in sequence " + + i + ": '" + seq + "'")); + } + } + } + AlignmentSequence sq = new AlignmentSequence(); + // TODO: use as basis of default AlignSequence(Sequence) constructor. + sq.setSequence(seq); + sq.setName(seqs[i].getId()); + sq.setRefid(seqs[i].getVorbaId()); + sq.setStart(seqs[i].getStart()); + sq.setEnd(seqs[i].getEnd()); + al.addAlignmentSequence(sq); + } + al.setGapChar(String.valueOf(gapchar)); + return al; + } + + public static Alignment read_FastaAlignment(InputStream os, Entry entry) + throws Exception { + Sequence[] seqs; + System.err.println("NOT FULLY IMPLEMENTED!"); // TODO: Finish adapting this method + try { + seqs = SeqSet.read_SeqFasta(os); + if (seqs == null) + throw (new Exception("Empty alignment stream!\n")); + } catch (Exception e) { + throw new Exception("Invalid fasta alignment\n", e); + } + + return make_Alignment(entry, seqs); + } + + public static Alignment read_ClustalAlignment(InputStream os, Entry entry) + throws Exception { + Sequence[] seqs; + try { + seqs = SeqAln.ReadClustalFile(os); + if (seqs == null) + throw (new Exception("Empty alignment stream!\n")); + } catch (Exception e) { + throw new Exception("Invalid fasta alignment\n", e); + } + System.err.println("NOT FULLY IMPLEMENTED!"); // TODO: Finish adapting this method + return make_Alignment(entry, seqs); + } } diff --git a/src/org/vamsas/objects/utils/SeqSet.java b/src/org/vamsas/objects/utils/SeqSet.java index 294bce6..b39cba6 100644 --- a/src/org/vamsas/objects/utils/SeqSet.java +++ b/src/org/vamsas/objects/utils/SeqSet.java @@ -1,5 +1,6 @@ /* * Created on 17-May-2005 + * Slurped into VamsasClient object set on 12th Jan 2006 * * TODO To change the template for this generated file go to * Window - Preferences - Java - Code Style - Code Templates @@ -17,7 +18,7 @@ import java.util.Hashtable; import java.util.Vector; import java.util.regex.Pattern; -import vamsas.objects.simple.Sequence; +import org.vamsas.objects.core.*; /** * @author jimp @@ -25,25 +26,27 @@ import vamsas.objects.simple.Sequence; * TODO To change the template for this generated type comment go to * Window - Preferences - Java - Code Style - Code Templates */ -public class SeqSet extends vamsas.objects.simple.SequenceSet { +public class SeqSet { - public static void write_Fasta(OutputStream os, Sequence[] seqs) throws IOException { + public static void write_Fasta(OutputStream os, SequenceType[] seqs) throws IOException { write_Fasta(os, seqs, 80); } - public static void write_Fasta(OutputStream os, Sequence[] seqs, boolean width80) throws IOException { + public static void write_Fasta(OutputStream os, SequenceType[] seqs, boolean width80) throws IOException { write_Fasta(os, seqs, (width80) ? 80 : 0); } - public static void write_Fasta(OutputStream os, Sequence[] seqs, int width) throws IOException { + public static void write_Fasta(OutputStream os, SequenceType[] seqs, int width) throws IOException { int i, nseq = seqs.length; BufferedWriter fasta_out = new BufferedWriter(new OutputStreamWriter(os)); + System.err.println("NOT FULLY IMPLEMENTED!"); // TODO: Finish adapting this method for (i=0; i"+seqs[i].getId()+"\n"); + fasta_out.write(">"+seqs[i].getName()+"\n"); if (width<=0) { - fasta_out.write(seqs[i].getSeq()+"\n"); + fasta_out.write(seqs[i].getSequence()+"\n"); } else { - String tempseq = seqs[i].getSeq(); + // TODO: adapt to SymbolDictionary labelwidths + String tempseq = seqs[i].getSequence(); int j=0, k=tempseq.length(); while (j")) { if (seqstr!=null) - seqs.add((Object) new Sequence(sname.substring(1), seqstr)); + seqs.add((Object) Seq.newSequence(sname.substring(1), seqstr, SymbolDictionary.STANDARD_AA, 0,0)); sname = line; // remove > seqstr=""; } else { @@ -102,24 +106,28 @@ public class SeqSet extends vamsas.objects.simple.SequenceSet { return null; } - public static Hashtable uniquify(Sequence[] sequences) { + public static Hashtable uniquify(SequenceType[] sequences) { + System.err.println("NOT FULLY IMPLEMENTED!"); // TODO: Finish adapting this method + // TODO: do we need this with vamsas sequences ? // Generate a safely named sequence set and a hash to recover the sequence names Hashtable map = new Hashtable(); for (int i = 0; i < sequences.length; i++) { String safename = new String("Sequence" + i); - map.put(safename, sequences[i].getId()); - sequences[i].setId(safename); + map.put(safename, sequences[i].getName()); + sequences[i].setName(safename); } return map; } - public static boolean deuniquify(Hashtable map, Sequence[] sequences) { + public static boolean deuniquify(Hashtable map, SequenceType[] sequences) { + System.err.println("NOT FULLY IMPLEMENTED!"); // TODO: Finish adapting this method + // TODO: do we need this with vamsas sequences ? // recover unsafe sequence names for a sequence set boolean allfound = true; for (int i = 0; i < sequences.length; i++) { - if (map.containsKey(sequences[i].getId())) { - String unsafename = (String) map.get(sequences[i].getId()); - sequences[i].setId(unsafename); + if (map.containsKey(sequences[i].getName())) { + String unsafename = (String) map.get(sequences[i].getName()); + sequences[i].setName(unsafename); } else { allfound = false; } diff --git a/src/org/vamsas/test/objects/Core.java b/src/org/vamsas/test/objects/Core.java index 880c8bd..ca3d90d 100644 --- a/src/org/vamsas/test/objects/Core.java +++ b/src/org/vamsas/test/objects/Core.java @@ -8,6 +8,8 @@ import org.vamsas.objects.core.AlignmentSequence; import org.vamsas.objects.core.DataSet; import org.vamsas.objects.core.Sequence; import org.vamsas.objects.core.VAMSAS; +import org.vamsas.objects.utils.Seq; +import org.vamsas.objects.utils.SymbolDictionary; /** * @author jim @@ -26,26 +28,11 @@ public class Core { q[i].setEnd(q[i].getSequence().length()+i+1); } } - public static Sequence Sequence(String Name, String Sequence, String Dictionary, int start, int end) { - Sequence seq= new Sequence(); - seq.setDictionary(Dictionary); - seq.setName(Name); - seq.setSequence(Sequence); - seq.setStart(start); - if (start<=end) { - if ((end-start)!=Sequence.length()) - seq.setEnd(start+Sequence.length()); - } else { - // reverse topology mapping. TODO: VAMSAS: decide if allowed to do start>end on Sequence object - if ((start-end)!=Sequence.length()) - seq.setEnd(end+Sequence.length()); - } - return seq; - } + public static VAMSAS getDemoVamsas() { VAMSAS v=new VAMSAS(); DataSet ds = new DataSet(); - ds.addSequence(Sequence("Dummy1","ASDFLEQ","info:iubmb.org/aminoacids", 5, 11)); + ds.addSequence(Seq.newSequence("Dummy1","ASDFLEQ",SymbolDictionary.STANDARD_AA, 5, 11)); complete(ds); v.addDataSet(ds); Alignment al = new Alignment(); diff --git a/src/org/vamsas/test/simpleclient/ArchiveWriter.java b/src/org/vamsas/test/simpleclient/ArchiveWriter.java index cfba99b..91b1d9e 100644 --- a/src/org/vamsas/test/simpleclient/ArchiveWriter.java +++ b/src/org/vamsas/test/simpleclient/ArchiveWriter.java @@ -18,6 +18,7 @@ import org.vamsas.objects.core.Entry; import org.vamsas.objects.core.Provenance; import org.vamsas.objects.core.VAMSAS; import org.vamsas.objects.core.VamsasDocument; +import org.vamsas.objects.utils.ProvenanceStuff; public class ArchiveWriter { @@ -27,28 +28,7 @@ public class ArchiveWriter { */ static Log log = LogFactory.getLog("org.vamsas.test.simpleclient.ArchiveWriter"); - - /** - * @param action - * text for action entry - * @return new Provenance entry for ArchiveWriter created docs. - * TODO: Verify and move to SimpleClient class for provenance handling - */ - public static Entry newProvenanceEntry(String user, String action) { - log.debug("Adding ProvenanceEntry("+user+","+action+")"); - Entry e = new Entry(); - e.setAction(action); - e.setUser(user); - e.setDate(new org.exolab.castor.types.Date(new Date())); - return e; - } - - public static Provenance newProvenance() { - Provenance list = new Provenance(); - list.addEntry(newProvenanceEntry("ArchiveWriter", "Created new Vamsas Document")); - return list; - } - + private static void mergeVecs(Object[] destvec, Object[] svec1, Object[] svec2) { int i; for (i=0; i