--- /dev/null
+/*
+ * Created on 17-May-2005
+ * Slurped into VamsasClient object set on 12th Jan 2006
+ *
+ * TODO To change the template for this generated file go to
+ * Window - Preferences - Java - Code Style - Code Templates
+ */
+package uk.ac.vamsas.objects.utils;
+
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.OutputStream;
+import java.io.OutputStreamWriter;
+import java.util.Hashtable;
+import java.util.Vector;
+import java.util.regex.Pattern;
+
+import uk.ac.vamsas.objects.core.*;
+
+/**
+ * Static helpers for writing and reading sets of sequences in FASTA format, and for
+ * temporarily swapping sequence names for generated placeholder names.
+ *
+ * @author jimp
+ */
+public class SeqSet {
+
+ /**
+  * Writes the sequences as FASTA text with residue lines wrapped at 80 characters.
+  * Delegates to the (OutputStream, SequenceType[], int) overload.
+  *
+  * @param os stream the FASTA text is written to
+  * @param seqs sequences to write
+  * @throws IOException if writing to the stream fails
+  */
+ public static void write_Fasta(OutputStream os, SequenceType[] seqs) throws IOException {
+   final int defaultLineWidth = 80;
+   write_Fasta(os, seqs, defaultLineWidth);
+ }
+
+ public static void write_Fasta(OutputStream os, SequenceType[] seqs, boolean width80) throws IOException {
+ write_Fasta(os, seqs, (width80) ? 80 : 0);
+ }
+
+ /**
+  * Writes each sequence as a FASTA record: a line holding ">" followed by the
+  * sequence name, then the sequence string.
+  * <p>
+  * The stream is wrapped in a writer using the platform default charset. It is
+  * flushed when all records have been written, but not closed.
+  *
+  * @param os stream the FASTA text is written to
+  * @param seqs sequences to write, in output order
+  * @param width maximum number of characters per residue line; a value of 0 or
+  *        less writes the whole sequence on one line
+  * @throws IOException if writing to the stream fails
+  */
+ public static void write_Fasta(OutputStream os, SequenceType[] seqs, int width) throws IOException {
+   final int count = seqs.length;
+   BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(os));
+   System.err.println("NOT FULLY IMPLEMENTED!"); // TODO: Finish adapting this method
+   for (int s = 0; s < count; s++) {
+     writer.write(">"+seqs[s].getName()+"\n");
+     if (width > 0) {
+       // TODO: adapt to SymbolDictionary labelwidths
+       String residues = seqs[s].getSequence();
+       int total = residues.length();
+       // Emit successive chunks of at most 'width' characters, one per line.
+       for (int start = 0; start < total; start += width) {
+         writer.write(residues, start, Math.min(width, total - start));
+         writer.write("\n");
+       }
+     } else {
+       // Unwrapped output: the whole sequence on a single line.
+       writer.write(seqs[s].getSequence()+"\n");
+     }
+   }
+   writer.flush();
+ }
+ /**
+  * Reads FASTA formatted sequences from a stream.
+  * <p>
+  * A line starting with ">" begins a new record; the text after the ">" becomes
+  * the sequence name. All following lines up to the next header (or the end of
+  * the stream) are concatenated, with whitespace removed, to form the sequence
+  * string. Lines that appear before the first header belong to no record and are
+  * skipped. No filtering of residue symbols is performed; every record is
+  * created with SymbolDictionary.STANDARD_AA.
+  * <p>
+  * TODO: introduce a dictionary parameter for qualified sequence symbols
+  *
+  * @param os stream to read FASTA text from (decoded with the platform default
+  *        charset); it is not closed by this method
+  * @return the sequences in the order they were read, or null if the stream
+  *         contained no records
+  * @throws IOException if reading from the stream fails
+  */
+ public static Sequence[] read_SeqFasta(InputStream os) throws IOException {
+   Vector seqs = new Vector();
+   BufferedReader infasta = new BufferedReader(new InputStreamReader(os));
+   System.err.println("NOT FULLY IMPLEMENTED!"); // TODO: Finish adapting this method
+   // TODO: decide on return type - SequenceType is a partly complete vamsas Vobject - either for a dataset or alignment sequence
+   // so could go in either!
+   // Compiled once; strips any whitespace embedded in residue lines.
+   Pattern whitespace = Pattern.compile("\\s+");
+   String sname = null; // name of the record being accumulated, null before the first header
+   StringBuffer seqstr = null; // residues of the record being accumulated
+   String line;
+   do {
+     line = infasta.readLine();
+     if (line == null || line.startsWith(">")) {
+       // A header or end of stream terminates the record in progress, if any.
+       if (sname != null) {
+         seqs.add(Seq.newSequence(sname, seqstr.toString(), SymbolDictionary.STANDARD_AA, 0, 0));
+       }
+       if (line != null) {
+         sname = line.substring(1); // remove >
+         seqstr = new StringBuffer();
+       }
+     } else if (seqstr != null) {
+       seqstr.append(whitespace.matcher(line).replaceAll(""));
+     }
+     // else: data before the first header has no record to attach to, so it is ignored.
+   } while (line != null);
+   int nseq = seqs.size();
+   if (nseq > 0) {
+     // TODO:POSS: should really return a sequence if there's only one in the file.
+     Sequence[] seqset = new Sequence[nseq];
+     for (int i = 0; i < nseq; i++) {
+       seqset[i] = (Sequence) seqs.elementAt(i);
+     }
+     return seqset;
+   }
+
+   return null;
+ }
+
+ public static Hashtable uniquify(SequenceType[] sequences) {
+ System.err.println("NOT FULLY IMPLEMENTED!"); // TODO: Finish adapting this method
+ // TODO: do we need this with vamsas sequences ?
+ // Generate a safely named sequence set and a hash to recover the sequence names
+ Hashtable map = new Hashtable();
+ for (int i = 0; i < sequences.length; i++) {
+ String safename = new String("Sequence" + i);
+ map.put(safename, sequences[i].getName());
+ sequences[i].setName(safename);
+ }
+ return map;
+ }
+
+ /**
+  * Restores sequence names from a table that maps a sequence's current name to
+  * its replacement. Sequences whose current name is not a key in the table are
+  * left unchanged.
+  *
+  * @param map table mapping current names to the names to restore
+  * @param sequences sequences to rename in place
+  * @return true if every sequence's current name was found in the table, false
+  *         if at least one was missing
+  */
+ public static boolean deuniquify(Hashtable map, SequenceType[] sequences) {
+   System.err.println("NOT FULLY IMPLEMENTED!"); // TODO: Finish adapting this method
+   // TODO: do we need this with vamsas sequences ?
+   boolean missing = false;
+   for (int pos = 0; pos != sequences.length; pos++) {
+     SequenceType current = sequences[pos];
+     // Hashtable never stores null values, so a null result means the key is absent.
+     Object stored = map.get(current.getName());
+     if (stored == null) {
+       missing = true;
+       continue;
+     }
+     current.setName((String) stored);
+   }
+   return !missing;
+ }
+
+}