X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fuk%2Fac%2Fvamsas%2Fobjects%2Futils%2FSeqSet.java;fp=src%2Fuk%2Fac%2Fvamsas%2Fobjects%2Futils%2FSeqSet.java;h=b6f48adc228f9e04cb6568ab5618252d99a6486f;hb=844ccad5a3fcbedec17b2af66d460f31abc7cff1;hp=e75bd54a569a4e47290fff8533f527cc417d15e5;hpb=6f33f705957d674dc2ab6c994a6ea87f7a91f40f;p=vamsas.git diff --git a/src/uk/ac/vamsas/objects/utils/SeqSet.java b/src/uk/ac/vamsas/objects/utils/SeqSet.java index e75bd54..b6f48ad 100644 --- a/src/uk/ac/vamsas/objects/utils/SeqSet.java +++ b/src/uk/ac/vamsas/objects/utils/SeqSet.java @@ -1,138 +1,166 @@ -/* - * Created on 17-May-2005 - * Slurped into VamsasClient object set on 12th Jan 2006 - * - * TODO To change the template for this generated file go to - * Window - Preferences - Java - Code Style - Code Templates - */ -package uk.ac.vamsas.objects.utils; - -import java.io.BufferedReader; -import java.io.BufferedWriter; -import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.io.OutputStream; -import java.io.OutputStreamWriter; -import java.util.Hashtable; -import java.util.Vector; -import java.util.regex.Pattern; - -import uk.ac.vamsas.objects.core.*; - -/** - * @author jimp - * - * TODO To change the template for this generated type comment go to - * Window - Preferences - Java - Code Style - Code Templates - */ -public class SeqSet { - - public static void write_Fasta(OutputStream os, SequenceType[] seqs) throws IOException { - write_Fasta(os, seqs, 80); - } - - public static void write_Fasta(OutputStream os, SequenceType[] seqs, boolean width80) throws IOException { - write_Fasta(os, seqs, (width80) ? 80 : 0); - } - - public static void write_Fasta(OutputStream os, SequenceType[] seqs, int width) throws IOException { - int i, nseq = seqs.length; - BufferedWriter fasta_out = new BufferedWriter(new OutputStreamWriter(os)); - System.err.println("NOT FULLY IMPLEMENTED!"); // TODO: Finish adapting this method - for (i=0; i"+seqs[i].getName()+"\n"); - if (width<=0) { - fasta_out.write(seqs[i].getSequence()+"\n"); - } else { - // TODO: adapt to SymbolDictionary labelwidths - String tempseq = seqs[i].getSequence(); - int j=0, k=tempseq.length(); - while (j=width) { - fasta_out.write(tempseq, j, width); - } else { - fasta_out.write(tempseq, j, d); - } - fasta_out.write("\n"); - j+=width; - } - } - } - fasta_out.flush(); - } - /** - * TODO: introduce a dictionary parameter for qualified sequence symbols - * Reads a sequence set from a stream - will only read prescribed amino acid - * symbols. - * @param os - * @return - * @throws IOException - */ - public static Sequence[] read_SeqFasta(InputStream os) throws IOException { - Vector seqs = new Vector(); - int nseq = 0; - BufferedReader infasta = new BufferedReader(new InputStreamReader(os)); - System.err.println("NOT FULLY IMPLEMENTED!"); // TODO: Finish adapting this method - // TODO: decide on return type - SequenceType is a partly complete vamsas Vobject - either for a dataset or alignment sequence - // so could go in either! - String line; - Sequence seq = null; - Pattern aaMatch = Pattern.compile("[ARNDCQEGHILKMFPSTUWYV]", Pattern.CASE_INSENSITIVE); - String sname = "", seqstr=null; - do { - line = infasta.readLine(); - if (line==null || line.startsWith(">")) { - if (seqstr!=null) - seqs.add((Object) Seq.newSequence(sname.substring(1), seqstr, SymbolDictionary.STANDARD_AA, 0,0)); - sname = line; // remove > - seqstr=""; - } else { - String subseq = Pattern.compile("//s+").matcher(line).replaceAll(""); - seqstr += subseq; - } - } while (line!=null); - nseq = seqs.size(); - if (nseq>0) { - // TODO:POSS: should really return a sequence if there's only one in the file. - Sequence[] seqset = new Sequence[nseq]; - for (int i=0; i. + */ +package uk.ac.vamsas.objects.utils; + +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.OutputStream; +import java.io.OutputStreamWriter; +import java.util.Hashtable; +import java.util.Vector; +import java.util.regex.Pattern; + +import uk.ac.vamsas.objects.core.*; + +/** + * @author jimp + * + * TODO To change the template for this generated type comment go to + * Window - Preferences - Java - Code Style - Code Templates + */ +public class SeqSet { + + public static void write_Fasta(OutputStream os, SequenceType[] seqs) + throws IOException { + write_Fasta(os, seqs, 80); + } + + public static void write_Fasta(OutputStream os, SequenceType[] seqs, + boolean width80) throws IOException { + write_Fasta(os, seqs, (width80) ? 80 : 0); + } + + public static void write_Fasta(OutputStream os, SequenceType[] seqs, int width) + throws IOException { + int i, nseq = seqs.length; + BufferedWriter fasta_out = new BufferedWriter(new OutputStreamWriter(os)); + System.err.println("NOT FULLY IMPLEMENTED!"); // TODO: Finish adapting this + // method + for (i = 0; i < nseq; i++) { + fasta_out.write(">" + seqs[i].getName() + "\n"); + if (width <= 0) { + fasta_out.write(seqs[i].getSequence() + "\n"); + } else { + // TODO: adapt to SymbolDictionary labelwidths + String tempseq = seqs[i].getSequence(); + int j = 0, k = tempseq.length(); + while (j < k) { + int d = k - j; + if (d >= width) { + fasta_out.write(tempseq, j, width); + } else { + fasta_out.write(tempseq, j, d); + } + fasta_out.write("\n"); + j += width; + } + } + } + fasta_out.flush(); + } + + /** + * TODO: introduce a dictionary parameter for qualified sequence symbols Reads + * a sequence set from a stream - will only read prescribed amino acid + * symbols. + * + * @param os + * @return + * @throws IOException + */ + public static Sequence[] read_SeqFasta(InputStream os) throws IOException { + Vector seqs = new Vector(); + int nseq = 0; + BufferedReader infasta = new BufferedReader(new InputStreamReader(os)); + System.err.println("NOT FULLY IMPLEMENTED!"); // TODO: Finish adapting this + // method + // TODO: decide on return type - SequenceType is a partly complete vamsas + // Vobject - either for a dataset or alignment sequence + // so could go in either! + String line; + Sequence seq = null; + Pattern aaMatch = Pattern.compile("[ARNDCQEGHILKMFPSTUWYV]", + Pattern.CASE_INSENSITIVE); + String sname = "", seqstr = null; + do { + line = infasta.readLine(); + if (line == null || line.startsWith(">")) { + if (seqstr != null) + seqs.add((Object) Seq.newSequence(sname.substring(1), seqstr, + SymbolDictionary.STANDARD_AA, 0, 0)); + sname = line; // remove > + seqstr = ""; + } else { + String subseq = Pattern.compile("//s+").matcher(line).replaceAll(""); + seqstr += subseq; + } + } while (line != null); + nseq = seqs.size(); + if (nseq > 0) { + // TODO:POSS: should really return a sequence if there's only one in the + // file. + Sequence[] seqset = new Sequence[nseq]; + for (int i = 0; i < nseq; i++) { + seqset[i] = (Sequence) seqs.elementAt(i); + } + return seqset; + } + + return null; + } + + public static Hashtable uniquify(SequenceType[] sequences) { + System.err.println("NOT FULLY IMPLEMENTED!"); // TODO: Finish adapting this + // method + // TODO: do we need this with vamsas sequences ? + // Generate a safely named sequence set and a hash to recover the sequence + // names + Hashtable map = new Hashtable(); + for (int i = 0; i < sequences.length; i++) { + String safename = new String("Sequence" + i); + map.put(safename, sequences[i].getName()); + sequences[i].setName(safename); + } + return map; + } + + public static boolean deuniquify(Hashtable map, SequenceType[] sequences) { + System.err.println("NOT FULLY IMPLEMENTED!"); // TODO: Finish adapting this + // method + // TODO: do we need this with vamsas sequences ? + // recover unsafe sequence names for a sequence set + boolean allfound = true; + for (int i = 0; i < sequences.length; i++) { + if (map.containsKey(sequences[i].getName())) { + String unsafename = (String) map.get(sequences[i].getName()); + sequences[i].setName(unsafename); + } else { + allfound = false; + } + } + return allfound; + } + +}