2 * Created on 17-May-2005
3 * Slurped into VamsasClient object set on 12th Jan 2006
5 * TODO To change the template for this generated file go to
6 * Window - Preferences - Java - Code Style - Code Templates
8 package uk.ac.vamsas.objects.utils;
10 import java.io.BufferedReader;
11 import java.io.BufferedWriter;
12 import java.io.IOException;
13 import java.io.InputStream;
14 import java.io.InputStreamReader;
15 import java.io.OutputStream;
16 import java.io.OutputStreamWriter;
17 import java.util.Hashtable;
18 import java.util.Vector;
19 import java.util.regex.Pattern;
21 import uk.ac.vamsas.objects.core.*;
26 * TODO To change the template for this generated type comment go to
27 * Window - Preferences - Java - Code Style - Code Templates
/**
 * Writes the given sequences to {@code os} by delegating to
 * {@code write_Fasta(os, seqs, 80)}, i.e. the int-width overload with a
 * width of 80.
 *
 * @param os   stream handed unchanged to the int-width overload
 * @param seqs sequences handed unchanged to the int-width overload
 * @throws IOException declared because the int-width overload declares it
 */
31 public static void write_Fasta(OutputStream os, SequenceType[] seqs) throws IOException {
32 write_Fasta(os, seqs, 80);
/**
 * Convenience overload that maps a boolean onto a numeric width and delegates
 * to the int-width overload: {@code true} passes 80, {@code false} passes 0.
 * NOTE(review): 0 presumably means "do not wrap lines" - the code that
 * interprets the width is not fully visible in this listing; confirm.
 *
 * @param os      stream handed unchanged to the int-width overload
 * @param seqs    sequences handed unchanged to the int-width overload
 * @param width80 selects a width of 80 when true, 0 when false
 * @throws IOException declared because the int-width overload declares it
 */
35 public static void write_Fasta(OutputStream os, SequenceType[] seqs, boolean width80) throws IOException {
36 write_Fasta(os, seqs, (width80) ? 80 : 0);
/**
 * Writes each element of {@code seqs} to {@code os} as a ">"-prefixed name
 * line followed by its sequence text, and prints a "NOT FULLY IMPLEMENTED!"
 * warning to stderr on every call.
 * NOTE(review): the branch and loop headers that choose between the
 * whole-line write and the chunked writes are not visible in this listing;
 * the per-line notes below describe only the statements that are shown.
 *
 * @param os    destination stream, wrapped in a BufferedWriter
 * @param seqs  sequences to write; name and sequence are read via
 *              getName() and getSequence()
 * @param width number of characters written per chunk by the chunked write
 * @throws IOException propagated from the underlying writer
 */
39 public static void write_Fasta(OutputStream os, SequenceType[] seqs, int width) throws IOException {
40 int i, nseq = seqs.length;
// No charset is passed to OutputStreamWriter, so the platform default encoding is used.
41 BufferedWriter fasta_out = new BufferedWriter(new OutputStreamWriter(os));
42 System.err.println("NOT FULLY IMPLEMENTED!"); // TODO: Finish adapting this method
43 for (i=0; i<nseq; i++) {
// Header line: ">" + name, terminated with "\n" rather than the platform line separator.
44 fasta_out.write(">"+seqs[i].getName()+"\n");
// Writes the complete sequence on a single line.
// NOTE(review): presumably the alternative to the chunked output below,
// selected by a test on width that is not visible here - confirm.
46 fasta_out.write(seqs[i].getSequence()+"\n");
48 // TODO: adapt to SymbolDictionary labelwidths
49 String tempseq = seqs[i].getSequence();
// j is used below as the start offset into tempseq; k is the full sequence length.
50 int j=0, k=tempseq.length();
// Writes `width` characters of tempseq starting at offset j.
54 fasta_out.write(tempseq, j, width);
// Writes d characters of tempseq starting at offset j.
// NOTE(review): the declaration of d is not visible in this listing;
// presumably the remaining length (k - j) - confirm.
56 fasta_out.write(tempseq, j, d);
58 fasta_out.write("\n");
// NOTE(review): no flush() or close() of fasta_out is visible in these lines;
// confirm the writer is flushed before the method returns, otherwise
// buffered output may never reach os.
66 * TODO: introduce a dictionary parameter for qualified sequence symbols
67 * Reads a sequence set from a stream - will only read prescribed amino acid
/**
 * Reads FASTA-style text from the given stream, building one Sequence per
 * ">" header line via Seq.newSequence(..., SymbolDictionary.STANDARD_AA, 0, 0),
 * and copies the collected sequences into a Sequence[]. Prints a
 * "NOT FULLY IMPLEMENTED!" warning to stderr on every call.
 * NOTE(review): the loop header, the declarations of `line` and `nseq`, the
 * accumulation into seqstr and the return statements are not visible in this
 * listing; the notes below describe only the statements that are shown.
 *
 * @param os input stream to read from (an input, despite the name)
 * @throws IOException propagated from BufferedReader.readLine()
 */
73 public static Sequence[] read_SeqFasta(InputStream os) throws IOException {
// Raw (untyped) Vector: elements are cast back to Sequence when copied out below.
74 Vector seqs = new Vector();
// No charset is passed to InputStreamReader, so the platform default encoding is used.
76 BufferedReader infasta = new BufferedReader(new InputStreamReader(os));
77 System.err.println("NOT FULLY IMPLEMENTED!"); // TODO: Finish adapting this method
78 // TODO: decide on return type - SequenceType is a partly complete vamsas Vobject - either for a dataset or alignment sequence
79 // so could go in either!
// NOTE(review): aaMatch is compiled here but not referenced by any visible
// line - confirm whether symbol filtering is actually applied.
82 Pattern aaMatch = Pattern.compile("[ARNDCQEGHILKMFPSTUWYV]", Pattern.CASE_INSENSITIVE);
// sname holds the most recent header line (including its leading ">");
// seqstr starts as null.
83 String sname = "", seqstr=null;
85 line = infasta.readLine();
// End of input (null) or a new ">" header both terminate the current record.
86 if (line==null || line.startsWith(">")) {
// substring(1) is what strips the leading ">" from the stored header.
// NOTE(review): sname starts as "", so substring(1) would throw if this ran
// before any header had been stored - presumably guarded by a check on
// seqstr that is not visible here; confirm.
88 seqs.add((Object) Seq.newSequence(sname.substring(1), seqstr, SymbolDictionary.STANDARD_AA, 0,0));
// Stores the header line as-is; at end of input this assigns null.
89 sname = line; // remove >
// NOTE(review): "//s+" matches two slashes followed by one or more 's'
// characters, not whitespace; this was almost certainly meant to be "\\s+".
// As written, whitespace inside sequence lines is NOT removed.
// The pattern is also recompiled for every line read.
92 String subseq = Pattern.compile("//s+").matcher(line).replaceAll("");
98 // TODO:POSS: should really return a sequence if there's only one in the file.
// Copies the Vector contents into a typed array of length nseq.
99 Sequence[] seqset = new Sequence[nseq];
100 for (int i=0; i<nseq; i++) {
101 seqset[i] = (Sequence) seqs.elementAt(i);
/**
 * Renames every element of {@code sequences} in place to "Sequence" + index
 * and records the mapping generated-name -> original-name in a Hashtable.
 * Prints a "NOT FULLY IMPLEMENTED!" warning to stderr on every call.
 * NOTE(review): the return statement is not visible in this listing;
 * presumably the populated map is returned - confirm.
 *
 * @param sequences sequences whose names are overwritten via setName()
 */
109 public static Hashtable uniquify(SequenceType[] sequences) {
110 System.err.println("NOT FULLY IMPLEMENTED!"); // TODO: Finish adapting this method
111 // TODO: do we need this with vamsas sequences ?
112 // Generate a safely named sequence set and a hash to recover the sequence names
113 Hashtable map = new Hashtable();
114 for (int i = 0; i < sequences.length; i++) {
// new String(...) makes a redundant copy of the concatenation result.
115 String safename = new String("Sequence" + i);
// Key is the generated name, value is the original name.
// NOTE(review): Hashtable.put rejects null values, so a sequence whose
// getName() returns null would raise NullPointerException here - confirm
// names are always set.
116 map.put(safename, sequences[i].getName());
// Overwrites the caller's sequence name; the original survives only in the map.
117 sequences[i].setName(safename);
122 public static boolean deuniquify(Hashtable map, SequenceType[] sequences) {
123 System.err.println("NOT FULLY IMPLEMENTED!"); // TODO: Finish adapting this method
124 // TODO: do we need this with vamsas sequences ?
125 // recover unsafe sequence names for a sequence set
126 boolean allfound = true;
127 for (int i = 0; i < sequences.length; i++) {
128 if (map.containsKey(sequences[i].getName())) {
129 String unsafename = (String) map.get(sequences[i].getName());
130 sequences[i].setName(unsafename);