2 * Created on 17-May-2005
4 * TODO To change the template for this generated file go to
5 * Window - Preferences - Java - Code Style - Code Templates
7 package uk.ac.vamsas.objects.utils;
9 import java.io.BufferedOutputStream;
10 import java.io.BufferedReader;
11 import java.io.BufferedWriter;
12 import java.io.IOException;
13 import java.io.InputStream;
14 import java.io.InputStreamReader;
15 import java.util.Hashtable;
16 import java.util.StringTokenizer;
17 import java.util.Vector;
18 import java.util.regex.Pattern;
20 import uk.ac.vamsas.objects.core.*;
25 * TODO To change the template for this generated type comment go to Window -
26 * Preferences - Java - Code Style - Code Templates
28 public class SeqAln extends uk.ac.vamsas.objects.core.Alignment {
/**
 * Parses Clustal-style alignment text from a stream into an array of
 * {@link Sequence} objects.
 *
 * <p>The input is read line by line. Every line that does not begin with a
 * space is split on spaces; the first token is treated as the sequence id
 * and the following token is appended to a per-id buffer, so a sequence
 * that is spread over several blocks is concatenated in file order.
 * Ids are remembered in first-seen order in {@code headers}.
 *
 * <p>NOTE(review): this excerpt does not show every line of the method
 * (local declarations, the assignment of {@code id}, the body of the
 * "CLUSTAL" branch and the return statement are not visible), so the
 * comments below describe only what the visible statements do.
 *
 * @param os stream supplying the alignment text
 * @return presumably the {@code seqs} array built at the end of the method
 *         - the return statement is not visible here; TODO confirm
 * @throws Exception if reading fails (the IOException is wrapped as cause)
 *         or if an id recorded in {@code headers} has no buffer in
 *         {@code seqhash}
 */
30 public static Sequence[] ReadClustalFile(InputStream os) throws Exception {
31 System.err.println("NOT FULLY IMPLEMENTED!"); // TODO: Finish adapting this method
// NOTE(review): nonGap is compiled here but is not referenced by any of the
// statements visible in this method.
32 Pattern nonGap = Pattern.compile("[A-Z*0-9]", Pattern.CASE_INSENSITIVE);
// headers: ids in the order first encountered; seqhash: id -> StringBuffer
// holding the residues collected so far for that id.
39 Vector headers = new Vector();
40 Hashtable seqhash = new Hashtable();
41 Sequence[] seqs = null;
// NOTE(review): InputStreamReader without a charset argument decodes with
// the platform default charset.
46 BufferedReader ins = new BufferedReader(new InputStreamReader(os));
47 while ((line = ins.readLine()) != null) {
// Skip lines whose first character is a space.
48 if (line.indexOf(" ") != 0) {
49 java.util.StringTokenizer str = new StringTokenizer(line, " ");
52 if (str.hasMoreTokens()) {
// The first token is compared with the literal "CLUSTAL"; what that branch
// does is not visible in this excerpt.
54 if (id.equals("CLUSTAL")) {
// Reuse the buffer already registered for this id, or register a new one.
59 if (seqhash.containsKey(id)) {
60 tempseq = (StringBuffer) seqhash.get(id);
62 tempseq = new StringBuffer();
63 seqhash.put(id, tempseq);
// Record the id only the first time it is seen so output order is stable.
66 if (!(headers.contains(id))) {
67 headers.addElement(id);
// NOTE(review): no hasMoreTokens() check is visible before this call;
// StringTokenizer.nextToken() throws NoSuchElementException when a line
// holds an id and nothing else - confirm whether a guard exists.
70 tempseq.append(str.nextToken());
// I/O failures are rethrown as a plain Exception that keeps the cause.
77 } catch (IOException e) {
78 throw (new Exception("Exception parsing clustal file ", e));
// NOTE(review): noSeqs is assigned here but not read in the visible lines.
82 noSeqs = headers.size();
84 // Add sequences to the hash
// One array slot per recorded id, in first-seen order.
85 seqs = new Sequence[headers.size()];
86 for (i = 0; i < headers.size(); i++) {
87 if (seqhash.get(headers.elementAt(i)) != null) {
88 // TODO: develop automatic dictionary typing for sequences
// Every sequence is created with the STANDARD_AA dictionary and 0,0 as the
// last two arguments, whatever the input contained.
89 Sequence newSeq = Seq.newSequence(headers.elementAt(i).toString(),
90 seqhash.get(headers.elementAt(i).toString()).toString(),
91 SymbolDictionary.STANDARD_AA,0,0);
// Reached when an id in headers has no buffer in seqhash.
96 throw (new Exception("Bizarreness! Can't find sequence for "
97 + headers.elementAt(i)));
/**
 * Writes the sequences of an alignment to a stream as Clustal-style text.
 *
 * <p>Output starts with the line "CLUSTAL" followed by an empty line. The
 * sequences are then written in chunks: for every chunk each sequence
 * contributes one line made of its id (left-justified by the format string
 * built from {@code maxid}) followed by a slice of its residues.
 *
 * <p>NOTE(review): the declarations of {@code i}, {@code j}, {@code max},
 * {@code maxid}, {@code len} and {@code start}, plus any flush/close of the
 * writer, are not visible in this excerpt; comments cover the visible
 * statements only.
 *
 * @param os destination stream; wrapped in a BufferedWriter
 * @param seqAl alignment whose AlignmentSequence entries are written
 * @throws IOException if writing to the stream fails
 */
104 public static void WriteClustalWAlignment(java.io.OutputStream os,
105 Alignment seqAl) throws IOException {
106 System.err.println("NOT FULLY IMPLEMENTED!"); // TODO: Finish adapting this method
107 AlignmentSequence[] s = seqAl.getAlignmentSequence();
// NOTE(review): OutputStreamWriter without a charset argument encodes with
// the platform default charset.
109 java.io.BufferedWriter out = new BufferedWriter(
110 new java.io.OutputStreamWriter(os));
112 out.write("CLUSTAL\n\n");
// First pass: find the longest sequence (max) and the longest id (maxid).
// The scan stops at the end of the array or at the first null entry.
119 while (i < s.length && s[i] != null) {
120 String tmp = s[i].getId();
122 if (s[i].getSequence().length() > max) {
123 max = s[i].getSequence().length();
125 if (tmp.length() > maxid) {
126 maxid = tmp.length();
// NOTE(review): when max is an exact multiple of len this yields one more
// chunk than there are residues to fill - confirm whether that is intended.
136 int nochunks = max / len + 1;
138 for (i = 0; i < nochunks; i++) {
// Second pass, once per chunk, over the same null-terminated prefix of s.
140 while (j < s.length && s[j] != null) {
// Id plus a single space, left-justified to at least maxid characters.
141 out.write(new Format("%-" + maxid + "s").form(s[j].getId() + " "));
143 int end = start + len;
// Full-width slice when the sequence extends beyond this chunk ...
145 if (end < s[j].getSequence().length() && start < s[j].getSequence().length()) {
146 out.write(s[j].getSequence().substring(start, end) + "\n");
// ... otherwise the remaining tail, if any residues are left.
// NOTE(review): in the visible lines the newline is only written together
// with residues, so a sequence with nothing left for this chunk appears to
// get its id written without a line terminator - TODO confirm.
148 if (start < s[j].getSequence().length()) {
149 out.write(s[j].getSequence().substring(start) + "\n");
159 * manufacture an alignment/dataset from an array of sequences
165 public static Alignment make_Alignment(Entry origin,
166 Sequence[] seqs) throws Exception {
// Builds an Alignment from already-aligned sequences:
//  - provenance is derived from 'origin' via ProvenanceStuff.newProvenance;
//  - every sequence must have the same length, otherwise an Exception
//    naming the offending sequence is thrown;
//  - the characters of a sequence that do not match nonGap are treated as
//    gap characters, and an IOException is thrown when one sequence mixes
//    different gap characters;
//  - one AlignmentSequence per input sequence is added to the alignment.
// NOTE(review): the declarations of seqLength and gapchar, the statement
// that stores the sequence string on 'sq', and the return statement are
// not visible in this excerpt.
167 System.err.println("NOT FULLY IMPLEMENTED!"); // TODO: Finish adapting this method
168 Alignment al = new Alignment();
169 al.setProvenance(ProvenanceStuff.newProvenance(origin));
// Letters (either case), digits and '*' count as non-gap symbols.
171 Pattern nonGap = Pattern.compile("[A-Z*0-9]", Pattern.CASE_INSENSITIVE);
// NOTE(review): gapsset is not referenced by any statement visible here.
172 boolean gapsset = false;
176 for (int i = 0, nseq = seqs.length; i < nseq; i++) {
177 String seq = seqs[i].getSequence();
// Removing every non-gap symbol leaves only the gap characters.
178 String gaps = nonGap.matcher(seq).replaceAll("");
// The first length seen becomes the reference length for the rest.
179 if (seqLength == 0) {
180 seqLength = seq.length();
181 } else if (seqLength != seq.length())
182 throw (new Exception(i + "th Sequence (>" + seqs[i].getId()
183 + ") is not aligned.\n"));// TODO: move this to assertions part of
186 // common check for any sequence...
// NOTE(review): Matcher.replaceAll returns a String, so the null test looks
// redundant; only the length test decides whether gaps were found.
187 if (gaps != null && gaps.length() > 0) {
// The first gap character of this sequence is the one all others must equal.
// NOTE(review): the visible check compares gap characters within a single
// sequence; no comparison across sequences is visible - TODO confirm.
189 gapchar = gaps.charAt(0);
190 for (int c = 0, gc = gaps.length(); c < gc; c++) {
191 if (gapchar != gaps.charAt(c)) {
192 throw (new IOException("Inconsistent gap characters in sequence "
193 + i + ": '" + seq + "'"));
197 AlignmentSequence sq = new AlignmentSequence();
198 // TODO: use as basis of default AlignSequence(Sequence) constructor.
// Copy identity and coordinates from the source sequence; the refid is the
// source sequence's VorbaId.
200 sq.setName(seqs[i].getId());
201 sq.setRefid(seqs[i].getVorbaId());
202 sq.setStart(seqs[i].getStart());
203 sq.setEnd(seqs[i].getEnd());
204 al.addAlignmentSequence(sq);
// The alignment's gap character is the last value assigned to gapchar.
206 al.setGapChar(String.valueOf(gapchar));
/**
 * Reads sequences from a stream with {@code SeqSet.read_SeqFasta} and turns
 * them into an {@link Alignment} via {@code make_Alignment}.
 *
 * <p>NOTE(review): the throws clause, the declaration of {@code seqs}, the
 * opening of the try block and the condition guarding the
 * "Empty alignment stream!" throw are not visible in this excerpt.
 *
 * @param os stream supplying the FASTA text
 * @param entry passed to {@code make_Alignment} as the provenance origin
 * @return the alignment produced by {@code make_Alignment(entry, seqs)}
 */
210 public static Alignment read_FastaAlignment(InputStream os, Entry entry)
213 System.err.println("NOT FULLY IMPLEMENTED!"); // TODO: Finish adapting this method
215 seqs = SeqSet.read_SeqFasta(os);
// NOTE(review): if this throw sits inside the try whose catch follows, it is
// caught below and re-wrapped as "Invalid fasta alignment" - TODO confirm.
217 throw (new Exception("Empty alignment stream!\n"));
// Any failure while reading is rethrown with the original exception as cause.
218 } catch (Exception e) {
219 throw new Exception("Invalid fasta alignment\n", e);
222 return make_Alignment(entry, seqs);
/**
 * Reads sequences from a stream with {@code SeqAln.ReadClustalFile} and
 * turns them into an {@link Alignment} via {@code make_Alignment}.
 *
 * <p>NOTE(review): the throws clause, the declaration of {@code seqs}, the
 * opening of the try block and the condition guarding the
 * "Empty alignment stream!" throw are not visible in this excerpt; those
 * lines are left exactly as they are.
 *
 * @param os stream supplying the Clustal text
 * @param entry passed to {@code make_Alignment} as the provenance origin
 * @return the alignment produced by {@code make_Alignment(entry, seqs)}
 */
225 public static Alignment read_ClustalAlignment(InputStream os, Entry entry)
229 seqs = SeqAln.ReadClustalFile(os);
231 throw (new Exception("Empty alignment stream!\n"));
// Any failure while reading is rethrown with the original exception as cause.
232 } catch (Exception e) {
// The message names the format this method actually parses; it previously
// said "fasta", which pointed callers at the wrong parser.
233 throw new Exception("Invalid clustal alignment\n", e);
235 System.err.println("NOT FULLY IMPLEMENTED!"); // TODO: Finish adapting this method
236 return make_Alignment(entry, seqs);