2 * This file is part of the Vamsas Client version 0.1.
\r
3 * Copyright 2009 by Jim Procter, Iain Milne, Pierre Marguerite,
\r
4 * Andrew Waterhouse and Dominik Lindner.
\r
6 * Earlier versions have also been incorporated into Jalview version 2.4
\r
7 * since 2008, and TOPALi version 2 since 2007.
\r
9 * The Vamsas Client is free software: you can redistribute it and/or modify
\r
10 * it under the terms of the GNU Lesser General Public License as published by
\r
11 * the Free Software Foundation, either version 3 of the License, or
\r
12 * (at your option) any later version.
\r
14 * The Vamsas Client is distributed in the hope that it will be useful,
\r
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
\r
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
\r
17 * GNU Lesser General Public License for more details.
\r
19 * You should have received a copy of the GNU Lesser General Public License
\r
20 * along with the Vamsas Client. If not, see <http://www.gnu.org/licenses/>.
\r
22 package uk.ac.vamsas.objects.utils;
\r
24 import java.io.BufferedOutputStream;
\r
25 import java.io.BufferedReader;
\r
26 import java.io.BufferedWriter;
\r
27 import java.io.IOException;
\r
28 import java.io.InputStream;
\r
29 import java.io.InputStreamReader;
\r
30 import java.util.Hashtable;
\r
31 import java.util.StringTokenizer;
\r
32 import java.util.Vector;
\r
33 import java.util.regex.Pattern;
\r
35 import uk.ac.vamsas.objects.core.*;
\r
40 * Static helpers for reading Clustal and FASTA alignments, writing ClustalW
\r
41 * output, and assembling an Alignment from an array of Sequences. Not fully implemented.
\r
43 public class SeqAln extends uk.ac.vamsas.objects.core.Alignment {
\r
45 public static Sequence[] ReadClustalFile(InputStream os) throws Exception {
\r
46 System.err.println("NOT FULLY IMPLEMENTED!"); // TODO: Finish adapting this
\r
48 Pattern nonGap = Pattern.compile("[A-Z*0-9]", Pattern.CASE_INSENSITIVE);
\r
49 String gapchars = "";
\r
53 boolean flag = false;
\r
55 Vector headers = new Vector();
\r
56 Hashtable seqhash = new Hashtable();
\r
57 Sequence[] seqs = null;
\r
62 BufferedReader ins = new BufferedReader(new InputStreamReader(os));
\r
63 while ((line = ins.readLine()) != null) {
\r
64 if (line.indexOf(" ") != 0) {
\r
65 java.util.StringTokenizer str = new StringTokenizer(line, " ");
\r
68 if (str.hasMoreTokens()) {
\r
69 id = str.nextToken();
\r
70 if (id.equals("CLUSTAL")) {
\r
74 StringBuffer tempseq;
\r
75 if (seqhash.containsKey(id)) {
\r
76 tempseq = (StringBuffer) seqhash.get(id);
\r
78 tempseq = new StringBuffer();
\r
79 seqhash.put(id, tempseq);
\r
82 if (!(headers.contains(id))) {
\r
83 headers.addElement(id);
\r
86 tempseq.append(str.nextToken());
\r
93 } catch (IOException e) {
\r
94 throw (new Exception("Exception parsing clustal file ", e));
\r
98 noSeqs = headers.size();
\r
100 // Build a Sequence for each header from the residues collected in the hash
\r
101 seqs = new Sequence[headers.size()];
\r
102 for (i = 0; i < headers.size(); i++) {
\r
103 if (seqhash.get(headers.elementAt(i)) != null) {
\r
104 // TODO: develop automatic dictionary typing for sequences
\r
105 Sequence newSeq = Seq.newSequence(headers.elementAt(i).toString(),
\r
106 seqhash.get(headers.elementAt(i).toString()).toString(),
\r
107 SymbolDictionary.STANDARD_AA, 0, 0);
\r
112 throw (new Exception("Bizarreness! Can't find sequence for "
\r
113 + headers.elementAt(i)));
\r
120 public static void WriteClustalWAlignment(java.io.OutputStream os,
\r
121 Alignment seqAl) throws IOException {
\r
122 System.err.println("NOT FULLY IMPLEMENTED!"); // TODO: Finish adapting this
\r
124 AlignmentSequence[] s = seqAl.getAlignmentSequence();
\r
126 java.io.BufferedWriter out = new BufferedWriter(
\r
127 new java.io.OutputStreamWriter(os));
\r
129 out.write("CLUSTAL\n\n");
\r
136 while (i < s.length && s[i] != null) {
\r
137 String tmp = s[i].getId();
\r
139 if (s[i].getSequence().length() > max) {
\r
140 max = s[i].getSequence().length();
\r
142 if (tmp.length() > maxid) {
\r
143 maxid = tmp.length();
\r
153 int nochunks = max / len + 1;
\r
155 for (i = 0; i < nochunks; i++) {
\r
157 while (j < s.length && s[j] != null) {
\r
158 out.write(new Format("%-" + maxid + "s").form(s[j].getId() + " "));
\r
159 int start = i * len;
\r
160 int end = start + len;
\r
162 if (end < s[j].getSequence().length()
\r
163 && start < s[j].getSequence().length()) {
\r
164 out.write(s[j].getSequence().substring(start, end) + "\n");
\r
166 if (start < s[j].getSequence().length()) {
\r
167 out.write(s[j].getSequence().substring(start) + "\n");
\r
178 * manufacture an alignment/dataset from an array of sequences
\r
183 * @throws Exception
\r
185 public static Alignment make_Alignment(Entry origin, Sequence[] seqs)
\r
187 System.err.println("NOT FULLY IMPLEMENTED!"); // TODO: Finish adapting this
\r
189 Alignment al = new Alignment();
\r
190 al.setProvenance(ProvenanceStuff.newProvenance(origin));
\r
192 Pattern nonGap = Pattern.compile("[A-Z*0-9]", Pattern.CASE_INSENSITIVE);
\r
193 boolean gapsset = false;
\r
194 char gapchar = '-';
\r
197 for (int i = 0, nseq = seqs.length; i < nseq; i++) {
\r
198 String seq = seqs[i].getSequence();
\r
199 String gaps = nonGap.matcher(seq).replaceAll("");
\r
200 if (seqLength == 0) {
\r
201 seqLength = seq.length();
\r
202 } else if (seqLength != seq.length())
\r
203 throw (new Exception(i + "th Sequence (>" + seqs[i].getId()
\r
204 + ") is not aligned.\n"));// TODO: move this to assertions part of
\r
207 // common check for any sequence...
\r
208 if (gaps != null && gaps.length() > 0) {
\r
210 gapchar = gaps.charAt(0);
\r
211 for (int c = 0, gc = gaps.length(); c < gc; c++) {
\r
212 if (gapchar != gaps.charAt(c)) {
\r
213 throw (new IOException("Inconsistent gap characters in sequence "
\r
214 + i + ": '" + seq + "'"));
\r
218 AlignmentSequence sq = new AlignmentSequence();
\r
219 // TODO: use as basis of default AlignSequence(Sequence) constructor.
\r
220 sq.setSequence(seq);
\r
221 sq.setName(seqs[i].getId());
\r
222 sq.setRefid(seqs[i].getVorbaId());
\r
223 sq.setStart(seqs[i].getStart());
\r
224 sq.setEnd(seqs[i].getEnd());
\r
225 al.addAlignmentSequence(sq);
\r
227 al.setGapChar(String.valueOf(gapchar));
\r
231 public static Alignment read_FastaAlignment(InputStream os, Entry entry)
\r
234 System.err.println("NOT FULLY IMPLEMENTED!"); // TODO: Finish adapting this
\r
237 seqs = SeqSet.read_SeqFasta(os);
\r
239 throw (new Exception("Empty alignment stream!\n"));
\r
240 } catch (Exception e) {
\r
241 throw new Exception("Invalid fasta alignment\n", e);
\r
244 return make_Alignment(entry, seqs);
\r
247 public static Alignment read_ClustalAlignment(InputStream os, Entry entry)
\r
251 seqs = SeqAln.ReadClustalFile(os);
\r
253 throw (new Exception("Empty alignment stream!\n"));
\r
254 } catch (Exception e) {
\r
255 throw new Exception("Invalid fasta alignment\n", e);
\r
257 System.err.println("NOT FULLY IMPLEMENTED!"); // TODO: Finish adapting this
\r
259 return make_Alignment(entry, seqs);
\r