basic class defining symbol dictionaries
[vamsas.git] / src / org / vamsas / objects / utils / SeqAln.java
1 /*
2  * Created on 17-May-2005
3  *
4  * TODO To change the template for this generated file go to
5  * Window - Preferences - Java - Code Style - Code Templates
6  */
7 package org.vamsas.objects.utils;
8
9 import java.io.BufferedOutputStream;
10 import java.io.BufferedReader;
11 import java.io.BufferedWriter;
12 import java.io.IOException;
13 import java.io.InputStream;
14 import java.io.InputStreamReader;
15 import java.util.Hashtable;
16 import java.util.StringTokenizer;
17 import java.util.Vector;
18 import java.util.regex.Pattern;
19
20 import vamsas.objects.simple.Alignment;
21 import vamsas.objects.simple.Sequence;
22 import vamsas.objects.simple.SequenceSet;
23
24 /**
25  * @author jimp
26  *
27  * TODO To change the template for this generated type comment go to
28  * Window - Preferences - Java - Code Style - Code Templates
29  */
30 public class SeqAln extends vamsas.objects.simple.Alignment {
31         
32         public static Sequence[] ReadClustalFile(InputStream os) throws Exception {
33
34                 Pattern nonGap = Pattern.compile("[A-Z*0-9]", Pattern.CASE_INSENSITIVE);        
35                 String gapchars = "";
36                 char gapchar='-';
37         
38                 int     i    = 0;
39             boolean flag = false;
40             
41             Vector    headers = new Vector();
42             Hashtable seqhash = new Hashtable();
43             Sequence[] seqs=null;
44             int noSeqs = 0;
45             String line;
46
47             try {
48                 BufferedReader ins = new BufferedReader(new InputStreamReader(os));
49                 while ((line = ins.readLine()) != null) {
50                         if (line.indexOf(" ") != 0) {
51                                 java.util.StringTokenizer str = new StringTokenizer(line," ");
52                                 String id = "";
53                                 
54                                 if (str.hasMoreTokens()) {
55                                         id = str.nextToken();
56                                         if (id.equals("CLUSTAL")) {
57                                                 flag = true;
58                                         } else {
59                                                 if (flag) {
60                                                         StringBuffer tempseq;
61                                                         if (seqhash.containsKey(id)) {
62                                                                 tempseq = (StringBuffer)seqhash.get(id);
63                                                         } else {
64                                                                 tempseq = new StringBuffer();
65                                                                 seqhash.put(id,tempseq);
66                                                         }
67                                                         
68                                                         if (!(headers.contains(id))) {
69                                                                 headers.addElement(id);
70                                                         }
71                                                         
72                                                         tempseq.append(str.nextToken());
73                                                 }
74                                         }
75                                 }
76                         }
77                 }
78                 
79             } catch (IOException e) {
80                 throw(new Exception("Exception parsing clustal file ",e));
81             }
82             
83             if (flag) {
84                 noSeqs = headers.size();
85                 
86                 //Add sequences to the hash
87                 seqs = new Sequence[headers.size()];
88               for (i = 0; i < headers.size(); i++ ) {
89                 if ( seqhash.get(headers.elementAt(i)) != null) {
90                         
91                         Sequence newSeq = new Sequence(headers.elementAt(i).toString(),
92                                                  seqhash.get(headers.elementAt(i).toString()).toString());
93                         
94                         seqs[i]=newSeq;
95
96                 } else {
97                   throw(new Exception("Bizarreness! Can't find sequence for " + headers.elementAt(i)));
98                 }
99               }
100             }
101             return seqs;
102           }
103
104           public static void WriteClustalWAlignment(java.io.OutputStream os, Alignment seqAl) throws IOException {
105                 Sequence[] s = seqAl.getSeqs().getSeqs();
106                 
107                 java.io.BufferedWriter out = new BufferedWriter(new java.io.OutputStreamWriter(os));
108                 
109                 out.write("CLUSTAL\n\n");
110
111             int max = 0;
112             int maxid = 0;
113
114             int i = 0;
115
116             while (i < s.length && s[i] != null) {
117               String tmp = s[i].getId();
118
119               if (s[i].getSeq().length() > max) {
120                 max = s[i].getSeq().length();
121               }
122               if (tmp.length() > maxid) {
123                 maxid = tmp.length();
124               }
125               i++;
126             }
127
128             if (maxid < 15) {
129               maxid = 15;
130             }
131             maxid++;
132             int len = 60;
133             int nochunks =  max / len + 1;
134
135             for (i = 0; i < nochunks; i++) {
136               int j = 0;
137               while ( j < s.length && s[j] != null) {
138                 out.write(new Format("%-" + maxid + "s").form(s[j].getId()+" "));
139                 int start = i*len;
140                 int end = start + len;
141
142                 if (end < s[j].getSeq().length() && start < s[j].getSeq().length() ) {
143                   out.write(s[j].getSeq().substring(start,end) + "\n");
144                 } else {
145                   if (start < s[j].getSeq().length()) {
146                     out.write(s[j].getSeq().substring(start) + "\n");
147                   }
148                 }
149                 j++;
150               }
151               out.write("\n");
152
153             }
154          }
155
156
157           public static Alignment make_Alignment(vamsas.objects.simple.Action origin, Sequence[] seqs, String[] source) throws Exception {
158                 Pattern nonGap = Pattern.compile("[A-Z*0-9]", Pattern.CASE_INSENSITIVE);        
159                 boolean gapsset = false;
160                 char gapchar='-';
161                 int seqLength = 0;
162                 
163                 
164                 for (int i=0, nseq=seqs.length; i<nseq; i++) {
165                         String seq = seqs[i].getSeq();
166                         String gaps = nonGap.matcher(seq).replaceAll("");
167                         if (seqLength==0) {
168                                 seqLength=seq.length();
169                         } else 
170                                 if (seqLength!=seq.length())
171                                         throw(new Exception(i+"th Sequence (>"+seqs[i].getId()+") is not aligned.\n"));//TODO: move this to assertions part of Alignment
172                         
173                         // common check for any sequence...
174                         if (gaps!=null && gaps.length()>0) {
175                                 if (!gapsset)
176                                         gapchar = gaps.charAt(0);
177                                 for (int c=0, gc=gaps.length(); c<gc; c++) {
178                                         if (gapchar!=gaps.charAt(c)) {
179                                                 throw(new IOException("Inconsistent gap characters in sequence "+i+": '"+seq+"'"));
180                                         }
181                                 }
182                         }
183                 }
184
185                 return new Alignment(origin, new SequenceSet(seqs), source, new String(""+gapchar));
186           }
187           
188           public static Alignment read_FastaAlignment(InputStream os, String[] source) throws Exception {
189                 Sequence[] seqs;
190                 try {
191                         seqs = SeqSet.read_SeqFasta(os);
192                         if (seqs==null)
193                                 throw(new Exception("Empty alignment stream!\n"));
194                 } catch (Exception e) {
195                         throw new Exception("Invalid fasta alignment\n",e);
196                 }
197                 
198                 return make_Alignment(new vamsas.objects.simple.Action(source[0]), seqs, source);
199           }
200           public static Alignment read_ClustalAlignment(InputStream os, String[] source) throws Exception {
201                 Sequence[] seqs;
202                 try {
203                         seqs = SeqAln.ReadClustalFile(os);
204                         if (seqs==null)
205                                 throw(new Exception("Empty alignment stream!\n"));
206                 } catch (Exception e) {
207                         throw new Exception("Invalid fasta alignment\n",e);
208                 }
209                 
210                 return make_Alignment(new vamsas.objects.simple.Action(source[0]), seqs, source);
211           }
212 }