2 * Jalview - A Sequence Alignment Editor and Viewer
\r
3 * Copyright (C) 2005 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle
\r
5 * This program is free software; you can redistribute it and/or
\r
6 * modify it under the terms of the GNU General Public License
\r
7 * as published by the Free Software Foundation; either version 2
\r
8 * of the License, or (at your option) any later version.
\r
10 * This program is distributed in the hope that it will be useful,
\r
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
\r
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
\r
13 * GNU General Public License for more details.
\r
15 * You should have received a copy of the GNU General Public License
\r
16 * along with this program; if not, write to the Free Software
\r
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
\r
24 import jalview.datamodel.*;
\r
25 import jalview.util.*;
\r
/**
 * File handler for CLUSTAL-format alignments: initData() looks for a leading
 * "CLUSTAL" token while reading, and print(SequenceI[]) starts its output
 * with a "CLUSTAL" line.
 */
27 public class ClustalFile
\r
/** Creates a ClustalFile without supplying any input source. */
32 public ClustalFile()
\r
/**
 * Creates a ClustalFile from a single string.
 *
 * @param inStr NOTE(review): the constructor body is not visible in this
 *        listing; presumably the alignment text to be parsed - confirm
 *        against the superclass constructor it calls.
 */
36 public ClustalFile(String inStr)
\r
/**
 * Creates a ClustalFile by handing both arguments, unchanged, to the
 * superclass constructor.
 *
 * @param inFile forwarded as the first superclass constructor argument
 * @param type forwarded as the second superclass constructor argument
 */
41 public ClustalFile(String inFile, String type)
\r
44 super(inFile, type);
\r
/**
 * Reads the input line by line via nextLine() and builds Sequence objects.
 *
 * Pass 1 (the while loop): every line that does not start with a space is
 * split on spaces; the first token is taken as the sequence id and the second
 * token (if any) is appended to a per-id StringBuffer, so a sequence spread
 * over several lines is concatenated in file order.
 *
 * Pass 2 (the for loop): for each id, in first-seen order, a Sequence is
 * created from the accumulated text and added to seqs. Ids of the form
 * "name/start-end" are split into a name plus numeric start and end.
 *
 * An IOException is reported on System.err (message plus stack trace).
 *
 * NOTE(review): this listing omits lines (braces, declarations, the try,
 * else keywords and some continuation lines), so comments below describe only
 * what the visible statements do.
 */
47 public void initData()
\r
56 boolean flag = false;
\r
// headers keeps ids in the order they are first met; seqhash maps id -> StringBuffer.
58 Vector headers = new Vector();
\r
59 Hashtable seqhash = new Hashtable();
\r
// Read until nextLine() returns null.
65 while ( (line = nextLine()) != null)
\r
// indexOf(" ") != 0 accepts every line whose first character is not a space
// (including lines that contain no space at all, where indexOf returns -1).
67 if (line.indexOf(" ") != 0)
\r
69 StringTokenizer str = new StringTokenizer(line, " ");
\r
72 if (str.hasMoreTokens())
\r
// First space-separated token on the line is treated as the sequence id.
74 id = str.nextToken();
\r
// Detects the "CLUSTAL" header token. NOTE(review): the statements governed by
// this test are not visible in this listing.
76 if (id.equals("CLUSTAL"))
\r
84 StringBuffer tempseq;
\r
// Reuse the buffer already registered for this id ...
86 if (seqhash.containsKey(id))
\r
88 tempseq = (StringBuffer) seqhash.get(id);
\r
// ... or create and register a new one (the branch keyword joining these two
// cases falls on a line not shown here).
92 tempseq = new StringBuffer();
\r
93 seqhash.put(id, tempseq);
\r
// Record each id only once so headers has no duplicates.
96 if (! (headers.contains(id)))
\r
98 headers.addElement(id);
\r
// The next token, when present, is appended to this id's buffer.
101 if (str.hasMoreTokens())
\r
103 tempseq.append(str.nextToken());
\r
// Read failures are logged to stderr; nothing is rethrown in the visible lines.
111 catch (IOException e)
\r
113 System.err.println("Exception parsing clustal file " + e);
\r
114 e.printStackTrace();
\r
// Number of distinct ids collected above.
119 this.noSeqs = headers.size();
\r
121 //Add sequences to the hash
\r
// Walk the ids in first-seen order and turn each buffer into a Sequence.
122 for (i = 0; i < headers.size(); i++)
\r
127 if (seqhash.get(headers.elementAt(i)) != null)
\r
// Updates maxLength from this sequence's text. NOTE(review): the continuation
// lines of the next two statements are not shown; presumably they compare and
// assign the string's length - confirm.
129 if (maxLength < seqhash.get(headers.elementAt(i)).toString()
\r
132 maxLength = seqhash.get(headers.elementAt(i)).toString()
\r
136 String head = headers.elementAt(i).toString();
\r
// True only when a "/" occurs somewhere after the first character.
138 if (head.indexOf("/") > 0)
\r
140 StringTokenizer st = new StringTokenizer(head, "/");
\r
// Exactly two "/"-separated pieces: the first becomes the id, the second is
// examined as a possible "start-end" range.
142 if (st.countTokens() == 2)
\r
144 ids.addElement(st.nextToken());
\r
146 String tmp = st.nextToken();
\r
147 st = new StringTokenizer(tmp, "-");
\r
// Exactly two "-"-separated pieces are parsed as integers into start and end.
149 if (st.countTokens() == 2)
\r
151 start = Integer.valueOf(st.nextToken())
\r
153 end = Integer.valueOf(st.nextToken()).intValue();
\r
// The two statements below store the whole header as the id. NOTE(review): the
// branch lines they belong to are not shown; presumably they are the fallbacks
// for headers that do not match the "name/range" shape - confirm.
158 ids.addElement(headers.elementAt(i));
\r
163 ids.addElement(headers.elementAt(i));
\r
// Build the Sequence from the id stored for this header, the accumulated text
// and the start/end values.
// NOTE(review): ids is indexed with the header index i, which relies on exactly
// one ids entry having been added for every earlier header - confirm.
166 Sequence newSeq = new Sequence(ids.elementAt(i).toString(),
\r
167 seqhash.get(headers.elementAt(i).
\r
169 .toString(), start, end);
\r
171 seqs.addElement(newSeq);
\r
// Reports on stderr a header for which no sequence buffer was found.
175 System.err.println(
\r
176 "Clustal File Reader: Can't find sequence for " +
\r
177 headers.elementAt(i));
\r
/**
 * Formats this file's own sequences by delegating to print(SequenceI[]) with
 * the array returned by getSeqsAsArray().
 *
 * @return the text produced by print(SequenceI[])
 */
183 public String print()
\r
185 return print(getSeqsAsArray());
\r
/**
 * Renders the given sequences as text that begins with the line "CLUSTAL"
 * followed by a blank line.
 *
 * A first pass measures the longest sequence (max) and the longest
 * "name/start-end" label (maxid). The output is then written in chunks of len
 * residues: for every chunk, each sequence contributes its label, a space and
 * the slice of its residues that falls inside the chunk.
 *
 * Both passes stop at the end of the array or at the first null element.
 *
 * NOTE(review): this listing omits lines (braces, declarations such as len,
 * max and maxid, counter increments and line-break appends), so comments
 * below describe only the visible statements.
 *
 * @param s sequences to write; iteration stops at the first null entry
 * @return the formatted text
 */
188 public static String print(SequenceI[] s)
\r
190 StringBuffer out = new StringBuffer("CLUSTAL\n\n");
\r
// Pass 1: measure the longest sequence and the longest label.
197 while ( (i < s.length) && (s[i] != null))
\r
// Label text begins "name/start-"; the rest of the expression is on a line not
// shown (presumably the end position, matching the label built further down).
199 String tmp = s[i].getName() + "/" + s[i].getStart() + "-" +
\r
202 if (s[i].getSequence().length() > max)
\r
204 max = s[i].getSequence().length();
\r
207 if (tmp.length() > maxid)
\r
209 maxid = tmp.length();
\r
// Integer division plus one.
// NOTE(review): when max is an exact multiple of len this gives one chunk more
// than there are residues for; in that last chunk neither substring append
// below fires, while the label append is not guarded by any visible test.
223 int nochunks = (max / len) + 1;
\r
// Pass 2: one iteration per chunk, with an inner walk over the sequences.
225 for (i = 0; i < nochunks; i++)
\r
229 while ( (j < s.length) && (s[j] != null))
\r
// Label "name/start-end" run through a "%-<maxid>s" pattern, then one space.
// NOTE(review): presumably printf-style left-justified padding to maxid
// columns - confirm against jalview.util.Format.
231 out.append(new Format("%-" + maxid + "s").form(s[j].getName() +
\r
232 "/" + s[j].getStart() + "-" + s[j].getEnd()) + " ");
\r
// Half-open residue range [start, end) covered by chunk i.
234 int start = i * len;
\r
235 int end = start + len;
\r
// Sequence extends beyond this chunk: emit exactly the slice [start, end).
237 if ( (end < s[j].getSequence().length()) &&
\r
238 (start < s[j].getSequence().length()))
\r
240 out.append(s[j].getSequence().substring(start, end));
\r
// Sequence ends within (or exactly at the end of) this chunk: emit whatever
// remains from start. Nothing is appended once start is at or past the end.
244 if (start < s[j].getSequence().length())
\r
246 out.append(s[j].getSequence().substring(start));
\r
257 return out.toString();
\r