2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
26 import jalview.datamodel.*;
27 import jalview.util.*;
/**
 * Alignment file handler for CLUSTAL-style text: parse() recognises the
 * "CLUSTAL" header token and collects one residue buffer per identifier, and
 * print(SequenceI[]) writes a "CLUSTAL" header followed by the sequences in
 * chunks.
 */
29 public class ClustalFile extends AlignFile
/**
 * Creates a ClustalFile from a named input.
 *
 * NOTE(review): presumably both arguments are handed on to the AlignFile
 * superclass; confirm against the constructor body.
 *
 * @param inFile
 *          the input to read (exact meaning depends on type; confirm)
 * @param type
 *          how inFile should be interpreted (confirm against AlignFile)
 * @throws IOException
 *           declared by the signature
 */
36 public ClustalFile(String inFile, String type) throws IOException
/**
 * Creates a ClustalFile that reads from an existing FileParse source.
 *
 * NOTE(review): presumably source is handed on to the AlignFile superclass;
 * confirm against the constructor body.
 *
 * @param source
 *          the already-opened data source
 * @throws IOException
 *           declared by the signature
 */
41 public ClustalFile(FileParse source) throws IOException
/**
 * Initialises the data structures used while reading.
 *
 * NOTE(review): presumably defers to AlignFile.initData(); confirm against
 * the method body.
 */
46 public void initData()
/**
 * Parses CLUSTAL-style alignment text, one line at a time from nextLine().
 *
 * A line that does not begin with a space is split on spaces. Its identifier
 * (presumably the first token; the assignment is not visible here) is compared
 * case-insensitively with the "CLUSTAL" header keyword. For sequence rows the
 * following token, when present, is appended to the residue buffer kept for
 * that identifier. A line consisting of leading whitespace followed only by
 * '-', '.', '(', ')', '[' or ']' characters is accumulated as a structure
 * string.
 *
 * Once reading has finished, noSeqs is set to the number of distinct
 * identifiers, a Sequence is built for every identifier that has a residue
 * buffer and added to seqs, maxLength is raised to the longest residue string
 * seen, and each structure string whose length equals maxLength is turned
 * into an annotation row via StockholmFile.parseAnnotationRow.
 *
 * @throws IOException
 *           declared by the signature; an IOException reaching the handler
 *           below is reported on System.err
 */
51 public void parse() throws IOException
// Structure strings collected from annotation-like lines. Which of the two
// buffers a given line is appended to is decided next to the append calls
// further down.
57 StringBuffer pssecstr = new StringBuffer(), consstr = new StringBuffer();
// Identifiers, in the order in which they were first encountered.
58 Vector headers = new Vector();
// Maps each identifier to the StringBuffer accumulating its residues.
59 Hashtable seqhash = new Hashtable();
66 while ((line = nextLine()) != null)
68 if (line.length() == 0)
// True when the line does not start with a space character.
72 if (line.indexOf(" ") != 0)
74 str = new StringTokenizer(line, " ");
76 if (str.hasMoreTokens())
// The header keyword is matched without regard to case.
80 if (id.equalsIgnoreCase("CLUSTAL"))
// Reuse the residue buffer of an identifier seen before, otherwise start a
// new one and register it.
88 if (seqhash.containsKey(id))
90 tempseq = (StringBuffer) seqhash.get(id);
94 tempseq = new StringBuffer();
95 seqhash.put(id, tempseq);
// Record each identifier only once so that headers keeps first-seen order.
98 if (!(headers.contains(id)))
100 headers.addElement(id);
// Only the next token is appended here; a row with an identifier and no
// further token contributes nothing.
103 if (str.hasMoreTokens())
105 tempseq.append(str.nextToken());
// String.matches tests the whole line: one or more whitespace characters
// followed by one or more of '-', '.', '(', ')', '[' or ']'.
118 if (line.matches("\\s+(-|\\.|\\(|\\[|\\]|\\))+"))
122 pssecstr.append(line.trim());
126 consstr.append(line.trim());
// Report the failure on standard error.
131 } catch (IOException e)
133 System.err.println("Exception parsing clustal file " + e);
139 this.noSeqs = headers.size();
141 // Build a Sequence for every header from its buffered residues
142 for (i = 0; i < headers.size(); i++)
144 if (seqhash.get(headers.elementAt(i)) != null)
// Keep maxLength at the length of the longest residue string seen so far.
146 if (maxLength < seqhash.get(headers.elementAt(i)).toString()
149 maxLength = seqhash.get(headers.elementAt(i)).toString()
// parseId creates the Sequence from the header text; the residues are then
// attached and the sequence is stored in seqs.
153 Sequence newSeq = parseId(headers.elementAt(i).toString());
154 newSeq.setSequence(seqhash.get(headers.elementAt(i).toString())
157 seqs.addElement(newSeq);
162 .println("Clustal File Reader: Can't find sequence for "
163 + headers.elementAt(i));
// Holds the row built from pssecstr (when there is one) so that the row built
// from consstr can be compared against it.
166 AlignmentAnnotation lastssa = null;
// A structure string is only used when it is exactly as long as the longest
// sequence.
167 if (pssecstr.length() == maxLength)
169 Vector ss = new Vector();
170 AlignmentAnnotation ssa = lastssa = StockholmFile
171 .parseAnnotationRow(ss, "secondary structure",
172 pssecstr.toString());
173 ssa.label = "Secondary Structure";
174 annotations.addElement(ssa);
// The row built from consstr is added only when the condition before
// addElement holds. The visible operand compares the RNA structure string of
// the earlier row with that of the new row after replacing '-' with '.';
// NOTE(review): the first operand of the || is not visible here - confirm.
176 if (consstr.length() == maxLength)
178 Vector ss = new Vector();
179 AlignmentAnnotation ssa = StockholmFile.parseAnnotationRow(ss,
180 "secondary structure", consstr.toString());
181 ssa.label = "Consensus Secondary Structure";
183 || !lastssa.getRNAStruc().equals(
184 ssa.getRNAStruc().replace('-', '.')))
186 annotations.addElement(ssa);
/**
 * Formats the sequences returned by getSeqsAsArray() by delegating to
 * print(SequenceI[]).
 *
 * @return the text produced by print(SequenceI[])
 */
192 public String print()
194 return print(getSeqsAsArray());
195 // TODO: locaRNA style aln output
/**
 * Renders the given sequences as CLUSTAL-style text.
 *
 * The output starts with "CLUSTAL" followed by two newline strings. The
 * sequences are then written in chunks of len residues: every row is the
 * printId(...) text plus a space, left-justified to width maxid, followed by
 * the slice of that sequence belonging to the chunk. Both scans over s stop at
 * the end of the array or at the first null entry, whichever comes first.
 *
 * NOTE(review): len, start and any adjustment of maxid are defined on lines
 * that are not visible here; the description assumes start advances by len per
 * chunk - confirm.
 *
 * @param s
 *          the sequences to write; entries after the first null are ignored
 * @return the formatted alignment text
 */
198 public String print(SequenceI[] s)
200 StringBuffer out = new StringBuffer("CLUSTAL" + newline + newline);
// First pass: find the longest sequence (max) and the longest id (maxid).
207 while ((i < s.length) && (s[i] != null))
209 String tmp = printId(s[i]);
211 if (s[i].getSequence().length > max)
213 max = s[i].getSequence().length;
216 if (tmp.length() > maxid)
218 maxid = tmp.length();
// Integer division plus one: enough chunks to cover max residues.
// NOTE(review): when max is an exact multiple of len this counts one more
// chunk than there are residues to fill - confirm whether that is intended.
232 int nochunks = (max / len) + 1;
234 for (i = 0; i < nochunks; i++)
238 while ((j < s.length) && (s[j] != null))
// "%-<maxid>s" left-justifies the id in a field of maxid characters.
240 out.append(new Format("%-" + maxid + "s").form(printId(s[j]) + " "));
243 int end = start + len;
// The sequence extends beyond this chunk: write exactly the start..end slice.
245 if ((end < s[j].getSequence().length)
246 && (start < s[j].getSequence().length))
248 out.append(s[j].getSequenceAsString(start, end));
// Otherwise write whatever remains from start, if anything remains.
252 if (start < s[j].getSequence().length)
254 out.append(s[j].getSequenceAsString().substring(start));
265 return out.toString();