2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
23 import jalview.datamodel.AlignmentAnnotation;
24 import jalview.datamodel.Sequence;
25 import jalview.datamodel.SequenceI;
26 import jalview.util.Format;
28 import java.io.IOException;
29 import java.util.Hashtable;
30 import java.util.StringTokenizer;
31 import java.util.Vector;
/**
 * Alignment file handler for the CLUSTAL text format, built on AlignFile.
 * parse() reads sequence rows (and structure-like annotation rows) from the
 * input; print(SequenceI[]) writes sequences back out as a CLUSTAL block
 * listing.
 */
33 public class ClustalFile extends AlignFile
/**
 * Creates a Clustal file handler for the given input.
 *
 * @param inFile
 *          the input to read (NOTE(review): presumably a path or data string
 *          interpreted according to type; confirm against AlignFile)
 * @param type
 *          how inFile should be interpreted (semantics not visible in this
 *          excerpt; confirm against AlignFile)
 * @throws IOException
 *           declared by this constructor; the body is not visible in this
 *           excerpt
 */
40 public ClustalFile(String inFile, String type) throws IOException
/**
 * Creates a Clustal file handler that reads from an existing FileParse
 * source.
 *
 * @param source
 *          the already-constructed input source
 * @throws IOException
 *           declared by this constructor; the body is not visible in this
 *           excerpt
 */
45 public ClustalFile(FileParse source) throws IOException
/**
 * Initialises the parser's data holders.
 * NOTE(review): the body is not visible in this excerpt; presumably it
 * delegates to the superclass implementation. Confirm.
 */
50 public void initData()
/**
 * Parses CLUSTAL-formatted input obtained line by line from nextLine().
 *
 * Rows that do not begin with a space are treated as "id sequence" rows: the
 * sequence text is accumulated per id across blocks, and ids are remembered
 * in first-seen order. Rows that begin with whitespace and consist only of
 * the characters - . ( [ ] ) are collected as structure-like annotation
 * strings. After reading, one Sequence is created per id and added to seqs,
 * and up to two annotation rows are added to annotations when their length
 * equals the longest sequence length.
 *
 * @throws IOException
 *           declared by the signature; an IOException raised while reading
 *           lines is caught and reported on System.err in the visible code
 */
55 public void parse() throws IOException
// Two accumulators for structure-like annotation lines. The test that picks
// between them for a given line is not visible in this excerpt.
61 StringBuffer pssecstr = new StringBuffer(), consstr = new StringBuffer();
// headers keeps sequence ids in the order they are first seen.
62 Vector headers = new Vector();
// seqhash maps a sequence id to the StringBuffer holding its accumulated text.
63 Hashtable seqhash = new Hashtable();
// Consume the input until nextLine() reports the end by returning null.
70 while ((line = nextLine()) != null)
72 if (line.length() == 0)
// indexOf(" ") != 0 is true when the line does not start with a space,
// i.e. it is a candidate "id sequence" row.
76 if (line.indexOf(" ") != 0)
78 str = new StringTokenizer(line, " ");
80 if (str.hasMoreTokens())
// A first token of "CLUSTAL" (any letter case) marks the format header line
// rather than a sequence row.
84 if (id.equalsIgnoreCase("CLUSTAL"))
// Reuse the buffer for an id already seen in an earlier block, otherwise
// start a new buffer and register it.
92 if (seqhash.containsKey(id))
94 tempseq = (StringBuffer) seqhash.get(id);
98 tempseq = new StringBuffer();
99 seqhash.put(id, tempseq);
// Record each id only once so output order follows first appearance.
102 if (!(headers.contains(id)))
104 headers.addElement(id);
// Append the token that immediately follows the id; a row holding only an id
// contributes no sequence text.
107 if (str.hasMoreTokens())
109 tempseq.append(str.nextToken());
// String.matches requires the whole line to match: leading whitespace
// followed by one or more of - . ( [ ] ) and nothing else.
122 if (line.matches("\\s+(-|\\.|\\(|\\[|\\]|\\))+"))
126 pssecstr.append(line.trim());
130 consstr.append(line.trim());
// Read failures are reported on stderr.
// NOTE(review): no rethrow is visible in this excerpt - confirm whether
// callers ever see the failure.
135 } catch (IOException e)
137 System.err.println("Exception parsing clustal file " + e);
143 this.noSeqs = headers.size();
145 // Build a Sequence for every recorded id from its accumulated text
146 for (i = 0; i < headers.size(); i++)
148 if (seqhash.get(headers.elementAt(i)) != null)
// Track the longest accumulated sequence text in maxLength.
150 if (maxLength < seqhash.get(headers.elementAt(i)).toString()
153 maxLength = seqhash.get(headers.elementAt(i)).toString()
// parseId (defined outside this excerpt) creates the Sequence from the id
// string; its residues are then set from the accumulated text.
157 Sequence newSeq = parseId(headers.elementAt(i).toString());
158 newSeq.setSequence(seqhash.get(headers.elementAt(i).toString())
161 seqs.addElement(newSeq);
166 .println("Clustal File Reader: Can't find sequence for "
167 + headers.elementAt(i));
// Remembers the first annotation row (if one is created) so the second row
// can be compared against it below.
170 AlignmentAnnotation lastssa = null;
// An annotation row is only created when its string is exactly as long as
// the longest sequence.
171 if (pssecstr.length() == maxLength)
173 Vector ss = new Vector();
174 AlignmentAnnotation ssa = lastssa = StockholmFile
175 .parseAnnotationRow(ss, "secondary structure",
176 pssecstr.toString());
177 ssa.label = "Secondary Structure";
178 annotations.addElement(ssa);
180 if (consstr.length() == maxLength)
182 Vector ss = new Vector();
183 AlignmentAnnotation ssa = StockholmFile.parseAnnotationRow(ss,
184 "secondary structure", consstr.toString());
185 ssa.label = "Consensus Secondary Structure";
// The visible part of the condition adds the consensus row when its RNA
// structure string (with '-' replaced by '.') differs from that of the first
// row. The first operand of the || is not visible in this excerpt.
187 || !lastssa.getRNAStruc().equals(
188 ssa.getRNAStruc().replace('-', '.')))
190 annotations.addElement(ssa);
/**
 * Renders this file's sequences in CLUSTAL format by passing the array from
 * getSeqsAsArray() to print(SequenceI[]).
 *
 * @return the formatted alignment text
 */
196 public String print()
198 return print(getSeqsAsArray());
199 // TODO: locaRNA style aln output
/**
 * Renders the given sequences as CLUSTAL-style text: a "CLUSTAL" header
 * followed by blocks in which each row is a padded sequence id and a slice
 * of that sequence.
 *
 * @param s
 *          sequences to write; processing stops at the first null entry
 * @return the formatted alignment text
 */
202 public String print(SequenceI[] s)
// Output starts with the format keyword followed by two line separators.
204 StringBuffer out = new StringBuffer("CLUSTAL" + newline + newline);
// First pass: find the longest sequence (max) and the longest printed id
// (maxid), stopping at the end of the array or the first null entry.
211 while ((i < s.length) && (s[i] != null))
213 String tmp = printId(s[i]);
215 if (s[i].getSequence().length > max)
217 max = s[i].getSequence().length;
220 if (tmp.length() > maxid)
222 maxid = tmp.length();
// NOTE(review): when max is an exact multiple of len this yields one more
// chunk than there is sequence data; the id label below is appended
// unconditionally, so that last chunk would appear to hold ids only. The
// declarations of len and start are not visible here - verify.
236 int nochunks = (max / len) + 1;
238 for (i = 0; i < nochunks; i++)
242 while ((j < s.length) && (s[j] != null))
// "%-Ns" with N = maxid: the id plus a trailing space, passed through
// jalview.util.Format (presumably printf-like left-justified padding -
// confirm against Format).
244 out.append(new Format("%-" + maxid + "s").form(printId(s[j]) + " "));
247 int end = start + len;
// Chunk end lies strictly inside this sequence: emit the slice start..end.
249 if ((end < s[j].getSequence().length)
250 && (start < s[j].getSequence().length))
252 out.append(s[j].getSequenceAsString(start, end));
// Otherwise emit whatever remains from start, if the sequence reaches that
// far at all.
256 if (start < s[j].getSequence().length)
258 out.append(s[j].getSequenceAsString().substring(start));
269 return out.toString();