2 * Jalview - A Sequence Alignment Editor and Viewer (Version 2.8)
3 * Copyright (C) 2012 J Procter, AM Waterhouse, LM Lui, J Engelhardt, G Barton, M Clamp, S Searle
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
11 * Jalview is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty
13 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
14 * PURPOSE. See the GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License along with Jalview. If not, see <http://www.gnu.org/licenses/>.
23 import jalview.datamodel.*;
24 import jalview.util.*;
26 public class ClustalFile extends AlignFile
33 public ClustalFile(String inFile, String type) throws IOException
38 public ClustalFile(FileParse source) throws IOException
43 public void initData()
/**
 * Reads the input line by line with nextLine(), gathers the residues of each
 * sequence id into a per-id buffer, then turns the buffers into Sequence
 * objects (added to seqs) and, when their lengths fit, adds up to two
 * annotation rows built from the lines that start with whitespace.
 *
 * @throws IOException
 *           declared by the signature; the catch clause visible below prints
 *           IOExceptions raised inside its try block to System.err
 */
48 public void parse() throws IOException
// pssecstr and consstr collect the trimmed text of whitespace-led lines (see
// the two append(line.trim()) calls further down).
54 StringBuffer pssecstr = new StringBuffer(), consstr = new StringBuffer();
// headers records ids in first-seen order; seqhash maps id -> StringBuffer.
55 Vector headers = new Vector();
56 Hashtable seqhash = new Hashtable();
63 while ((line = nextLine()) != null)
65 if (line.length() == 0)
// A line whose first character is not a space is split on spaces.
69 if (line.indexOf(" ") != 0)
71 str = new StringTokenizer(line, " ");
73 if (str.hasMoreTokens())
// NOTE(review): the assignment of id is not visible here; presumably it is
// the first token of the line. What happens on a "CLUSTAL" match is also not
// visible.
77 if (id.equalsIgnoreCase("CLUSTAL"))
// Reuse the buffer of an id seen before, otherwise create and register one.
85 if (seqhash.containsKey(id))
87 tempseq = (StringBuffer) seqhash.get(id);
91 tempseq = new StringBuffer();
92 seqhash.put(id, tempseq);
// Remember each id once only, so headers keeps order of first appearance.
95 if (!(headers.contains(id)))
97 headers.addElement(id);
// The next token on the line, if present, is appended to the id's buffer.
100 if (str.hasMoreTokens())
102 tempseq.append(str.nextToken());
// Matches a whole line made of leading whitespace followed only by the
// characters - . ( [ ] ) ; presumably reached for lines that start with a
// space - TODO confirm against the full source.
115 if (line.matches("\\s+(-|\\.|\\(|\\[|\\]|\\))+"))
// NOTE(review): presumably the matching branch; the else structure is not
// visible in this view.
119 pssecstr.append(line.trim());
123 consstr.append(line.trim());
128 } catch (IOException e)
130 System.err.println("Exception parsing clustal file " + e);
// Number of distinct sequence ids encountered.
136 this.noSeqs = headers.size();
138 // Build a Sequence for each header id and add it to seqs
139 for (i = 0; i < headers.size(); i++)
141 if (seqhash.get(headers.elementAt(i)) != null)
143 if (maxLength < seqhash.get(headers.elementAt(i)).toString()
146 maxLength = seqhash.get(headers.elementAt(i)).toString()
150 Sequence newSeq = parseId(headers.elementAt(i).toString());
151 newSeq.setSequence(seqhash.get(headers.elementAt(i).toString())
154 seqs.addElement(newSeq);
159 .println("Clustal File Reader: Can't find sequence for "
160 + headers.elementAt(i));
// The pssecstr text becomes an annotation row only when its length equals
// maxLength; lastssa keeps a reference to that row for the comparison below.
163 AlignmentAnnotation lastssa = null;
164 if (pssecstr.length() == maxLength)
166 Vector ss = new Vector();
167 AlignmentAnnotation ssa = lastssa = StockholmFile
168 .parseAnnotationRow(ss, "secondary structure",
169 pssecstr.toString());
170 ssa.label = "Secondary Structure";
171 annotations.addElement(ssa);
// Same treatment for the consstr text, under a different label.
173 if (consstr.length() == maxLength)
175 Vector ss = new Vector();
176 AlignmentAnnotation ssa = StockholmFile.parseAnnotationRow(ss,
177 "secondary structure", consstr.toString());
178 ssa.label = "Consensus Secondary Structure";
// NOTE(review): the first operand of this condition is not visible
// (presumably a null check on lastssa). The visible operand is true when the
// two rows' getRNAStruc() strings differ once '-' in the new row is replaced
// by '.'.
180 || !lastssa.getRNAStruc().equals(
181 ssa.getRNAStruc().replace('-', '.')))
183 annotations.addElement(ssa);
/**
 * Formats the sequences returned by getSeqsAsArray() by delegating to
 * print(SequenceI[]).
 *
 * @return the string produced by print(SequenceI[])
 */
189 public String print()
191 return print(getSeqsAsArray());
192 // TODO: LocARNA-style alignment output
/**
 * Writes the given sequences as text: a "CLUSTAL" line followed by an empty
 * line, then the sequences in chunks, each row being the id left-justified in
 * a column of width maxid followed by that chunk's residues.
 *
 * @param s
 *          sequences to write; both loops stop at the first null entry
 * @return the formatted text
 */
195 public String print(SequenceI[] s)
197 StringBuffer out = new StringBuffer("CLUSTAL" + newline + newline);
// First pass: find the longest sequence (max) and the longest printed id
// (maxid).
204 while ((i < s.length) && (s[i] != null))
206 String tmp = printId(s[i]);
208 if (s[i].getSequence().length > max)
210 max = s[i].getSequence().length;
213 if (tmp.length() > maxid)
215 maxid = tmp.length();
// NOTE(review): integer division plus one - when max is an exact multiple of
// len this counts one chunk more than the residues need. The value of len and
// what such a chunk prints are not visible in this view.
229 int nochunks = (max / len) + 1;
231 for (i = 0; i < nochunks; i++)
235 while ((j < s.length) && (s[j] != null))
// Id plus a trailing space, left-justified ("%-") to a width of maxid.
237 out.append(new Format("%-" + maxid + "s").form(printId(s[j]) + " "));
240 int end = start + len;
// Full-width slice when both start and end lie inside this sequence ...
242 if ((end < s[j].getSequence().length)
243 && (start < s[j].getSequence().length))
245 out.append(s[j].getSequenceAsString(start, end));
// ... otherwise whatever remains from start; nothing is appended here when
// start is at or past the end of the sequence.
249 if (start < s[j].getSequence().length)
251 out.append(s[j].getSequenceAsString().substring(start));
262 return out.toString();