2 * Jalview - A Sequence Alignment Editor and Viewer (Version 2.7)
3 * Copyright (C) 2011 J Procter, AM Waterhouse, J Engelhardt, LM Lui, G Barton, M Clamp, S Searle
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
11 * Jalview is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty
13 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
14 * PURPOSE. See the GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License along with Jalview. If not, see <http://www.gnu.org/licenses/>.
23 import jalview.datamodel.*;
24 import jalview.util.*;
/**
 * Alignment file handler for CLUSTAL-format text, built on AlignFile.
 * parse() recognises the "CLUSTAL" header token and print() emits a
 * "CLUSTAL" header, so the class covers both reading and writing.
 */
26 public class ClustalFile extends AlignFile
/**
 * Creates a reader from a file location and a type string.
 * NOTE(review): constructor body is not visible here; presumably it
 * delegates to the AlignFile constructor - confirm.
 *
 * @throws IOException declared by the signature
 */
33 public ClustalFile(String inFile, String type) throws IOException
/**
 * Creates a reader from an existing FileParse source.
 * NOTE(review): constructor body is not visible here - confirm what it does.
 *
 * @throws IOException declared by the signature
 */
38 public ClustalFile(FileParse source) throws IOException
// NOTE(review): body not visible here; presumably defers to the superclass - confirm.
43 public void initData()
/**
 * Reads the input line by line via nextLine(), accumulating residues per
 * sequence id, then builds Sequence objects into seqs and, when their
 * lengths match the alignment, adds secondary-structure annotation rows
 * to annotations.
 *
 * @throws IOException declared by the signature; an IOException raised
 *           inside the read loop is caught and reported on System.err
 */
48 public void parse() throws IOException
// Buffers for the two kinds of structure line collected while reading.
54 StringBuffer pssecstr=new StringBuffer(),consstr=new StringBuffer();
// headers keeps ids in first-seen order; seqhash maps id -> residue buffer.
55 Vector headers = new Vector();
56 Hashtable seqhash = new Hashtable();
63 while ((line = nextLine()) != null)
// Lines that do not begin with a space are tokenised on spaces.
69 if (line.indexOf(" ") != 0)
71 str = new StringTokenizer(line, " ");
73 if (str.hasMoreTokens())
// A leading "CLUSTAL" token (any case) marks the format header line.
77 if (id.equalsIgnoreCase("CLUSTAL"))
// Reuse the residue buffer already stored for this id, or create and store one.
85 if (seqhash.containsKey(id))
87 tempseq = (StringBuffer) seqhash.get(id);
91 tempseq = new StringBuffer();
92 seqhash.put(id, tempseq);
// Record each id only once so sequence order follows first appearance.
95 if (!(headers.contains(id)))
97 headers.addElement(id);
// The token following the id is appended to that sequence's buffer.
100 if (str.hasMoreTokens())
102 tempseq.append(str.nextToken());
// A line of leading whitespace followed only by the characters - . ( [ ] )
// is treated as a structure line.
113 if (line.matches("\\s+(-|\\.|\\(|\\[|\\]|\\))+"))
// NOTE(review): the condition choosing between pssecstr and consstr is on
// lines not visible here - confirm how the two are distinguished.
117 pssecstr.append(line.trim());
119 consstr.append(line.trim());
124 } catch (IOException e)
126 System.err.println("Exception parsing clustal file " + e);
132 this.noSeqs = headers.size();
134 // Build a Sequence from the hashed residues of each header and add it to seqs
135 for (i = 0; i < headers.size(); i++)
137 if (seqhash.get(headers.elementAt(i)) != null)
// Track the longest sequence seen; used below to validate annotation rows.
139 if (maxLength < seqhash.get(headers.elementAt(i)).toString()
142 maxLength = seqhash.get(headers.elementAt(i)).toString()
146 Sequence newSeq = parseId(headers.elementAt(i).toString());
147 newSeq.setSequence(seqhash.get(headers.elementAt(i).toString())
150 seqs.addElement(newSeq);
155 .println("Clustal File Reader: Can't find sequence for "
156 + headers.elementAt(i));
// Structure strings become annotation rows only when exactly maxLength long.
159 AlignmentAnnotation lastssa=null;
160 if (pssecstr.length()==maxLength)
162 Vector ss=new Vector();
163 AlignmentAnnotation ssa=lastssa=StockholmFile.parseAnnotationRow(ss, "secondary structure", pssecstr.toString());
164 ssa.label="Secondary Structure";
165 annotations.addElement(ssa);
167 if (consstr.length()==maxLength)
169 Vector ss=new Vector();
170 AlignmentAnnotation ssa=StockholmFile.parseAnnotationRow(ss, "secondary structure", consstr.toString());
171 ssa.label="Consensus Secondary Structure";
// Compare against the previous row, if any, with '-' replaced by '.' in the
// consensus structure; presumably this guards the add below so a duplicate
// row is not added twice - confirm against the full source.
172 if (lastssa==null || !lastssa.getRNAStruc().equals(ssa.getRNAStruc().replace('-', '.')))
174 annotations.addElement(ssa);
/**
 * Formats this file's own sequences by passing getSeqsAsArray() to
 * print(SequenceI[]).
 *
 * @return the string produced by print(SequenceI[])
 */
179 public String print()
181 return print(getSeqsAsArray());
182 // TODO: LocARNA-style alignment output
/**
 * Formats the given sequences as text beginning with a "CLUSTAL" header
 * followed by two newlines, then the sequences written in chunks, each row
 * starting with the sequence id.
 *
 * @param s sequences to write; iteration stops at the first null entry
 * @return the formatted text
 */
185 public String print(SequenceI[] s)
187 StringBuffer out = new StringBuffer("CLUSTAL"+newline+newline);
// First pass: find the longest sequence (max) and the longest printed id (maxid).
194 while ((i < s.length) && (s[i] != null))
196 String tmp = printId(s[i]);
198 if (s[i].getSequence().length > max)
200 max = s[i].getSequence().length;
203 if (tmp.length() > maxid)
205 maxid = tmp.length();
// NOTE(review): len is defined on a line not visible here. With integer
// division, (max / len) + 1 gives one more chunk than needed when max is an
// exact multiple of len - confirm whether that extra chunk is intended.
219 int nochunks = (max / len) + 1;
// Second pass: one group of rows per chunk, one row per sequence.
221 for (i = 0; i < nochunks; i++)
225 while ((j < s.length) && (s[j] != null))
// Id plus a trailing space, formatted with a left-justified "%-<maxid>s" spec.
227 out.append(new Format("%-" + maxid + "s").form(printId(s[j]) + " "));
230 int end = start + len;
// When both start and end fall inside the sequence, append the slice start..end.
232 if ((end < s[j].getSequence().length)
233 && (start < s[j].getSequence().length))
235 out.append(s[j].getSequenceAsString(start, end));
// When start is inside the sequence, append everything from start onwards
// (presumably the else-branch of the test above - confirm).
239 if (start < s[j].getSequence().length)
241 out.append(s[j].getSequenceAsString().substring(start));
252 return out.toString();