2 * Jalview - A Sequence Alignment Editor and Viewer (Version 2.7)
3 * Copyright (C) 2011 J Procter, AM Waterhouse, J Engelhardt, LM Lui, G Barton, M Clamp, S Searle
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
11 * Jalview is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty
13 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
14 * PURPOSE. See the GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License along with Jalview. If not, see <http://www.gnu.org/licenses/>.
23 import jalview.datamodel.*;
24 import jalview.util.*;
/**
 * Alignment file handler, extending AlignFile, for CLUSTAL-style text
 * alignments: parse() recognises a line whose leading token is "CLUSTAL"
 * (case-insensitive) and print() emits output that starts with "CLUSTAL".
 * NOTE(review): only part of this class is visible in this listing (braces
 * and several statements are elided), so details below are hedged - confirm
 * against the full source.
 */
26 public class ClustalFile extends AlignFile
/**
 * Creates a ClustalFile from two strings.
 * NOTE(review): the constructor body is not visible in this listing;
 * presumably it forwards both arguments to the AlignFile constructor - confirm.
 *
 * @param inFile NOTE(review): presumably the file name or data to read - confirm
 * @param type NOTE(review): presumably identifies how inFile is accessed - confirm
 * @throws IOException declared by the signature; the throwing code is not visible here
 */
33 public ClustalFile(String inFile, String type) throws IOException
/**
 * Creates a ClustalFile from an existing FileParse source.
 * NOTE(review): the constructor body is not visible in this listing;
 * presumably it forwards source to the AlignFile constructor - confirm.
 *
 * @param source the FileParse object to read from
 * @throws IOException declared by the signature; the throwing code is not visible here
 */
38 public ClustalFile(FileParse source) throws IOException
/**
 * Initialises this reader's data.
 * NOTE(review): the method body is not visible in this listing; presumably it
 * calls or replaces an initData() inherited from AlignFile - confirm.
 */
43 public void initData()
/**
 * Reads the input line by line with nextLine(), collects per-id sequence
 * text, then stores the resulting Sequence objects in seqs and up to two
 * secondary-structure annotation rows in annotations.
 *
 * NOTE(review): this listing omits braces, else branches and the declarations
 * of i, id, line, str and tempseq, so the nesting described in the comments
 * below is inferred from the visible statements - confirm against the full file.
 *
 * @throws IOException declared by the signature. NOTE(review): the visible
 *         catch clause reports an IOException on System.err instead of
 *         rethrowing it, so a read failure may leave a partial alignment
 *         without the caller being told - confirm this is intended.
 */
48 public void parse() throws IOException
// pssecstr and consstr accumulate the trimmed text of structure-like lines
// (see the regex test further down).
54 StringBuffer pssecstr=new StringBuffer(),consstr=new StringBuffer();
// headers keeps each id once, in the order it was first added.
55 Vector headers = new Vector();
// seqhash maps an id to the StringBuffer holding that id's sequence text.
56 Hashtable seqhash = new Hashtable();
63 while ((line = nextLine()) != null)
// True when the line does not start with a space character.
69 if (line.indexOf(" ") != 0)
// Split the line on the space character only.
71 str = new StringTokenizer(line, " ");
73 if (str.hasMoreTokens())
// NOTE(review): the assignment of id is elided; presumably id is the first
// token of the line - confirm.
77 if (id.equalsIgnoreCase("CLUSTAL"))
// Reuse the buffer already stored for this id, or create and register a new one.
85 if (seqhash.containsKey(id))
87 tempseq = (StringBuffer) seqhash.get(id);
91 tempseq = new StringBuffer();
92 seqhash.put(id, tempseq);
// Record the id only the first time it is seen.
95 if (!(headers.contains(id)))
97 headers.addElement(id);
// Append the next token (if any) to this id's buffer.
100 if (str.hasMoreTokens())
102 tempseq.append(str.nextToken());
// String.matches tests the whole line: one or more whitespace characters
// followed by one or more characters from the set - . ( [ ] )
113 if (line.matches("\\s+(-|\\.|\\(|\\[|\\]|\\))+"))
// NOTE(review): the condition choosing between pssecstr and consstr is elided.
117 pssecstr.append(line.trim());
119 consstr.append(line.trim());
124 } catch (IOException e)
// The exception is only printed; it is not rethrown from this clause.
126 System.err.println("Exception parsing clustal file " + e);
// The sequence count is the number of distinct ids recorded.
132 this.noSeqs = headers.size();
// NOTE(review): the original comment on the next line says "hash", but the
// visible loop body adds each new Sequence to seqs.
134 // Add sequences to the hash
135 for (i = 0; i < headers.size(); i++)
137 if (seqhash.get(headers.elementAt(i)) != null)
// Update maxLength from this id's sequence text. NOTE(review): the rest of
// each of the next two expressions is elided; presumably ".length()" - confirm.
139 if (maxLength < seqhash.get(headers.elementAt(i)).toString()
142 maxLength = seqhash.get(headers.elementAt(i)).toString()
// Build a Sequence from the id string and give it the collected sequence text.
146 Sequence newSeq = parseId(headers.elementAt(i).toString());
147 newSeq.setSequence(seqhash.get(headers.elementAt(i).toString())
150 seqs.addElement(newSeq);
// NOTE(review): the receiver of this println and the branch it belongs to are
// elided; presumably the else branch for an id with no stored buffer - confirm.
155 .println("Clustal File Reader: Can't find sequence for "
156 + headers.elementAt(i));
// lastssa holds the first annotation row, if one is created, for the
// comparison made before adding the second row.
159 AlignmentAnnotation lastssa=null;
// pssecstr is only used when its length equals maxLength.
160 if (pssecstr.length()==maxLength)
162 Vector ss=new Vector();
163 AlignmentAnnotation ssa=lastssa=StockholmFile.parseAnnotationRow(ss, "secondary structure", pssecstr.toString());
164 ssa.label="Secondary Structure";
165 annotations.addElement(ssa);
// consstr is only used when its length equals maxLength.
167 if (consstr.length()==maxLength)
169 Vector ss=new Vector();
170 AlignmentAnnotation ssa=StockholmFile.parseAnnotationRow(ss, "secondary structure", consstr.toString());
171 ssa.label="Consensus Secondary Structure";
// Compares lastssa's RNA structure string with this row's after replacing
// '-' by '.'; true when lastssa is null or the two differ.
// NOTE(review): presumably this guards the addElement below - braces are elided.
172 if (lastssa==null || !lastssa.getRNAStruc().equals(ssa.getRNAStruc().replace('-', '.')))
174 annotations.addElement(ssa);
/**
 * Formats this object's sequences by passing the array returned by
 * getSeqsAsArray() to print(SequenceI[]).
 *
 * @return the string produced by print(SequenceI[])
 */
179 public String print()
181 return print(getSeqsAsArray());
/**
 * Builds a text rendering of the given sequences that starts with "CLUSTAL"
 * followed by two newline strings, then writes the sequences in chunks, each
 * row consisting of a padded id followed by a slice of the sequence.
 *
 * NOTE(review): this listing omits the declarations and updates of i, j, max,
 * maxid, len and start as well as braces and else branches, so the loop
 * structure described below is inferred - confirm against the full file.
 *
 * @param s the sequences to write; both loops stop at the first null entry
 * @return the accumulated output as a String
 */
184 public String print(SequenceI[] s)
186 StringBuffer out = new StringBuffer("CLUSTAL"+newline+newline);
// First pass: find the longest sequence (max) and the longest printed id (maxid).
193 while ((i < s.length) && (s[i] != null))
195 String tmp = printId(s[i]);
197 if (s[i].getSequence().length > max)
199 max = s[i].getSequence().length;
202 if (tmp.length() > maxid)
204 maxid = tmp.length();
// NOTE(review): this is max/len + 1 even when max is an exact multiple of len;
// whether that produces an empty trailing chunk depends on elided code - confirm.
218 int nochunks = (max / len) + 1;
220 for (i = 0; i < nochunks; i++)
224 while ((j < s.length) && (s[j] != null))
// Left-justify the id plus a trailing space in a field of width maxid.
226 out.append(new Format("%-" + maxid + "s").form(printId(s[j]) + " "));
229 int end = start + len;
// When both start and end lie before the end of the sequence, append the
// slice obtained from getSequenceAsString(start, end).
231 if ((end < s[j].getSequence().length)
232 && (start < s[j].getSequence().length))
234 out.append(s[j].getSequenceAsString(start, end));
// When only start lies before the end of the sequence, append everything from
// start onwards. NOTE(review): presumably this is the else branch of the test
// above - confirm.
238 if (start < s[j].getSequence().length)
240 out.append(s[j].getSequenceAsString().substring(start));
251 return out.toString();