2 * Jalview - A Sequence Alignment Editor and Viewer (Version 2.7)
3 * Copyright (C) 2011 J Procter, AM Waterhouse, J Engelhardt, LM Lui, G Barton, M Clamp, S Searle
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
11 * Jalview is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty
13 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
14 * PURPOSE. See the GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License along with Jalview. If not, see <http://www.gnu.org/licenses/>.
23 import javax.xml.parsers.ParserConfigurationException;
25 import org.xml.sax.SAXException;
27 import fr.orsay.lri.varna.exceptions.ExceptionFileFormatOrSyntax;
28 import fr.orsay.lri.varna.exceptions.ExceptionLoadingFailed;
29 import fr.orsay.lri.varna.exceptions.ExceptionPermissionDenied;
31 import jalview.datamodel.*;
32 import jalview.util.*;
// Reader and writer for CLUSTAL-format alignments: parse() recognises the
// "CLUSTAL" header token and print() emits it. Extends AlignFile.
// NOTE(review): this listing has gaps (the embedded line numbers skip), so
// braces and some statements of the class are not shown here.
34 public class ClustalFile extends AlignFile
// Creates a ClustalFile from an input file reference (inFile) and a type string.
// The constructor body is not visible in this listing; presumably it passes both
// arguments to the AlignFile superclass - TODO confirm.
41 public ClustalFile(String inFile, String type) throws IOException, ExceptionFileFormatOrSyntax, ParserConfigurationException, SAXException, ExceptionPermissionDenied, ExceptionLoadingFailed
// Creates a ClustalFile from an existing FileParse source.
// The constructor body is not visible in this listing; presumably it passes the
// source to the AlignFile superclass - TODO confirm.
46 public ClustalFile(FileParse source) throws IOException, ExceptionFileFormatOrSyntax, ParserConfigurationException, SAXException, ExceptionPermissionDenied, ExceptionLoadingFailed
// Initialisation hook for this reader's data.
// NOTE(review): the method body is not visible in this listing; presumably it
// delegates to the superclass initData() - TODO confirm.
51 public void initData()
// Reads the input line by line via nextLine(), accumulating residues per
// sequence id and collecting structure-notation lines, then builds one Sequence
// per id (added to seqs) and up to two secondary-structure annotation rows
// (added to annotations).
// NOTE(review): several statements of this method (local declarations, braces,
// flag handling) are missing from this listing.
56 public void parse() throws IOException
// Two accumulators for structure-notation lines (see the regex test below);
// which branch feeds which buffer is decided on lines not visible here.
62 StringBuffer pssecstr=new StringBuffer(),consstr=new StringBuffer();
// Sequence ids, each recorded once, in order of first appearance.
63 Vector headers = new Vector();
// Maps sequence id -> StringBuffer holding the residues accumulated so far.
64 Hashtable seqhash = new Hashtable();
// Consume the input until nextLine() reports no more lines.
71 while ((line = nextLine()) != null)
// A line that does not start with a space is handled as an "id data" line.
77 if (line.indexOf(" ") != 0)
79 str = new StringTokenizer(line, " ");
81 if (str.hasMoreTokens())
// The format's header line starts with the token CLUSTAL (case-insensitive);
// what is done for it is on lines not visible in this listing.
85 if (id.equalsIgnoreCase("CLUSTAL"))
// Reuse the buffer already registered for this id, or create and register one.
93 if (seqhash.containsKey(id))
95 tempseq = (StringBuffer) seqhash.get(id);
99 tempseq = new StringBuffer();
100 seqhash.put(id, tempseq);
// Remember the id the first time it is seen so output order follows the input.
103 if (!(headers.contains(id)))
105 headers.addElement(id);
// The token following the id, when present, is this block's chunk of sequence.
108 if (str.hasMoreTokens())
110 tempseq.append(str.nextToken());
// Whole-line match: leading whitespace followed only by one or more of the
// characters - . ( [ ] ) , i.e. a structure-notation line without an id.
121 if (line.matches("\\s+(-|\\.|\\(|\\[|\\]|\\))+"))
125 pssecstr.append(line.trim());
127 consstr.append(line.trim());
// I/O failures while reading are reported on stderr.
// NOTE(review): whether the exception is rethrown afterwards is not visible here.
132 } catch (IOException e)
134 System.err.println("Exception parsing clustal file " + e);
// The number of sequences is the number of distinct ids collected.
140 this.noSeqs = headers.size();
// Build a Sequence from each id's accumulated buffer and add it to seqs
143 for (i = 0; i < headers.size(); i++)
145 if (seqhash.get(headers.elementAt(i)) != null)
// Track the longest accumulated sequence in maxLength (the continuation of the
// two expressions below is on lines not visible here - presumably .length()).
147 if (maxLength < seqhash.get(headers.elementAt(i)).toString()
150 maxLength = seqhash.get(headers.elementAt(i)).toString()
// parseId() creates the Sequence from the id string; its residues come from the
// buffer stored in seqhash under the same id.
154 Sequence newSeq = parseId(headers.elementAt(i).toString());
155 newSeq.setSequence(seqhash.get(headers.elementAt(i).toString())
158 seqs.addElement(newSeq);
163 .println("Clustal File Reader: Can't find sequence for "
164 + headers.elementAt(i));
// Remembers the first annotation row (if any) so the second can be compared to it.
167 AlignmentAnnotation lastssa=null;
// Only a structure string exactly as long as the longest sequence is turned
// into an annotation row.
168 if (pssecstr.length()==maxLength)
170 Vector ss=new Vector();
171 AlignmentAnnotation ssa=lastssa=StockholmFile.parseAnnotationRow(ss, "secondary structure", pssecstr.toString());
172 ssa.label="Secondary Structure";
173 annotations.addElement(ssa);
// Same length requirement for the second accumulated structure string.
175 if (consstr.length()==maxLength)
177 Vector ss=new Vector();
178 AlignmentAnnotation ssa=StockholmFile.parseAnnotationRow(ss, "secondary structure", consstr.toString());
179 ssa.label="Consensus Secondary Structure";
// Skip this row when it duplicates the first one: it is added only if there is
// no first row, or the first row's RNA structure string differs from this one's
// with every '-' replaced by '.'.
180 if (lastssa==null || !lastssa.getRNAStruc().equals(ssa.getRNAStruc().replace('-', '.')))
182 annotations.addElement(ssa);
// Formats the sequences returned by getSeqsAsArray() by delegating to
// print(SequenceI[]) and returns the resulting text.
187 public String print()
189 return print(getSeqsAsArray());
190 // TODO: LocARNA-style alignment output
// Formats the given sequences as CLUSTAL-style text: a "CLUSTAL" header followed
// by the alignment written in chunks of len columns, one padded-id line per
// sequence in each chunk. Returns the complete text.
// NOTE(review): len, start, j and any adjustment of maxid are set on lines that
// are not visible in this listing.
193 public String print(SequenceI[] s)
// Output starts with the CLUSTAL header line followed by a blank line.
195 StringBuffer out = new StringBuffer("CLUSTAL"+newline+newline);
// First pass: stop at the end of the array or at the first null entry, recording
// the longest sequence (max) and the longest printed id (maxid).
202 while ((i < s.length) && (s[i] != null))
204 String tmp = printId(s[i]);
206 if (s[i].getSequence().length > max)
208 max = s[i].getSequence().length;
211 if (tmp.length() > maxid)
213 maxid = tmp.length();
// Number of chunks needed to cover max columns.
// NOTE(review): when max is an exact multiple of len this looks like it yields
// one extra chunk containing ids but no residues - confirm against len's value.
227 int nochunks = (max / len) + 1;
229 for (i = 0; i < nochunks; i++)
// Second pass, per chunk: same termination rule as the first pass.
233 while ((j < s.length) && (s[j] != null))
// Left-justify the id (plus one space) in a field of maxid characters.
235 out.append(new Format("%-" + maxid + "s").form(printId(s[j]) + " "));
238 int end = start + len;
// Chunk lies entirely inside this sequence: emit columns start..end.
240 if ((end < s[j].getSequence().length)
241 && (start < s[j].getSequence().length))
243 out.append(s[j].getSequenceAsString(start, end));
// Only the chunk's start lies inside the sequence: emit the remainder from start
// (presumably the else branch of the test above - the else is not visible here).
247 if (start < s[j].getSequence().length)
249 out.append(s[j].getSequenceAsString().substring(start));
260 return out.toString();