2 * Jalview - A Sequence Alignment Editor and Viewer (Version 2.8)
3 * Copyright (C) 2012 J Procter, AM Waterhouse, LM Lui, J Engelhardt, G Barton, M Clamp, S Searle
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
11 * Jalview is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty
13 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
14 * PURPOSE. See the GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License along with Jalview. If not, see <http://www.gnu.org/licenses/>.
23 import javax.xml.parsers.ParserConfigurationException;
25 import org.xml.sax.SAXException;
27 import fr.orsay.lri.varna.exceptions.ExceptionFileFormatOrSyntax;
28 import fr.orsay.lri.varna.exceptions.ExceptionLoadingFailed;
29 import fr.orsay.lri.varna.exceptions.ExceptionPermissionDenied;
30 import fr.orsay.lri.varna.exceptions.ExceptionUnmatchedClosingParentheses;
32 import jalview.datamodel.*;
33 import jalview.util.*;
35 public class ClustalFile extends AlignFile
// Constructor taking an input name (inFile) and a type string.
// The throws clause covers I/O failures, XML parser configuration and SAX
// errors, thread interruption, and the VARNA exceptions imported at the top
// of this file.
// NOTE(review): the constructor body is not visible here; presumably both
// arguments are forwarded to the AlignFile superclass - confirm.
42 public ClustalFile(String inFile, String type) throws IOException, ExceptionFileFormatOrSyntax, ParserConfigurationException, SAXException, ExceptionPermissionDenied, ExceptionLoadingFailed, InterruptedException, ExceptionUnmatchedClosingParentheses
// Constructor taking an already constructed FileParse as its data source.
// Declares the same checked exceptions as the (String, String) constructor.
// NOTE(review): the constructor body is not visible here; presumably source
// is handed to the AlignFile superclass - confirm.
47 public ClustalFile(FileParse source) throws IOException, ExceptionFileFormatOrSyntax, ParserConfigurationException, SAXException, ExceptionPermissionDenied, ExceptionLoadingFailed, InterruptedException, ExceptionUnmatchedClosingParentheses
// Initialisation hook; takes no arguments and returns nothing.
// NOTE(review): the body is not visible here; presumably it delegates to the
// superclass initialisation - confirm.
52 public void initData()
// Reads the input line by line via nextLine(), accumulating the sequence text
// for each id, plus two strings (pssecstr, consstr) built from other lines.
// Afterwards it creates one Sequence per id and, when the accumulated strings
// have the same length as the longest sequence, adds annotation rows.
57 public void parse() throws IOException
// pssecstr and consstr collect trimmed line text (see the appends further down).
63 StringBuffer pssecstr = new StringBuffer(), consstr = new StringBuffer();
// headers holds each id once, in order of first appearance;
// seqhash maps an id to the StringBuffer holding its sequence text.
64 Vector headers = new Vector();
65 Hashtable seqhash = new Hashtable();
// The read loop ends when nextLine() returns null.
72 while ((line = nextLine()) != null)
74 if (line.length() == 0)
// Lines that do not start with a space character are split on spaces.
78 if (line.indexOf(" ") != 0)
80 str = new StringTokenizer(line, " ");
82 if (str.hasMoreTokens())
// NOTE(review): the assignment of id is not visible here; presumably it is
// the first token of the line - confirm. The comparison with "CLUSTAL"
// ignores case.
86 if (id.equalsIgnoreCase("CLUSTAL"))
// Reuse the buffer already registered for this id, or register a new one.
94 if (seqhash.containsKey(id))
96 tempseq = (StringBuffer) seqhash.get(id);
100 tempseq = new StringBuffer();
101 seqhash.put(id, tempseq);
// Record the id the first time it is seen.
104 if (!(headers.contains(id)))
106 headers.addElement(id);
// The token following the id is appended to that id's sequence buffer.
109 if (str.hasMoreTokens())
111 tempseq.append(str.nextToken());
// The pattern requires the whole line to be one or more whitespace characters
// followed by one or more of: '-', '.', '(', '[', ']', ')'.
124 if (line.matches("\\s+(-|\\.|\\(|\\[|\\]|\\))+"))
128 pssecstr.append(line.trim());
// NOTE(review): the branch structure separating this append from the one
// above is not visible here; confirm which lines feed consstr.
132 consstr.append(line.trim());
// An IOException raised while reading is reported on System.err.
137 } catch (IOException e)
139 System.err.println("Exception parsing clustal file " + e);
// The number of sequences is the number of distinct ids collected.
145 this.noSeqs = headers.size();
147 // Add sequences to the hash
// For each id with an entry in seqhash: update maxLength from the entry's
// text, build a Sequence via parseId, set its sequence text and add it to
// seqs. The "Can't find sequence" message below is for ids without an entry.
148 for (i = 0; i < headers.size(); i++)
150 if (seqhash.get(headers.elementAt(i)) != null)
152 if (maxLength < seqhash.get(headers.elementAt(i)).toString()
155 maxLength = seqhash.get(headers.elementAt(i)).toString()
159 Sequence newSeq = parseId(headers.elementAt(i).toString());
160 newSeq.setSequence(seqhash.get(headers.elementAt(i).toString())
163 seqs.addElement(newSeq);
168 .println("Clustal File Reader: Can't find sequence for "
169 + headers.elementAt(i));
// lastssa remembers the annotation built from pssecstr, if any, so the
// consstr annotation can be compared against it below.
172 AlignmentAnnotation lastssa = null;
// pssecstr becomes an annotation row only if its length equals maxLength.
173 if (pssecstr.length() == maxLength)
175 Vector ss = new Vector();
176 AlignmentAnnotation ssa = lastssa = StockholmFile
177 .parseAnnotationRow(ss, "secondary structure",
178 pssecstr.toString());
179 ssa.label = "Secondary Structure";
180 annotations.addElement(ssa);
// consstr is handled the same way, with an extra condition before it is
// added: the visible part compares lastssa's RNA structure string with this
// row's string after replacing '-' by '.'.
// NOTE(review): the first operand of that condition is not visible here;
// presumably it guards against lastssa being null - confirm.
182 if (consstr.length() == maxLength)
184 Vector ss = new Vector();
185 AlignmentAnnotation ssa = StockholmFile.parseAnnotationRow(ss,
186 "secondary structure", consstr.toString());
187 ssa.label = "Consensus Secondary Structure";
189 || !lastssa.getRNAStruc().equals(
190 ssa.getRNAStruc().replace('-', '.')))
192 annotations.addElement(ssa);
// Returns the text produced by print(SequenceI[]) for the array returned by
// getSeqsAsArray().
198 public String print()
200 return print(getSeqsAsArray());
201 // TODO: locaRNA style aln output
// Formats the given sequences as text: a "CLUSTAL" header line followed by a
// blank line, then the sequences written in chunks of len characters, each
// row prefixed with the padded sequence id. Returns the assembled string.
//
// s: the sequences to write; processing stops at the end of the array or at
// the first null element.
204 public String print(SequenceI[] s)
206 StringBuffer out = new StringBuffer("CLUSTAL" + newline + newline);
// First pass: find the longest sequence (max) and the longest id string
// returned by printId (maxid).
213 while ((i < s.length) && (s[i] != null))
215 String tmp = printId(s[i]);
217 if (s[i].getSequence().length > max)
219 max = s[i].getSequence().length;
222 if (tmp.length() > maxid)
224 maxid = tmp.length();
// NOTE(review): len is defined on a line not visible here. If max is an exact
// multiple of len, this formula yields one more chunk than the sequence data
// needs - confirm whether that trailing chunk is intended.
238 int nochunks = (max / len) + 1;
// Second pass: for each chunk, write one row per sequence.
240 for (i = 0; i < nochunks; i++)
244 while ((j < s.length) && (s[j] != null))
// The id (plus a trailing space) is formatted left-justified with width maxid.
246 out.append(new Format("%-" + maxid + "s").form(printId(s[j]) + " "));
// NOTE(review): start is defined on a line not visible here; presumably it is
// the chunk offset i * len - confirm.
249 int end = start + len;
// If the sequence extends beyond end, the slice start..end is written;
// otherwise, when start is still inside the sequence, its remainder from
// start onwards is written.
251 if ((end < s[j].getSequence().length)
252 && (start < s[j].getSequence().length))
254 out.append(s[j].getSequenceAsString(start, end));
258 if (start < s[j].getSequence().length)
260 out.append(s[j].getSequenceAsString().substring(start));
271 return out.toString();