2 * Jalview - A Sequence Alignment Editor and Viewer (Version 2.8.0b1)
3 * Copyright (C) 2014 The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
11 * Jalview is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty
13 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
14 * PURPOSE. See the GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License along with Jalview. If not, see <http://www.gnu.org/licenses/>.
17 * The Jalview Authors are detailed in the 'AUTHORS' file.
24 import jalview.datamodel.*;
25 import jalview.util.*;
// Alignment file handler for the CLUSTAL text format, built on AlignFile.
// The visible members read an alignment (parse) and write one (print).
27 public class ClustalFile extends AlignFile
// Creates a reader from an input location string and a type string.
// NOTE(review): the constructor body is not visible in this excerpt;
// presumably it hands both arguments to the AlignFile constructor - confirm.
34 public ClustalFile(String inFile, String type) throws IOException
// Creates a reader on top of an already constructed FileParse source.
// NOTE(review): the constructor body is not visible in this excerpt;
// presumably it passes the source to the AlignFile constructor - confirm.
39 public ClustalFile(FileParse source) throws IOException
// Initialisation hook for the reader state.
// NOTE(review): the body is not visible in this excerpt, so what is
// (re)initialised here cannot be stated - check against AlignFile.
44 public void initData()
// Reads the input line by line via nextLine(), collects the residues of each
// sequence id, then builds Sequence objects and up to two annotation rows.
// Several declarations (line, str, id, tempseq, i, maxLength) and the
// brace and else structure are outside this excerpt.
49 public void parse() throws IOException
// Two buffers collecting the trimmed text of annotation-like lines.
55 StringBuffer pssecstr = new StringBuffer(), consstr = new StringBuffer();
// Sequence ids in the order they were first seen.
56 Vector headers = new Vector();
// Maps a sequence id to the StringBuffer holding its residues so far.
57 Hashtable seqhash = new Hashtable();
// Main read loop: runs until nextLine() returns null.
64 while ((line = nextLine()) != null)
// Empty-line check; the action taken is not visible in this excerpt.
66 if (line.length() == 0)
// A line whose first character is not a space is tokenised on spaces;
// the id token is then looked up below.
70 if (line.indexOf(" ") != 0)
72 str = new StringTokenizer(line, " ");
74 if (str.hasMoreTokens())
// Case-insensitive match on the CLUSTAL banner token; the handling of a
// banner line is not visible in this excerpt.
78 if (id.equalsIgnoreCase("CLUSTAL"))
// Reuse the buffer already registered for this id, otherwise create and
// register a new one (the else keyword itself is outside this excerpt).
86 if (seqhash.containsKey(id))
88 tempseq = (StringBuffer) seqhash.get(id);
92 tempseq = new StringBuffer();
93 seqhash.put(id, tempseq);
// Record each id only once so that headers keeps first-seen order.
96 if (!(headers.contains(id)))
98 headers.addElement(id);
// The next token on the line, when present, is appended as residues.
101 if (str.hasMoreTokens())
103 tempseq.append(str.nextToken());
// Matches a line made of leading whitespace followed only by hyphen, dot,
// round bracket or square bracket characters.
116 if (line.matches("\\s+(-|\\.|\\(|\\[|\\]|\\))+"))
// The trimmed line is appended to one of the two buffers.
// NOTE(review): the condition choosing between pssecstr and consstr is
// not visible in this excerpt - confirm against the full file.
120 pssecstr.append(line.trim());
124 consstr.append(line.trim());
// An IOException raised while reading is reported on System.err.
129 } catch (IOException e)
131 System.err.println("Exception parsing clustal file " + e);
// The sequence count is the number of distinct ids collected.
137 this.noSeqs = headers.size();
139 // Add sequences to the hash
// For every id: when a buffer exists, track the longest sequence text in
// maxLength, build a Sequence from the id with parseId, set its residues and
// add it to seqs. When no buffer exists an error message is printed instead.
140 for (i = 0; i < headers.size(); i++)
142 if (seqhash.get(headers.elementAt(i)) != null)
144 if (maxLength < seqhash.get(headers.elementAt(i)).toString()
147 maxLength = seqhash.get(headers.elementAt(i)).toString()
151 Sequence newSeq = parseId(headers.elementAt(i).toString());
152 newSeq.setSequence(seqhash.get(headers.elementAt(i).toString())
155 seqs.addElement(newSeq);
160 .println("Clustal File Reader: Can't find sequence for "
161 + headers.elementAt(i));
// Remembers the annotation row built from pssecstr, if any, so the second
// row can be compared against it.
164 AlignmentAnnotation lastssa = null;
// The pssecstr text becomes an annotation row only when its length equals
// the longest sequence length.
165 if (pssecstr.length() == maxLength)
167 Vector ss = new Vector();
168 AlignmentAnnotation ssa = lastssa = StockholmFile
169 .parseAnnotationRow(ss, "secondary structure",
170 pssecstr.toString());
171 ssa.label = "Secondary Structure";
172 annotations.addElement(ssa);
// Same length requirement for the consstr text.
174 if (consstr.length() == maxLength)
176 Vector ss = new Vector();
177 AlignmentAnnotation ssa = StockholmFile.parseAnnotationRow(ss,
178 "secondary structure", consstr.toString());
179 ssa.label = "Consensus Secondary Structure";
// The row is added when the RNA structure string of lastssa differs from
// this row's string with every hyphen replaced by a dot.
// NOTE(review): the first operand of the || is not visible in this excerpt;
// presumably it is a null check on lastssa - confirm.
181 || !lastssa.getRNAStruc().equals(
182 ssa.getRNAStruc().replace('-', '.')))
184 annotations.addElement(ssa);
// Formats the sequences returned by getSeqsAsArray() by delegating to
// print(SequenceI[]) and returns the resulting text.
190 public String print()
192 return print(getSeqsAsArray());
193 // TODO: locaRNA style aln output
// Formats the given sequences as text: a CLUSTAL banner followed by the
// alignment written in chunks, one padded id plus residues per sequence.
// Declarations of i, j, max, maxid, len and start, and the loop increments,
// are outside this excerpt.
196 public String print(SequenceI[] s)
// Output starts with the banner and two line separators.
198 StringBuffer out = new StringBuffer("CLUSTAL" + newline + newline);
// First pass: find the longest sequence and the longest printed id.
// The scan stops at the end of the array or at the first null entry.
205 while ((i < s.length) && (s[i] != null))
207 String tmp = printId(s[i]);
209 if (s[i].getSequence().length > max)
211 max = s[i].getSequence().length;
214 if (tmp.length() > maxid)
216 maxid = tmp.length();
// Number of chunks of width len needed to cover the longest sequence.
// NOTE(review): when max is an exact multiple of len this appears to give
// one extra chunk that carries no residues - confirm whether intended.
230 int nochunks = (max / len) + 1;
232 for (i = 0; i < nochunks; i++)
// Second pass per chunk over the same non-null prefix of the array.
236 while ((j < s.length) && (s[j] != null))
// The id, with one space appended, is left-justified in a field of maxid.
238 out.append(new Format("%-" + maxid + "s").form(printId(s[j]) + " "));
241 int end = start + len;
// When both ends of the chunk lie inside the sequence, write start to end.
243 if ((end < s[j].getSequence().length)
244 && (start < s[j].getSequence().length))
246 out.append(s[j].getSequenceAsString(start, end));
// When only the start lies inside the sequence, write the remainder.
// NOTE(review): presumably this is the else branch of the test above; the
// else keyword is not visible in this excerpt.
250 if (start < s[j].getSequence().length)
252 out.append(s[j].getSequenceAsString().substring(start));
263 return out.toString();