2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
23 import jalview.datamodel.AlignmentAnnotation;
24 import jalview.datamodel.Sequence;
25 import jalview.datamodel.SequenceI;
26 import jalview.util.Format;
28 import java.io.IOException;
29 import java.util.Hashtable;
30 import java.util.StringTokenizer;
31 import java.util.Vector;
33 public class ClustalFile extends AlignFile
/**
 * Constructor taking an input-file string and a type string.
 *
 * NOTE(review): the constructor body is not visible in this excerpt;
 * presumably it hands both arguments to the AlignFile superclass - confirm.
 *
 * @param inFile
 *          the input to read (exact meaning not shown here - TODO confirm)
 * @param type
 *          the kind of input (exact meaning not shown here - TODO confirm)
 * @throws IOException
 *           declared by the signature
 */
40 public ClustalFile(String inFile, String type) throws IOException
/**
 * Constructor taking an existing FileParse source.
 *
 * NOTE(review): the constructor body is not visible in this excerpt;
 * presumably it hands the source to the AlignFile superclass - confirm.
 *
 * @param source
 *          the FileParse object to read from
 * @throws IOException
 *           declared by the signature
 */
45 public ClustalFile(FileParse source) throws IOException
/**
 * Initialises this reader's data.
 *
 * NOTE(review): the method body is not visible in this excerpt, so what is
 * initialised cannot be stated here - confirm against the full file.
 */
51 public void initData()
/**
 * Reads Clustal-style alignment text line by line via nextLine(), gathers
 * the residues for each sequence id, then creates a Sequence for every id
 * and adds it to seqs. Lines made up only of structure symbols are collected
 * separately and, when their total length equals the longest sequence, are
 * turned into annotation rows.
 *
 * NOTE(review): several declarations, else-branches and guard conditions of
 * this method are not visible in this excerpt; comments below describe only
 * the statements that are shown.
 *
 * @throws IOException
 *           declared by the signature (an IOException raised inside the
 *           read loop is caught and reported on System.err)
 */
57 public void parse() throws IOException
// pssecstr and consstr accumulate the two kinds of structure line found below
63 StringBuffer pssecstr = new StringBuffer(), consstr = new StringBuffer();
// sequence ids in the order they were first seen
64 Vector headers = new Vector();
// sequence id -> StringBuffer holding the residues read so far for that id
65 Hashtable seqhash = new Hashtable();
// consume the input until nextLine() returns null
72 while ((line = nextLine()) != null)
// empty line; the action taken for it is not visible in this excerpt
74 if (line.length() == 0)
// the line does not begin with a space: split it on spaces
78 if (line.indexOf(" ") != 0)
80 str = new StringTokenizer(line, " ");
82 if (str.hasMoreTokens())
// the first token (id) is compared, ignoring case, with the "CLUSTAL"
// header word; the action taken on a match is not visible here
86 if (id.equalsIgnoreCase("CLUSTAL"))
// an id seen before continues its existing buffer ...
94 if (seqhash.containsKey(id))
96 tempseq = (StringBuffer) seqhash.get(id);
// ... presumably otherwise (else not visible) a new buffer is registered
100 tempseq = new StringBuffer();
101 seqhash.put(id, tempseq);
// record each id only once
104 if (!(headers.contains(id)))
106 headers.addElement(id);
// the token following the id is appended to that id's buffer
109 if (str.hasMoreTokens())
111 tempseq.append(str.nextToken());
// a line of whitespace followed only by '-', '.', '(', '[', ']' or ')'
// characters is treated as a structure line
124 if (line.matches("\\s+(-|\\.|\\(|\\[|\\]|\\))+"))
// the trimmed line goes to pssecstr or consstr; the condition that chooses
// between the two is not visible in this excerpt
128 pssecstr.append(line.trim());
132 consstr.append(line.trim());
// read errors are reported on standard error
137 } catch (IOException e)
139 System.err.println("Exception parsing clustal file " + e);
// one sequence per distinct id
145 this.noSeqs = headers.size();
147 // Add sequences to the hash
148 for (i = 0; i < headers.size(); i++)
150 if (seqhash.get(headers.elementAt(i)) != null)
// keep maxLength at the longest collected sequence string
152 if (maxLength < seqhash.get(headers.elementAt(i)).toString()
155 maxLength = seqhash.get(headers.elementAt(i)).toString()
// build the Sequence from its id, give it the collected residues and
// add it to seqs
159 Sequence newSeq = parseId(headers.elementAt(i).toString());
160 newSeq.setSequence(seqhash.get(headers.elementAt(i).toString())
163 seqs.addElement(newSeq);
// message printed for an id with no stored buffer (presumably the else
// branch of the null check above - confirm)
168 .println("Clustal File Reader: Can't find sequence for "
169 + headers.elementAt(i));
// remembers the first structure annotation so the second can be compared
// with it
172 AlignmentAnnotation lastssa = null;
// only use the collected structure string when its length equals maxLength
173 if (pssecstr.length() == maxLength)
175 Vector ss = new Vector();
176 AlignmentAnnotation ssa = lastssa = StockholmFile
177 .parseAnnotationRow(ss, "secondary structure",
178 pssecstr.toString());
179 ssa.label = "Secondary Structure";
180 annotations.addElement(ssa);
// same treatment for the second collected structure string
182 if (consstr.length() == maxLength)
184 Vector ss = new Vector();
185 AlignmentAnnotation ssa = StockholmFile.parseAnnotationRow(ss,
186 "secondary structure", consstr.toString());
187 ssa.label = "Consensus Secondary Structure";
// added when the RNA structure string of the first annotation differs from
// this one's with '-' replaced by '.'; the first operand of the || is not
// visible in this excerpt
189 || !lastssa.getRNAStruc().equals(
190 ssa.getRNAStruc().replace('-', '.')))
192 annotations.addElement(ssa);
/**
 * Formats the sequences returned by getSeqsAsArray() by delegating to
 * print(SequenceI[]).
 *
 * @return the text produced by print(SequenceI[]) for those sequences
 */
199 public String print()
201 return print(getSeqsAsArray());
202 // TODO: locaRNA style aln output
/**
 * Writes the given sequences as text that starts with a "CLUSTAL" header
 * followed by two line separators, then the sequences broken into chunks,
 * each row prefixed by the sequence's printed id padded to a common width.
 *
 * NOTE(review): the declarations and initial values of max, maxid, len, j and
 * start, the loop increments and any line breaks appended after each row are
 * not visible in this excerpt.
 *
 * @param s
 *          the sequences to write; both loops stop at the first null entry
 * @return the formatted alignment text
 */
205 public String print(SequenceI[] s)
207 StringBuffer out = new StringBuffer("CLUSTAL" + newline + newline);
// first pass: find the longest sequence (max) and the longest printed id
// (maxid)
214 while ((i < s.length) && (s[i] != null))
216 String tmp = printId(s[i]);
218 if (s[i].getSequence().length > max)
220 max = s[i].getSequence().length;
223 if (tmp.length() > maxid)
225 maxid = tmp.length();
// number of chunks of len residues needed to cover max, rounding up
239 int nochunks = (max / len) + (max % len > 0 ? 1 : 0);
// second pass: one group of rows per chunk
241 for (i = 0; i < nochunks; i++)
245 while ((j < s.length) && (s[j] != null))
// id plus a trailing space, left-justified in a field of maxid characters
247 out.append(new Format("%-" + maxid + "s").form(printId(s[j]) + " "));
250 int end = start + len;
// the chunk lies wholly inside this sequence: write positions start to end
252 if ((end < s[j].getSequence().length)
253 && (start < s[j].getSequence().length))
255 out.append(s[j].getSequenceAsString(start, end));
// presumably the alternative branch (else not visible): write whatever
// remains from start onwards
259 if (start < s[j].getSequence().length)
261 out.append(s[j].getSequenceAsString().substring(start));
272 return out.toString();