2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
23 import jalview.datamodel.AlignmentAnnotation;
24 import jalview.datamodel.Sequence;
25 import jalview.datamodel.SequenceI;
26 import jalview.util.Format;
28 import java.io.IOException;
29 import java.util.Hashtable;
30 import java.util.StringTokenizer;
31 import java.util.Vector;
/**
 * Alignment file handler for the CLUSTAL text format, layered on AlignFile.
 *
 * parse() recognises a leading "CLUSTAL" token, gathers the sequence text
 * found after each identifier, and turns lines made only of structure
 * symbols into annotation rows. print() writes sequences back out under a
 * "CLUSTAL" header.
 */
33 public class ClustalFile extends AlignFile
/**
 * Creates a ClustalFile by handing both arguments straight to the AlignFile
 * constructor.
 *
 * NOTE(review): the line between the signature and the super call is not
 * visible in this view (possibly a throws clause) - confirm before relying
 * on the exception contract.
 *
 * @param inFile
 *          input passed unchanged to the superclass; presumably a path or
 *          pasted text depending on sourceType - confirm against AlignFile
 * @param sourceType
 *          tells the superclass how inFile is to be interpreted
 */
40 public ClustalFile(String inFile, DataSourceType sourceType)
43 super(inFile, sourceType);
/**
 * Creates a ClustalFile from an existing FileParse source.
 *
 * NOTE(review): the constructor body is not visible in this view;
 * presumably it forwards source to the AlignFile constructor - confirm.
 *
 * @param source
 *          the FileParse object to read from
 * @throws IOException
 *           declared on the signature; the raising code is not visible here
 */
46 public ClustalFile(FileParse source) throws IOException
/**
 * Initialises parser state.
 *
 * NOTE(review): the method body is not visible in this view; presumably it
 * delegates to the superclass initData() - confirm.
 */
52 public void initData()
/**
 * Reads the input one line at a time through nextLine() and builds the
 * alignment from it.
 *
 * Lines that do not begin with a space are split on spaces: the first token
 * is treated as a sequence identifier and the following token is appended to
 * that identifier's residue buffer. Lines that begin with whitespace and
 * contain only the symbols - . ( ) [ ] are collected as structure strings.
 *
 * After reading, one Sequence is created per identifier and added to seqs,
 * maxLength is raised to the longest sequence seen, and up to two annotation
 * rows are added to annotations when a collected structure string is exactly
 * maxLength characters long.
 *
 * NOTE(review): several lines of this method (local declarations, the try,
 * else branches and flag handling) are not visible in this view, so the
 * comments below describe only what the visible statements do.
 *
 * @throws IOException
 *           declared on the signature; the catch visible below reports an
 *           IOException on System.err (its matching try is not visible,
 *           presumably it wraps the read loop - confirm)
 */
58 public void parse() throws IOException
// Two accumulators for structure-symbol lines; which one a given line goes
// to is decided further down.
64 StringBuffer pssecstr = new StringBuffer(), consstr = new StringBuffer();
// headers holds each identifier once, in the order first encountered;
// seqhash maps an identifier to the StringBuffer collecting its residues.
65 Vector headers = new Vector();
66 Hashtable seqhash = new Hashtable();
// Main read loop: runs until nextLine() returns null.
73 while ((line = nextLine()) != null)
// Empty line. NOTE(review): the action taken here is not visible in this view.
75 if (line.length() == 0)
// True when the line does not start with a space (first space is not at
// index 0, or there is no space at all): candidate header or sequence line.
79 if (line.indexOf(" ") != 0)
81 str = new StringTokenizer(line, " ");
83 if (str.hasMoreTokens())
// Case-insensitive match on the format's header keyword.
87 if (id.equalsIgnoreCase("CLUSTAL"))
// Reuse the residue buffer already stored for this id, or create and
// register a new one.
95 if (seqhash.containsKey(id))
97 tempseq = (StringBuffer) seqhash.get(id);
101 tempseq = new StringBuffer();
102 seqhash.put(id, tempseq);
// Record the id only the first time it is seen so output order is stable.
105 if (!(headers.contains(id)))
107 headers.addElement(id);
// Append the token that follows the id to this sequence's residues.
110 if (str.hasMoreTokens())
112 tempseq.append(str.nextToken());
// Whole-line match: leading whitespace followed only by one or more of
// the characters - . ( [ ] )
125 if (line.matches("\\s+(-|\\.|\\(|\\[|\\]|\\))+"))
// NOTE(review): the condition choosing between the two appends below is not
// visible in this view.
129 pssecstr.append(line.trim());
133 consstr.append(line.trim());
// An IOException is reported on standard error here.
138 } catch (IOException e)
140 System.err.println("Exception parsing clustal file " + e);
// Number of sequences is the number of distinct identifiers collected.
146 this.noSeqs = headers.size();
148 // Add sequences to the hash
// Walk identifiers in first-seen order and build a Sequence for each one
// that has a residue buffer.
149 for (i = 0; i < headers.size(); i++)
151 if (seqhash.get(headers.elementAt(i)) != null)
// Track the longest sequence seen so far in maxLength.
153 if (maxLength < seqhash.get(headers.elementAt(i)).toString()
156 maxLength = seqhash.get(headers.elementAt(i)).toString()
// parseId turns the identifier text into a Sequence; the collected residues
// are then set on it and it is appended to seqs.
160 Sequence newSeq = parseId(headers.elementAt(i).toString());
161 newSeq.setSequence(seqhash.get(headers.elementAt(i).toString())
164 seqs.addElement(newSeq);
// No residue buffer stored for this identifier: print a diagnostic.
169 .println("Clustal File Reader: Can't find sequence for "
170 + headers.elementAt(i));
// Remembers the first structure row (if one is added) so the consensus row
// below can be compared against it.
173 AlignmentAnnotation lastssa = null;
// The first structure string becomes an annotation row only when it is
// exactly maxLength characters long.
174 if (pssecstr.length() == maxLength)
176 Vector ss = new Vector();
177 AlignmentAnnotation ssa = lastssa = StockholmFile
178 .parseAnnotationRow(ss, "secondary structure",
179 pssecstr.toString());
180 ssa.label = "Secondary Structure";
181 annotations.addElement(ssa);
// Same length test for the second structure string.
183 if (consstr.length() == maxLength)
185 Vector ss = new Vector();
// The consensus row is added when its RNA structure string, with '-'
// replaced by '.', differs from that of the first row.
// NOTE(review): the first operand of the || below is not visible in this
// view; presumably a null check on lastssa - confirm.
186 AlignmentAnnotation ssa = StockholmFile.parseAnnotationRow(ss,
187 "secondary structure", consstr.toString());
188 ssa.label = "Consensus Secondary Structure";
190 || !lastssa.getRNAStruc().equals(
191 ssa.getRNAStruc().replace('-', '.')))
193 annotations.addElement(ssa);
/**
 * Builds the text of the given sequences under a "CLUSTAL" header, with the
 * residues split into consecutive chunks and each row prefixed by the
 * sequence's printed id.
 *
 * NOTE(review): the declarations of len, start, max, maxid, i and j, and the
 * tail of the id-formatting call, are not visible in this view.
 *
 * @param s
 *          sequences to write; iteration stops at the end of the array or at
 *          the first null entry, whichever comes first
 * @param jvsuffix
 *          passed through to printId for every sequence
 * @return the accumulated output text
 */
200 public String print(SequenceI[] s, boolean jvsuffix)
// Output begins with the format keyword followed by two line separators.
202 StringBuffer out = new StringBuffer("CLUSTAL" + newline + newline);
// First pass: find the longest sequence (max) and the widest printed id
// (maxid).
209 while ((i < s.length) && (s[i] != null))
211 String tmp = printId(s[i], jvsuffix);
213 if (s[i].getSequence().length > max)
215 max = s[i].getSequence().length;
218 if (tmp.length() > maxid)
220 maxid = tmp.length();
// Number of chunks is max divided by len, rounded up.
234 int nochunks = (max / len) + (max % len > 0 ? 1 : 0);
// Second pass: for every chunk, emit one row per sequence.
236 for (i = 0; i < nochunks; i++)
240 while ((j < s.length) && (s[j] != null))
// Id column built with the pattern "%-<maxid>s"; presumably printf-style
// left-justified padding to maxid characters - confirm against
// jalview.util.Format.
242 out.append(new Format("%-" + maxid + "s").form(printId(s[j],
246 int end = start + len;
// Chunk lies strictly inside the sequence: emit residues start..end.
248 if ((end < s[j].getSequence().length)
249 && (start < s[j].getSequence().length))
251 out.append(s[j].getSequenceAsString(start, end));
// Chunk reaches or passes the end of the sequence: emit whatever remains
// from start onwards. Nothing is appended here when start is at or past
// the end.
255 if (start < s[j].getSequence().length)
257 out.append(s[j].getSequenceAsString().substring(start));
268 return out.toString();