2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
23 import jalview.datamodel.AlignmentAnnotation;
24 import jalview.datamodel.Sequence;
25 import jalview.datamodel.SequenceI;
26 import jalview.util.Format;
28 import java.io.IOException;
29 import java.util.HashMap;
31 import java.util.StringTokenizer;
32 import java.util.Vector;
/**
 * Reader and writer for alignments in CLUSTAL text format, built on
 * {@link AlignFile}.
 * <p>
 * {@link #parse()} accumulates sequence text per id, plus optional structure
 * strings, from the input lines. {@link #print(SequenceI[], boolean)} writes
 * a header line followed by the sequences in id-prefixed chunks.
 */
34 public class ClustalFile extends AlignFile
/**
 * Creates a ClustalFile by handing both arguments to the {@link AlignFile}
 * constructor.
 *
 * @param inFile
 *          input identifier, passed unchanged to the superclass constructor
 * @param sourceType
 *          source type, passed unchanged to the superclass constructor
 */
42 public ClustalFile(String inFile, DataSourceType sourceType)
45 super(inFile, sourceType);
/**
 * Creates a ClustalFile from a {@link FileParse} supplied by the caller.
 * NOTE(review): the constructor body is not visible in this excerpt;
 * presumably it forwards source to the superclass constructor - confirm.
 *
 * @param source
 *          the FileParse instance supplied by the caller
 * @throws IOException
 *           declared by the signature
 */
49 public ClustalFile(FileParse source) throws IOException
/**
 * Sets {@link #clustalHeader} from the value returned by
 * jalview.bin.Cache.getDefault for the key "CLUSTAL_HEADER", passing
 * "CLUSTAL" as the default argument.
 */
55 private void _initHeader()
58 clustalHeader = jalview.bin.Cache.getDefault("CLUSTAL_HEADER","CLUSTAL");
/**
 * NOTE(review): the method body is not visible in this excerpt. Judging by
 * the name it presumably (re)initialises data held by the AlignFile
 * superclass - confirm against the full source.
 */
64 public void initData()
/**
 * Reads the input line by line via nextLine() and builds the sequences
 * and, where present, secondary structure annotation rows.
 * <p>
 * While reading, sequence text is accumulated per id in a map and ids are
 * recorded once each, in the order first encountered. Lines that consist of
 * leading whitespace followed only by the characters - . ( ) [ ] are
 * accumulated, trimmed, into one of two structure buffers.
 * <p>
 * After reading, noSeqs is set to the number of ids, a Sequence is obtained
 * from parseId for every id that has accumulated text and added to seqs, and
 * maxLength is raised to the longest accumulated text. A structure buffer
 * becomes an annotation row only if its length equals maxLength.
 * <p>
 * NOTE(review): several statements of this method (local declarations, the
 * assignment of id, the tests that use isConservation and choose between
 * the two structure buffers) are not visible in this excerpt.
 *
 * @throws IOException
 *           declared by the signature; an IOException caught by the visible
 *           catch clause is reported on System.err instead
 */
70 public void parse() throws IOException
// Structure text seen on annotation-like lines; which of the two buffers
// receives a given line is decided by a test not visible here.
75 StringBuffer pssecstr = new StringBuffer();
76 StringBuffer consstr = new StringBuffer();
// Ids in first-seen order, and the sequence text accumulated for each id.
77 Vector<String> headers = new Vector<>();
78 Map<String, StringBuffer> seqhash = new HashMap<>();
85 while ((line = nextLine()) != null)
87 if (line.length() == 0)
// A line starting with a space or tab is flagged as a non-sequence line.
91 boolean isConservation = line.startsWith(SPACE)
92 || line.startsWith(TAB);
// Split the line on whitespace (StringTokenizer default delimiters).
95 str = new StringTokenizer(line);
97 if (str.hasMoreTokens())
100 // TODO: JAL-1236 other tokens may be indicative of a header for Clustal format
// Case-insensitive check for the CLUSTAL header token.
101 if (id.equalsIgnoreCase("CLUSTAL"))
// Reuse the buffer already stored for this id, or register a new one.
109 if (seqhash.containsKey(id))
111 tempseq = seqhash.get(id);
115 tempseq = new StringBuffer();
116 seqhash.put(id, tempseq);
// Record each id only once so output order follows first appearance.
119 if (!(headers.contains(id)))
121 headers.addElement(id);
// Only the next token is appended; any further tokens on the line are not
// consumed by the visible code.
124 if (str.hasMoreTokens())
126 tempseq.append(str.nextToken());
// Whole line must be whitespace followed by one or more of - . ( [ ] )
139 if (line.matches("\\s+(-|\\.|\\(|\\[|\\]|\\))+"))
143 pssecstr.append(line.trim());
147 consstr.append(line.trim());
// Read failures are reported on stderr rather than rethrown here.
152 } catch (IOException e)
154 System.err.println("Exception parsing clustal file " + e);
160 this.noSeqs = headers.size();
162 // Create a Sequence for each recorded id and add it to seqs
163 for (i = 0; i < headers.size(); i++)
165 if (seqhash.get(headers.elementAt(i)) != null)
// Track the longest accumulated sequence text in maxLength.
167 if (maxLength < seqhash.get(headers.elementAt(i)).toString()
170 maxLength = seqhash.get(headers.elementAt(i)).toString()
174 Sequence newSeq = parseId(headers.elementAt(i).toString());
// Argument of a call whose first line is not visible in this excerpt
// (presumably it assigns the accumulated text to newSeq - confirm).
176 seqhash.get(headers.elementAt(i).toString()).toString());
178 seqs.addElement(newSeq);
182 System.err.println("Clustal File Reader: Can't find sequence for "
183 + headers.elementAt(i));
// Remembers the first structure row so the second can be compared with it.
186 AlignmentAnnotation lastssa = null;
// A structure string is only used when it spans exactly maxLength columns.
187 if (pssecstr.length() == maxLength)
189 Vector<AlignmentAnnotation> ss = new Vector<>();
190 AlignmentAnnotation ssa = lastssa = StockholmFile
191 .parseAnnotationRow(ss, "secondary structure",
192 pssecstr.toString());
193 ssa.label = "Secondary Structure";
194 annotations.addElement(ssa);
196 if (consstr.length() == maxLength)
198 Vector<AlignmentAnnotation> ss = new Vector<>();
199 AlignmentAnnotation ssa = StockholmFile.parseAnnotationRow(ss,
200 "secondary structure", consstr.toString());
201 ssa.label = "Consensus Secondary Structure";
// Skip the second row when it matches the first once '-' is read as '.'.
202 if (lastssa == null || !lastssa.getRNAStruc()
203 .equals(ssa.getRNAStruc().replace('-', '.')))
205 annotations.addElement(ssa);
211 * clustal header - customise if needed
// Text written at the very start of print() output. Initialised to
// "CLUSTAL" and reassigned by _initHeader().
213 public String clustalHeader = "CLUSTAL";
/**
 * Formats the given sequences as text: clustalHeader followed by two line
 * separators, then the sequences in chunks of len columns. Each row starts
 * with the id from printId plus a space, formatted with "%-<maxid>s",
 * followed by the residues of that sequence for the chunk.
 * <p>
 * NOTE(review): the declarations of max, maxid, len, i and j, the loop
 * increments and any adjustment of maxid between the two loops are not
 * visible in this excerpt.
 *
 * @param s
 *          sequences to write; both loops stop at the end of the array or
 *          at the first null element
 * @param jvsuffix
 *          passed through to printId
 * @return the formatted text
 */
216 public String print(SequenceI[] s, boolean jvsuffix)
218 StringBuffer out = new StringBuffer(clustalHeader + newline + newline);
// First pass: find the longest sequence and the longest printed id.
225 while ((i < s.length) && (s[i] != null))
227 String tmp = printId(s[i], jvsuffix);
229 max = Math.max(max, s[i].getLength());
231 if (tmp.length() > maxid)
233 maxid = tmp.length();
// Number of chunks is max / len rounded up.
247 int nochunks = (max / len) + (max % len > 0 ? 1 : 0);
// Second pass: for every chunk, write one row per sequence.
249 for (i = 0; i < nochunks; i++)
253 while ((j < s.length) && (s[j] != null))
// Id column, formatted to the width held in maxid.
255 out.append(new Format("%-" + maxid + "s")
256 .form(printId(s[j], jvsuffix) + " "));
// Column range [chunkStart, chunkEnd) covered by this chunk.
258 int chunkStart = i * len;
259 int chunkEnd = chunkStart + len;
261 int length = s[j].getLength();
// Chunk lies strictly inside the sequence: take exactly that range.
262 if ((chunkEnd < length) && (chunkStart < length))
264 out.append(s[j].getSequenceAsString(chunkStart, chunkEnd));
// Chunk starts inside the sequence: take everything from chunkStart on.
268 if (chunkStart < length)
270 out.append(s[j].getSequenceAsString().substring(chunkStart));
281 return out.toString();