2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
23 import jalview.datamodel.AlignmentAnnotation;
24 import jalview.datamodel.AlignmentI;
25 import jalview.datamodel.Sequence;
26 import jalview.datamodel.SequenceGroup;
27 import jalview.datamodel.SequenceI;
28 import jalview.util.MessageManager;
30 import java.io.IOException;
31 import java.util.ArrayList;
32 import java.util.Enumeration;
33 import java.util.Hashtable;
34 import java.util.List;
35 import java.util.Vector;
43 public abstract class AlignFile extends FileParse
44 implements AlignmentFileReaderI, AlignmentFileWriterI
/**
 * Sequences to be added to form a new alignment. The Vector is (re)created
 * by initData() and by setSeqs(). TODO: remove Vector from this class
 */
protected Vector<SequenceI> seqs;

/**
 * Annotation to be added to the generated alignment object; addAnnotations()
 * validates each entry and adds it to the alignment it is given.
 */
protected Vector<AlignmentAnnotation> annotations;

/**
 * SequenceGroups to be added to the alignment object. addSeqGroups()
 * replaces this list with the groups of the alignment passed to it.
 */
protected List<SequenceGroup> seqGroups;

/**
 * Properties to be added to the generated alignment object. Remains null
 * until setAlignmentProperty() stores the first key/value pair.
 */
private Hashtable properties;
/**
 * true if parse() has been called
 */
private boolean parseCalled = false;

/**
 * Copy of the parseImmediately constructor argument, stashed so that a
 * parser can tell whether it has to autoconfigure or was configured after
 * construction. Exposed through isParseImmediately().
 */
private boolean parseImmediately = true;
83 * @return if doParse() was called at construction time
85 protected boolean isParseImmediately()
87 return parseImmediately;
91 * Creates a new AlignFile object.
95 // Shouldn't we init data structures (JBPNote: not sure - initData is for
96 // initialising the structures used for reading from a datasource, and the
97 // bare constructor hasn't got any datasource)
101 public AlignFile(SequenceI[] seqs)
108 * Constructor which parses the data from a file of some specified type.
111 * Filename, URL or Pasted String to read from.
113 * What type of file to read from (File, URL, Pasted String)
115 public AlignFile(String dataObject, DataSourceType sourceType)
118 this(true, dataObject, sourceType);
122 * Constructor which (optionally delays) parsing of data from a file of some
125 * @param parseImmediately
126 * if false, need to call 'doParse()' to begin parsing data
128 * Filename, URL or Pasted String to read from.
130 * What type of file to read from (File, URL)
131 * @throws IOException
133 public AlignFile(boolean parseImmediately, String dataObject,
134 DataSourceType sourceType) throws IOException
136 super(dataObject, sourceType);
138 if (parseImmediately)
145 * Attempt to read from the position where some other parsing process left
149 * @throws IOException
151 public AlignFile(FileParse source) throws IOException
157 * Construct a new parser to read from the position where some other parsing
160 * @param parseImmediately
161 * if false, need to call 'doParse()' to begin parsing data
164 public AlignFile(boolean parseImmediately, FileParse source)
170 // stash flag in case parse needs to know if it has to autoconfigure or was
171 // configured after construction
172 this.parseImmediately = parseImmediately;
174 if (parseImmediately)
181 * called if parsing was delayed till after parser was constructed
183 * @throws IOException
185 public void doParse() throws IOException
189 throw new IOException(
190 "Implementation error: Parser called twice for same data.\n"
191 + "Need to call initData() again before parsing can be reattempted.");
198 * Return the seqs Vector
200 public Vector<SequenceI> getSeqs()
205 public List<SequenceGroup> getSeqGroups()
211 * Return the Sequences in the seqs Vector as an array of Sequences
214 public SequenceI[] getSeqsAsArray()
216 SequenceI[] s = new SequenceI[seqs.size()];
218 for (int i = 0; i < seqs.size(); i++)
220 s[i] = seqs.elementAt(i);
227 * called by AppletFormatAdapter to generate an annotated alignment, rather
228 * than bare sequences.
233 public void addAnnotations(AlignmentI al)
236 for (int i = 0; i < annotations.size(); i++)
238 // detect if annotations.elementAt(i) rna secondary structure
241 * SequenceFeature[] pairArray =
242 * Rna.GetBasePairsFromAlignmentAnnotation(annotations.elementAt(i));
243 * Rna.HelixMap(pairArray);
245 AlignmentAnnotation an = annotations.elementAt(i);
246 an.validateRangeAndDisplay();
247 al.addAnnotation(an);
253 * register sequence groups on the alignment for **output**
257 public void addSeqGroups(AlignmentI al)
259 this.seqGroups = al.getGroups();
264 * Add any additional information extracted from the file to the alignment
267 * @note implicitly called by addAnnotations()
270 public void addProperties(AlignmentI al)
272 if (properties != null && properties.size() > 0)
274 Enumeration keys = properties.keys();
275 Enumeration vals = properties.elements();
276 while (keys.hasMoreElements())
278 al.setProperty(keys.nextElement(), vals.nextElement());
284 * Store a non-null key-value pair in a hashtable used to set alignment
285 * properties note: null keys will raise an error, null values will result in
286 * the key/value pair being silently ignored.
289 * - non-null key object
293 protected void setAlignmentProperty(Object key, Object value)
297 throw new Error(MessageManager.getString(
298 "error.implementation_error_cannot_have_null_alignment"));
302 return; // null properties are ignored.
304 if (properties == null)
306 properties = new Hashtable();
308 properties.put(key, value);
311 protected Object getAlignmentProperty(Object key)
313 if (properties != null && key != null)
315 return properties.get(key);
321 * Initialise objects to store sequence data in.
323 protected void initData()
325 seqs = new Vector<>();
326 annotations = new Vector<>();
327 seqGroups = new ArrayList<>();
338 public void setSeqs(SequenceI[] s)
340 seqs = new Vector<>();
342 for (int i = 0; i < s.length; i++)
344 seqs.addElement(s[i]);
/**
 * This method must be implemented to parse the contents of the file.
 *
 * @throws IOException
 *           declared so that implementations can report failures while
 *           reading or interpreting the data (NOTE(review): the exact
 *           conditions are implementation-specific)
 */
public abstract void parse() throws IOException;
354 * A general parser for ids.
356 * @param id Id to be parsed
358 Sequence parseId(String id)
362 int space = id.indexOf(" ");
365 seq = new Sequence(id.substring(0, space), "");
366 String desc = id.substring(space + 1);
367 seq.setDescription(desc);
370 * it is tempting to parse Ensembl style gene description e.g.
371 * chromosome:GRCh38:7:140696688:140721955:1 and set the
372 * start position of the sequence, but this causes much confusion
373 * for reverse strand feature locations
378 seq = new Sequence(id, "");
385 * Creates the output id. Adds prefix Uniprot format source|id and optionally
386 * suffix Jalview /start-end
390 * @String id Id to be parsed
392 String printId(SequenceI seq, boolean jvsuffix)
394 return seq.getDisplayId(jvsuffix);
397 String printId(SequenceI seq)
399 return printId(seq, true);
/**
 * vector of String[] treeName, newickString pairs; stays null until
 * addNewickTree() stores the first tree
 */
Vector<String[]> newickStrings = null;
407 protected void addNewickTree(String treeName, String newickString)
409 if (newickStrings == null)
411 newickStrings = new Vector<>();
413 newickStrings.addElement(new String[] { treeName, newickString });
416 protected int getTreeCount()
418 return newickStrings == null ? 0 : newickStrings.size();
422 public void addGroups(AlignmentI al)
425 for (SequenceGroup sg : getSeqGroups())
431 protected void addSequence(SequenceI seq)
437 * Used only for hmmer statistics, so should probably be removed at some
438 * point. TODO remove this
442 public Vector<AlignmentAnnotation> getAnnotations()