2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
23 import java.io.IOException;
25 import jalview.bin.Console;
26 import jalview.datamodel.DBRefEntry;
27 import jalview.util.DBRefUtils;
30 * A class that provides selective parsing of the EMBL flatfile format.
32 * The initial implementation is limited to extracting fields used by Jalview
33 * after fetching an EMBL or EMBLCDS entry:
36 * accession, version, sequence, xref
37 * and (for CDS feature) location, protein_id, product, codon_start, translation
40 * For a complete parser, it may be best to adopt that provided in
41 * https://github.com/enasequence/sequencetools/tree/master/src/main/java/uk/ac/ebi/embl/flatfile
42 * (but note this has a dependency on the Apache Commons library)
45 * @see ftp://ftp.ebi.ac.uk/pub/databases/ena/sequence/release/doc/usrman.txt
46 * @see ftp://ftp.ebi.ac.uk/pub/databases/embl/doc/FT_current.html
48 public class EmblFlatFile extends EMBLLikeFlatFile
51 * Constructor given a data source and the id of the source database
// Constructor taking the data source to read from and the id of the source
// database.
// NOTE(review): the constructor body is not visible in this extract;
// presumably both arguments are passed on to the EMBLLikeFlatFile
// constructor - confirm against the full file.
57 public EmblFlatFile(FileParse fp, String sourceId) throws IOException
63 * Parses the flatfile, and if successful, saves as an annotated sequence
64 * which may be retrieved by calling {@code getSequence()}
// Reads the flatfile and dispatches on the two-letter line code that starts
// each line (ID, DE, DR, SQ, FT).
// NOTE(review): this extract does not show the enclosing loop, the bodies of
// the ID/DE/DR branches, or what happens to lines with any other code.
// Presumably those branches call parseID/parseDE/parseDR - confirm against
// the full file.
69 public void parse() throws IOException
// fetch the first line to dispatch on
71 String line = nextLine();
// ID line (accession, version and length are read by parseID)
74 if (line.startsWith("ID"))
// DE line (description)
78 else if (line.startsWith("DE"))
// DR line (database cross-reference)
82 else if (line.startsWith("DR"))
// SQ line - presumably marks the start of the sequence data
86 else if (line.startsWith("SQ"))
// the line returned by parseSequence replaces the current line
88 line = parseSequence();
// FT line (feature table)
90 else if (line.startsWith("FT"))
// parseFeature is given the line without its two-character "FT" code, and
// the line it returns replaces the current line
92 line = parseFeature(line.substring(2));
103 * Extracts and saves the primary accession and version (SV value) from an ID
104 * line, or null if not found. Returns the next line after the one processed.
107 * @throws IOException
109 String parseID(String line) throws IOException
111 String[] tokens = line.substring(2).split(";");
114 * first is primary accession
116 String token = tokens[0].trim();
117 if (!token.isEmpty())
119 this.accession = token;
123 * second token is 'SV versionNo'
125 if (tokens.length > 1)
127 token = tokens[1].trim();
128 if (token.startsWith("SV"))
130 String[] bits = token.trim().split(WHITESPACE);
131 this.version = bits[bits.length - 1];
136 * seventh token is 'length BP'
138 if (tokens.length > 6)
140 token = tokens[6].trim();
141 String[] bits = token.trim().split(WHITESPACE);
144 this.length = Integer.valueOf(bits[0]);
145 } catch (NumberFormatException e)
147 Console.error("bad length read in flatfile, line: " + line);
155 * Reads sequence description from the first DE line found. Any trailing
156 * period is discarded. If there are multiple DE lines, only the first (short
157 * description) is read, the rest are ignored.
161 * @throws IOException
163 String parseDE(String line) throws IOException
165 String desc = line.substring(2).trim();
166 if (desc.endsWith("."))
168 desc = desc.substring(0, desc.length() - 1);
170 this.description = desc;
173 * pass over any additional DE lines
175 while ((line = nextLine()) != null)
177 if (!line.startsWith("DE"))
187 * Processes one DR line and saves as a DBRefEntry cross-reference. Returns
188 * the line following the line processed.
191 * @throws IOException
193 String parseDR(String line) throws IOException
195 String[] tokens = line.substring(2).split(";");
196 if (tokens.length > 1)
199 * ensure UniProtKB/Swiss-Prot converted to UNIPROT
201 String db = tokens[0].trim();
202 db = DBRefUtils.getCanonicalName(db);
203 String acc = tokens[1].trim();
204 if (acc.endsWith("."))
206 acc = acc.substring(0, acc.length() - 1);
208 String version = "0";
209 if (tokens.length > 2)
211 String secondaryId = tokens[2].trim();
212 if (!secondaryId.isEmpty())
214 // todo: is this right? secondary id is not a version number
215 // version = secondaryId;
218 this.dbrefs.add(new DBRefEntry(db, version, acc));
225 protected boolean isFeatureContinuationLine(String line)
227 return line.startsWith("FT "); // 4 spaces