From 069ab787ab3f2de53f5b24f8af3ea078ff52671b Mon Sep 17 00:00:00 2001 From: Ben Soares Date: Fri, 17 Sep 2021 18:11:05 +0100 Subject: [PATCH] JAL-1260 rudimentary parsing of LOCUS line to at least get accession id --- src/jalview/io/GenBankFile.java | 46 ++++++++++++--------------------------- 1 file changed, 14 insertions(+), 32 deletions(-) diff --git a/src/jalview/io/GenBankFile.java b/src/jalview/io/GenBankFile.java index 7988764..ba7b4b4 100644 --- a/src/jalview/io/GenBankFile.java +++ b/src/jalview/io/GenBankFile.java @@ -2,8 +2,6 @@ package jalview.io; import java.io.IOException; -import jalview.bin.Cache; - /** * A class that provides selective parsing of the GenBank flatfile format. *

@@ -47,7 +45,11 @@ public class GenBankFile extends FlatFile String line = nextLine(); while (line != null) { - if (line.startsWith(DEFINITION)) + if (line.startsWith("LOCUS")) + { + line = parseLocus(line); + } + else if (line.startsWith(DEFINITION)) { line = parseDefinition(line); } @@ -89,45 +91,25 @@ public class GenBankFile extends FlatFile */ String parseLocus(String line) throws IOException { - String[] tokens = line.substring(2).split(";"); + String[] tokens = line.split(WHITESPACE); /* - * first is primary accession + * first should be "LOCUS" */ - String token = tokens[0].trim(); - if (!token.isEmpty()) + if (tokens.length < 2 || !"LOCUS".equals(tokens[0])) { - this.accession = token; + return nextLine(); } - /* - * second token is 'SV versionNo' + * second is primary accession */ - if (tokens.length > 1) + String token = tokens[1].trim(); + if (!token.isEmpty()) { - token = tokens[1].trim(); - if (token.startsWith("SV")) - { - String[] bits = token.trim().split(WHITESPACE); - this.version = bits[bits.length - 1]; - } + this.accession = token; } - /* - * seventh token is 'length BP' - */ - if (tokens.length > 6) - { - token = tokens[6].trim(); - String[] bits = token.trim().split(WHITESPACE); - try - { - this.length = Integer.valueOf(bits[0]); - } catch (NumberFormatException e) - { - Cache.log.error("bad length read in flatfile, line: " + line); - } - } + // the remaining LOCUS fields are deliberately not parsed yet; the third token should be the length followed by its unit (bp) return nextLine(); } -- 1.7.10.2