From: Jim Procter
Date: Thu, 23 Sep 2021 16:24:16 +0000 (+0100)
Subject: JAL-3821 ported to refactored FlatFile parser
X-Git-Tag: Release_2_11_2_0~32^2^2~1
X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=9ae61a06b581ffba65614a2f08acc36e7f7685e7;p=jalview.git

JAL-3821 ported to refactored FlatFile parser
---

diff --git a/src/jalview/io/FlatFile.java b/src/jalview/io/FlatFile.java
index 55fdd37..9e5c652 100644
--- a/src/jalview/io/FlatFile.java
+++ b/src/jalview/io/FlatFile.java
@@ -7,6 +7,7 @@ import java.util.Arrays;
 import java.util.HashMap;
 import java.util.Hashtable;
 import java.util.List;
+import java.util.Locale;
 import java.util.Map;
 import java.util.Map.Entry;
 import java.util.TreeMap;
@@ -159,6 +160,13 @@ public abstract class FlatFile extends AlignFile
   }
 
   /*
+   * when true, interpret the mol_type 'source' feature attribute
+   * and generate an RNA sequence from the DNA record
+   */
+  protected boolean produceRna=true;
+
+
+  /*
    * values parsed from the data file
    */
   protected String sourceDb;
@@ -173,6 +181,8 @@ public abstract class FlatFile extends AlignFile
 
   protected List dbrefs;
 
+  protected boolean sequenceStringIsRNA=false;
+
   protected String sequenceString;
 
   protected Map cds;
@@ -199,6 +209,61 @@ public abstract class FlatFile extends AlignFile
   }
 
   /**
+   * Processes the qualifiers of a 'source' feature until the next FT feature
+   * entry. Only 'mol_type' is interpreted: a value containing "rna" marks the
+   * sequence as RNA, one containing "dna" (checked last) marks it as not RNA.
+   *
+   * @param tokens
+   *          the 'source' line split on whitespace (feature key, location)
+   * @return the first line read that is not part of this entry (may be null)
+   * @throws IOException
+   */
+  private String parseSourceQualifiers(String[] tokens) throws IOException
+  {
+    if (!"source".equals(tokens[0]))
+    {
+      throw (new RuntimeException("Not given a 'source' qualifier line"));
+    }
+
+    StringBuilder sb = new StringBuilder().append(tokens[1]); // extent of
+                                                              // sequence
+
+    String line = parseFeatureQualifier(sb, false);
+    while (line != null)
+    {
+      if (!line.startsWith("FT    ")) // four spaces, end of this feature table
+                                      // entry
+      {
+        return line;
+      }
+
+      // qualifiers are written /name="value"; a line that is not a mol_type
+      // qualifier, or has no opening quote, must not touch the RNA flag
+      int p = line.indexOf("/mol_type");
+      int qs = (p < 0) ? -1 : line.indexOf('"', p);
+      if (qs > -1)
+      {
+        // tolerate a missing closing quote by reading to the end of the line
+        int qe = line.indexOf('"', qs + 1);
+        String qualifier = line
+                .substring(qs + 1, (qe < 0) ? line.length() : qe)
+                .toLowerCase(Locale.ROOT);
+        if (qualifier.indexOf("rna") > -1)
+        {
+          sequenceStringIsRNA = true;
+        }
+        if (qualifier.indexOf("dna") > -1)
+        {
+          sequenceStringIsRNA = false;
+        }
+      }
+      line = parseFeatureQualifier(sb, false);
+    }
+    return line;
+  }
+
+
+  /**
    * Parses one (GenBank or EMBL format) CDS feature, saves the parsed data, and
    * returns the next line
    *
@@ -335,6 +400,12 @@ public abstract class FlatFile extends AlignFile
     {
       name = this.sourceDb + "|" + name;
     }
+
+    if (produceRna && sequenceStringIsRNA)
+    {
+      sequenceString = sequenceString.replace('T', 'U').replace('t', 'u');
+    }
+
     SequenceI seq = new Sequence(name, this.sequenceString);
     seq.setDescription(this.description);
 
@@ -738,11 +809,14 @@ public abstract class FlatFile extends AlignFile
   protected String parseFeature(String line) throws IOException
   {
     String[] tokens = line.trim().split(WHITESPACE);
-    if (tokens.length < 2 || !"CDS".equals(tokens[0]))
+    if (tokens.length < 2 || (!"CDS".equals(tokens[0]) && (!"source".equals(tokens[0]))))
     {
       return nextLine();
     }
-
+    if (tokens[0].equals("source"))
+    {
+      return parseSourceQualifiers(tokens);
+    }
     return parseCDSFeature(tokens[1]);
   }
 }