X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fio%2FEmblFlatFile.java;h=92af0dfc8dbfee08d96c5ea5ca3551822e391efc;hb=551b3a8e53ff6b43f9d0bce08759e46f49373bed;hp=900aef82205f867947617c424a3c2c256e56e3a9;hpb=4b72422011d0d8ec24718b68cd4ea33daa61097e;p=jalview.git diff --git a/src/jalview/io/EmblFlatFile.java b/src/jalview/io/EmblFlatFile.java index 900aef8..92af0df 100644 --- a/src/jalview/io/EmblFlatFile.java +++ b/src/jalview/io/EmblFlatFile.java @@ -50,6 +50,12 @@ public class EmblFlatFile extends AlignFile // FileParse private static final String DOUBLED_QUOTE = QUOTE + QUOTE; /** + * when true, interpret the mol_type 'source' feature attribute and generate + * an RNA sequence from the DNA record + */ + private boolean produceRna = true; + + /** * A data bean class to hold values parsed from one CDS Feature (FT) */ class CdsData @@ -86,6 +92,8 @@ public class EmblFlatFile extends AlignFile // FileParse private List dbrefs; // from DR + private boolean sequenceStringIsRNA = false; + private String sequenceString; // from SQ lines /* @@ -317,11 +325,20 @@ public class EmblFlatFile extends AlignFile // FileParse String parseFT(String line) throws IOException { String[] tokens = line.split(WHITESPACE); - if (tokens.length < 3 || !"CDS".equals(tokens[1])) + if (tokens.length < 3 + || (!"CDS".equals(tokens[1]) && !"source".equals(tokens[1]))) { return nextLine(); } + if (tokens[1].equals("source")) + { + return parseSourceQualifiers(tokens); + } + + /* + * parse location - which may be over more than one line e.g. EAW51554 + */ CdsData data = new CdsData(); data.cdsLocation = tokens[2]; // TODO location can be over >1 line e.g. EAW51554 @@ -418,6 +435,51 @@ public class EmblFlatFile extends AlignFile // FileParse } /** + * process attributes for 'source' until the next FT feature entry only + * interested in 'mol_type' + * + * @param tokens + * @return + * @throws IOException + */ + private String parseSourceQualifiers(String[] tokens) throws IOException + { + if (!"source".equals(tokens[1])) + { + throw (new RuntimeException("Not given a source qualifier")); + } + // search for mol_type attribute + + StringBuilder sb = new StringBuilder().append(tokens[2]); // extent of + // sequence + + String line = parseFeatureQualifier(sb, "source"); + while (line != null) + { + if (!line.startsWith("FT ")) // four spaces, end of this feature table + // entry + { + return line; + } + + int p = line.indexOf("\\mol_type"); + int qs = line.indexOf("\"", p); + int qe = line.indexOf("\"", qs + 1); + String qualifier = line.substring(qs, qe).toLowerCase(); + if (qualifier.indexOf("rna") > -1) + { + sequenceStringIsRNA = true; + } + if (qualifier.indexOf("dna") > -1) + { + sequenceStringIsRNA = false; + } + line = parseFeatureQualifier(sb, "source"); + } + return line; + } + + /** * Removes leading or trailing double quotes (") unless doubled, and changes * any 'escaped' (doubled) double quotes to single characters. As per the * Feature Table specification for Qualifiers, Free Text. @@ -427,7 +489,7 @@ public class EmblFlatFile extends AlignFile // FileParse */ static String removeQuotes(String value) { - if (value == null) + if (value == null) { return null; } @@ -524,6 +586,12 @@ public class EmblFlatFile extends AlignFile // FileParse { name = this.sourceDb + "|" + name; } + + if (produceRna && sequenceStringIsRNA) + { + sequenceString = sequenceString.replace('T', 'U').replace('t', 'u'); + } + SequenceI seq = new Sequence(name, this.sequenceString); seq.setDescription(this.description); @@ -797,7 +865,7 @@ public class EmblFlatFile extends AlignFile // FileParse try { List ranges = DnaUtils.parseLocation(location); - return MappingUtils.listToArray(ranges); + return MappingUtils.rangeListToArray(ranges); } catch (ParseException e) { Cache.log.warn(