private static final String DOUBLED_QUOTE = QUOTE + QUOTE;
/**
+ * when true, and the mol_type qualifier of the 'source' feature indicates
+ * RNA, the record's sequence is converted to RNA (T replaced by U)
+ */
+ private boolean produceRna=true;
+ /**
* A data bean class to hold values parsed from one CDS Feature (FT)
*/
class CdsData
private List<DBRefEntry> dbrefs; // from DR
+ private boolean sequenceStringIsRNA=false;
private String sequenceString; // from SQ lines
/*
String parseFT(String line) throws IOException
{
String[] tokens = line.split(WHITESPACE);
- if (tokens.length < 3 || !"CDS".equals(tokens[1]))
+ if (tokens.length < 3 || (!"CDS".equals(tokens[1]) && !"source".equals(tokens[1])))
{
return nextLine();
}
+
+ if (tokens[1].equals("source"))
+ {
+ return parseSourceQualifiers(tokens);
+ }
+ /*
+ * parse location - which may be over more than one line e.g. EAW51554
+ */
CdsData data = new CdsData();
data.cdsLocation = tokens[2];
// TODO location can be over >1 line e.g. EAW51554
}
/**
+  * Processes the qualifier lines of a 'source' feature, reading on until a
+  * line is reached that does not belong to this feature table entry. Only the
+  * 'mol_type' qualifier is inspected: if its value contains "rna" the record
+  * is flagged as RNA (sequenceStringIsRNA = true), and if it contains "dna"
+  * the flag is cleared. All other qualifiers are skipped.
+  *
+  * @param tokens
+  *          the whitespace-split feature line; tokens[1] must be "source" and
+  *          tokens[2] is the feature location
+  * @return the first line read that is not part of the source feature, or
+  *         null if no further line was returned by the reader
+  * @throws IOException
+  *           propagated from reading further input lines
+  * @throws IllegalArgumentException
+  *           if tokens[1] is not "source"
+  */
+ private String parseSourceQualifiers(String[] tokens) throws IOException
+ {
+   if (!"source".equals(tokens[1]))
+   {
+     throw new IllegalArgumentException("Not given a source qualifier");
+   }
+
+   // location of the source feature (extent of sequence); any continuation
+   // text is accumulated here by parseFeatureQualifier but not used further
+   StringBuilder sb = new StringBuilder().append(tokens[2]);
+
+   String line = parseFeatureQualifier(sb, "source");
+   while (line != null)
+   {
+     // qualifier lines have at least four spaces after "FT"; anything else
+     // (next feature key, or a non-FT line) ends this feature table entry
+     if (!line.startsWith("FT    "))
+     {
+       return line;
+     }
+
+     // qualifiers are introduced by a forward slash, e.g.
+     // /mol_type="genomic DNA"
+     int p = line.indexOf("/mol_type");
+     if (p > -1)
+     {
+       // take the text between the quotes if present; otherwise fall back
+       // to the rest of the line so a malformed value cannot cause an
+       // index-out-of-bounds failure
+       int qs = line.indexOf('"', p);
+       int qe = qs == -1 ? -1 : line.indexOf('"', qs + 1);
+       int from = qs == -1 ? p : qs + 1;
+       int to = qe == -1 ? line.length() : qe;
+       String qualifier = line.substring(from, to)
+               .toLowerCase(java.util.Locale.ROOT);
+       if (qualifier.indexOf("rna") > -1)
+       {
+         sequenceStringIsRNA = true;
+       }
+       if (qualifier.indexOf("dna") > -1)
+       {
+         sequenceStringIsRNA = false;
+       }
+     }
+     line = parseFeatureQualifier(sb, "source");
+   }
+   return line;
+ }
+
+ /**
* Removes leading or trailing double quotes (") unless doubled, and changes
* any 'escaped' (doubled) double quotes to single characters. As per the
* Feature Table specification for Qualifiers, Free Text.
{
name = this.sourceDb + "|" + name;
}
+
+ if (produceRna && sequenceStringIsRNA)
+ {
+ sequenceString = sequenceString.replace('T', 'U').replace('t', 'u');
+ }
+
SequenceI seq = new Sequence(name, this.sequenceString);
seq.setDescription(this.description);