private static final String DOUBLED_QUOTE = QUOTE + QUOTE;
/**
+ * When true, interpret the 'mol_type' qualifier of the 'source' feature and,
+ * if it indicates RNA, generate an RNA sequence from the DNA record
+ */
+ private boolean produceRna = true;
+
+ /**
* A data bean class to hold values parsed from one CDS Feature (FT)
*/
class CdsData
private List<DBRefEntry> dbrefs; // from DR
+ private boolean sequenceStringIsRNA = false;
+
private String sequenceString; // from SQ lines
/*
String parseFT(String line) throws IOException
{
String[] tokens = line.split(WHITESPACE);
- if (tokens.length < 3 || !"CDS".equals(tokens[1]))
+ if (tokens.length < 3
+ || (!"CDS".equals(tokens[1]) && !"source".equals(tokens[1])))
{
return nextLine();
}
+ if (tokens[1].equals("source"))
+ {
+ return parseSourceQualifiers(tokens);
+ }
+
+ /*
+ * parse location - which may be over more than one line e.g. EAW51554
+ */
CdsData data = new CdsData();
data.cdsLocation = tokens[2];
// TODO location can be over >1 line e.g. EAW51554
}
/**
+ * Processes the qualifier lines of a 'source' feature, stopping at the first
+ * line that is not a qualifier line of this feature. Only the 'mol_type'
+ * qualifier is of interest: its quoted value sets
+ * {@code sequenceStringIsRNA} to true if it contains "rna" and to false if
+ * it contains "dna" (case-insensitive; "dna" wins if both are present). All
+ * other qualifiers are ignored.
+ *
+ * @param tokens
+ *          the whitespace-separated tokens of the FT line that opened the
+ *          feature; tokens[1] must be "source" and tokens[2] is the extent
+ *          of the sequence
+ * @return the first line read that does not start with "FT" followed by
+ *         four spaces, or null if no such line was found
+ * @throws IOException
+ *           if thrown by parseFeatureQualifier (NOTE(review): presumably on
+ *           a read failure - confirm against that method)
+ * @throws IllegalArgumentException
+ *           if tokens[1] is not "source"
+ */
+ private String parseSourceQualifiers(String[] tokens) throws IOException
+ {
+   if (!"source".equals(tokens[1]))
+   {
+     throw new IllegalArgumentException("Not given a source qualifier");
+   }
+
+   // buffer primed with the extent of the sequence; it is only handed to
+   // parseFeatureQualifier and its contents are not read in this method
+   StringBuilder sb = new StringBuilder().append(tokens[2]);
+
+   // NOTE(review): parseFeatureQualifier is assumed to advance the input and
+   // return the next unprocessed line (null at end of input) - confirm
+   String line = parseFeatureQualifier(sb, "source");
+   while (line != null)
+   {
+     // "FT" plus four spaces: anything else ends this feature table entry
+     if (!line.startsWith("FT    "))
+     {
+       return line;
+     }
+
+     // qualifiers are introduced by a forward slash, e.g.
+     // /mol_type="genomic DNA"; lines for other qualifiers are skipped so
+     // that their values cannot affect the RNA flag
+     int p = line.indexOf("/mol_type");
+     if (p > -1)
+     {
+       int qs = line.indexOf('"', p);
+       if (qs > -1)
+       {
+         // tolerate a missing closing quote by reading to the end of line
+         int qe = line.indexOf('"', qs + 1);
+         String value = qe > -1 ? line.substring(qs + 1, qe)
+                 : line.substring(qs + 1);
+         String qualifier = value.toLowerCase(java.util.Locale.ROOT);
+         if (qualifier.indexOf("rna") > -1)
+         {
+           sequenceStringIsRNA = true;
+         }
+         if (qualifier.indexOf("dna") > -1)
+         {
+           sequenceStringIsRNA = false;
+         }
+       }
+     }
+     line = parseFeatureQualifier(sb, "source");
+   }
+   return line;
+ }
+
+ /**
* Removes leading or trailing double quotes (") unless doubled, and changes
* any 'escaped' (doubled) double quotes to single characters. As per the
* Feature Table specification for Qualifiers, Free Text.
*/
static String removeQuotes(String value)
{
- if (value == null)
+ if (value == null)
{
return null;
}
{
name = this.sourceDb + "|" + name;
}
+
+ if (produceRna && sequenceStringIsRNA)
+ {
+ sequenceString = sequenceString.replace('T', 'U').replace('t', 'u');
+ }
+
SequenceI seq = new Sequence(name, this.sequenceString);
seq.setDescription(this.description);
try
{
List<int[]> ranges = DnaUtils.parseLocation(location);
- return MappingUtils.listToArray(ranges);
+ return MappingUtils.rangeListToArray(ranges);
} catch (ParseException e)
{
Cache.log.warn(