import java.util.HashMap;
import java.util.Hashtable;
import java.util.List;
+import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;
import java.util.TreeMap;
}
/*
+ * When true, interpret the 'mol_type' qualifier of the 'source' feature
+ * and, if it indicates RNA, generate an RNA sequence from the DNA record
+ */
+ protected boolean produceRna=true;
+
+
+ /*
* values parsed from the data file
*/
protected String sourceDb;
protected List<DBRefEntry> dbrefs;
+ protected boolean sequenceStringIsRNA=false;
+
protected String sequenceString;
protected Map<String, CdsData> cds;
}
/**
+ * Processes the qualifiers of a 'source' feature until the next feature
+ * table entry is reached. Only the 'mol_type' qualifier is of interest: if
+ * its value contains "rna" (case-insensitively) sequenceStringIsRNA is set
+ * to true, and if it contains "dna" it is set to false (DNA wins when both
+ * occur). All other qualifier lines are skipped.
+ *
+ * @param tokens
+ *          tokens of the feature line; tokens[0] must be "source" and
+ *          tokens[1] is the extent of the sequence
+ * @return the first line read that is not part of this feature table entry,
+ *         or null if parseFeatureQualifier returned null
+ * @throws IOException
+ *           propagated from parseFeatureQualifier
+ * @throws RuntimeException
+ *           if tokens[0] is not "source"
+ */
+ private String parseSourceQualifiers(String[] tokens) throws IOException
+ {
+   if (!"source".equals(tokens[0]))
+   {
+     throw (new RuntimeException("Not given a 'source' qualifier line"));
+   }
+
+   StringBuilder sb = new StringBuilder().append(tokens[1]); // extent of
+                                                             // sequence
+
+   String line = parseFeatureQualifier(sb, false);
+   while (line != null)
+   {
+     if (!line.startsWith("FT    ")) // four spaces, end of this feature table
+                                     // entry
+     {
+       return line;
+     }
+
+     // qualifiers are written as /name="value"; the name is matched
+     // case-sensitively, the value case-insensitively
+     int p = line.indexOf("/mol_type");
+     if (p > -1)
+     {
+       String value = line.substring(p + "/mol_type".length());
+       int qs = value.indexOf('"');
+       if (qs > -1)
+       {
+         // keep only the quoted text; tolerate a missing closing quote
+         int qe = value.indexOf('"', qs + 1);
+         value = (qe > -1) ? value.substring(qs + 1, qe)
+                 : value.substring(qs + 1);
+       }
+       value = value.toLowerCase(Locale.ROOT);
+       if (value.indexOf("rna") > -1)
+       {
+         sequenceStringIsRNA = true;
+       }
+       if (value.indexOf("dna") > -1)
+       {
+         sequenceStringIsRNA = false;
+       }
+     }
+     line = parseFeatureQualifier(sb, false);
+   }
+   return line;
+ }
+
+
+ /**
* Parses one (GenBank or EMBL format) CDS feature, saves the parsed data, and
* returns the next line
*
{
name = this.sourceDb + "|" + name;
}
+
+ if (produceRna && sequenceStringIsRNA)
+ {
+ sequenceString = sequenceString.replace('T', 'U').replace('t', 'u');
+ }
+
SequenceI seq = new Sequence(name, this.sequenceString);
seq.setDescription(this.description);
protected String parseFeature(String line) throws IOException
{
String[] tokens = line.trim().split(WHITESPACE); // tokens[0] is tested against the feature names below
- if (tokens.length < 2 || !"CDS".equals(tokens[0]))
+ if (tokens.length < 2 || (!"CDS".equals(tokens[0]) && (!"source".equals(tokens[0])))) // fewer than two tokens, or neither a CDS nor a source feature
{
return nextLine(); // not handled here: hand back whatever nextLine() yields
}
-
+ if (tokens[0].equals("source"))
+ {
+ return parseSourceQualifiers(tokens); // inspects the source qualifiers (mol_type) and returns the line it stopped on
+ }
return parseCDSFeature(tokens[1]); // only "CDS" can reach this point
}
}