// dbref
retrievedref.setMap(new Mapping(null, new int[] { 1, dna.getLength() },
new int[] { 1, dna.getLength() }, 1, 1));
- // TODO: transform EMBL Database refs to canonical form
+
+
+ /*
+ * transform EMBL Database refs to canonical form
+ */
if (dbRefs != null)
{
for (DBRefEntry dbref : dbRefs)
{
+ dbref.setSource(DBRefUtils.getCanonicalName(dbref.getSource()));
dna.addDBRef(dbref);
}
}
+ SequenceIdMatcher matcher = new SequenceIdMatcher(peptides);
try
{
for (EmblFeature feature : features)
{
- if (feature.dbRefs != null)
- {
- for (DBRefEntry dbref : feature.dbRefs)
- {
- dna.addDBRef(dbref);
- }
- }
if (FeatureProperties.isCodingFeature(sourceDb, feature.getName()))
{
- parseCodingFeature(feature, sourceDb, dna, peptides);
+ parseCodingFeature(feature, sourceDb, dna, peptides, matcher);
}
}
} catch (Exception e)
* parent dna sequence for this record
* @param peptides
* list of protein product sequences for Embl entry
+ * @param matcher
+ * helper to match xrefs in already retrieved sequences
*/
void parseCodingFeature(EmblFeature feature, String sourceDb,
- SequenceI dna, List<SequenceI> peptides)
+ SequenceI dna, List<SequenceI> peptides, SequenceIdMatcher matcher)
{
boolean isEmblCdna = sourceDb.equals(DBRefSource.EMBLCDS);
String prname = "";
String prid = null;
Map<String, String> vals = new Hashtable<String, String>();
- SequenceIdMatcher matcher = new SequenceIdMatcher(peptides);
/*
* codon_start 1/2/3 in EMBL corresponds to phase 0/1/2 in CDS
}
else if (qname.equals("protein_id"))
{
- prid = q.getValues()[0];
+ prid = q.getValues()[0].trim();
}
else if (qname.equals("codon_start"))
{
try
{
- codonStart = Integer.parseInt(q.getValues()[0]);
+ codonStart = Integer.parseInt(q.getValues()[0].trim());
} catch (NumberFormatException e)
{
System.err.println("Invalid codon_start in XML for "
else if (qname.equals("product"))
{
// sometimes name is returned e.g. for V00488
- prname = q.getValues()[0];
+ prname = q.getValues()[0].trim();
}
else
{
boolean mappingUsed = false;
for (DBRefEntry ref : feature.dbRefs)
{
+ /*
+ * ensure UniProtKB/Swiss-Prot converted to UNIPROT
+ */
ref.setSource(DBRefUtils.getCanonicalName(ref.getSource()));
if (ref.getSource().equals(DBRefSource.UNIPROT))
{