* Castor binding file
*
* For example:
- * http://www.ebi.ac.uk/Tools/dbfetch/dbfetch?db=ena_sequence&id=J03321
- * &format=emblxml
+ * http://www.ebi.ac.uk/ena/data/view/J03321&display=xml
*
* @see embl_mapping.xml
*/
*/
public SequenceI getSequence(String sourceDb, List<SequenceI> peptides)
{
- SequenceI dna = new Sequence(sourceDb + "|" + accession,
- sequence.getSequence());
+ SequenceI dna = makeSequence(sourceDb);
+ if (dna == null)
+ {
+ return null;
+ }
dna.setDescription(description);
DBRefEntry retrievedref = new DBRefEntry(sourceDb,
getSequenceVersion(), accession);
}
/**
+ * Creates the nucleotide sequence object for this entry. The sequence is
+ * named <code>sourceDb|accession</code> and is built from the value of
+ * <code>sequence.getSequence()</code>. If this entry holds no sequence, a
+ * message is written to stderr and no object is created.
+ *
+ * @param sourceDb
+ *          the source database name, used as the prefix of the sequence name
+ * @return the new sequence, or null if <code>sequence</code> is null
+ */
+ SequenceI makeSequence(String sourceDb)
+ {
+   if (sequence != null)
+   {
+     // name is "<sourceDb>|<accession>"
+     return new Sequence(sourceDb + "|" + accession,
+             sequence.getSequence());
+   }
+
+   // nothing to build from: report it and let the caller handle null
+   System.err.println("No sequence was returned for ENA accession "
+           + accession);
+   return null;
+ }
+
+ /**
* Extracts the coding region and product from a CDS feature and decorates
* it with annotations.
*
Mapping dnaToProteinMapping = null;
if (translation != null && proteinName != null && proteinId != null)
{
+ int translationLength = translation.length();
+
/*
* look for product in peptides list, if not found, add it
*/
product = matcher.findIdMatch(proteinId);
if (product == null)
{
- product = new Sequence(proteinId, translation, 1, translation.length());
+ product = new Sequence(proteinId, translation, 1, translationLength);
product.setDescription(((proteinName.length() == 0) ? "Protein Product from "
+ sourceDb
: proteinName));
// sequence
if (exons == null || exons.length == 0)
{
+ /*
+ * workaround until we handle dna location for CDS sequence
+ * e.g. location="X53828.1:60..1058" correctly
+ */
System.err
.println("Implementation Notice: EMBLCDS records not properly supported yet - Making up the CDNA region of this sequence... may be incorrect ("
+ sourceDb + ":" + getAccession() + ")");
- if (translation.length() * 3 == (1 - codonStart + dna.getSequence().length))
+ if (translationLength * 3 == (1 - codonStart + dna.getSequence().length))
{
System.err
.println("Not allowing for additional stop codon at end of cDNA fragment... !");
- // this might occur for CDS sequences where no features are
- // marked.
+ // this might occur for CDS sequences where no features are marked
exons = new int[] { dna.getStart() + (codonStart - 1),
dna.getEnd() };
dnaToProteinMapping = new Mapping(product, exons, new int[] { 1,
- translation.length() },
- 3, 1);
+ translationLength }, 3, 1);
}
- if ((translation.length() + 1) * 3 == (1 - codonStart + dna.getSequence().length))
+ if ((translationLength + 1) * 3 == (1 - codonStart + dna
+ .getSequence().length))
{
System.err
.println("Allowing for additional stop codon at end of cDNA fragment... will probably cause an error in VAMSAs!");
exons = new int[] { dna.getStart() + (codonStart - 1),
dna.getEnd() - 3 };
dnaToProteinMapping = new Mapping(product, exons, new int[] { 1,
- translation.length() },
- 3, 1);
+ translationLength }, 3, 1);
}
}
else
else
{
// final product length truncation check
- int[] cdsRanges = adjustForProteinLength(translation.length(), exons);
- dnaToProteinMapping = new Mapping(product, cdsRanges, new int[] { 1,
- translation.length() }, 3, 1);
+ int[] cdsRanges = adjustForProteinLength(translationLength, exons);
+ dnaToProteinMapping = new Mapping(product, cdsRanges, new int[] {
+ 1, translationLength }, 3, 1);
if (product != null)
{
/*
+ * make xref with mapping from protein to EMBL dna
+ */
+ DBRefEntry proteinToEmblRef = new DBRefEntry(DBRefSource.EMBL,
+ getSequenceVersion(), proteinId, new Mapping(
+ dnaToProteinMapping.getMap().getInverse()));
+ product.addDBRef(proteinToEmblRef);
+
+ /*
* make xref from protein to EMBLCDS; we assume here that the
* CDS sequence version is same as dna sequence (?!)
*/
MapList proteinToCdsMapList = new MapList(new int[] { 1,
- translation.length() }, new int[] { 1 + (codonStart - 1),
- (codonStart - 1) + 3 * translation.length() }, 1, 3);
+ translationLength }, new int[] { 1 + (codonStart - 1),
+ (codonStart - 1) + 3 * translationLength }, 1, 3);
DBRefEntry proteinToEmblCdsRef = new DBRefEntry(
DBRefSource.EMBLCDS, getSequenceVersion(), proteinId,
new Mapping(proteinToCdsMapList));
product.addDBRef(proteinToEmblCdsRef);
/*
- * make xref from protein to EMBLCDSPROTEIN
+ * make 'direct' xref from protein to EMBLCDSPROTEIN
*/
proteinToEmblProteinRef = new DBRefEntry(proteinToEmblCdsRef);
proteinToEmblProteinRef.setSource(DBRefSource.EMBLCDSProduct);
+ proteinToEmblProteinRef.setMap(null);
product.addDBRef(proteinToEmblProteinRef);
}
}
*/
for (int xint = 0; exons != null && xint < exons.length; xint += 2)
{
- SequenceFeature sf = makeCdsFeature(exons, xint, proteinName, proteinId, vals,
- codonStart);
+ SequenceFeature sf = makeCdsFeature(exons, xint, proteinName,
+ proteinId, vals, codonStart);
sf.setType(feature.getName()); // "CDS"
sf.setEnaLocation(feature.getLocation());
sf.setFeatureGroup(sourceDb);
*/
String source = DBRefUtils.getCanonicalName(ref.getSource());
ref.setSource(source);
- DBRefEntry proteinToDnaRef = new DBRefEntry(ref.getSource(), ref.getVersion(), ref
+ DBRefEntry proteinDbRef = new DBRefEntry(ref.getSource(), ref.getVersion(), ref
.getAccessionId());
if (source.equals(DBRefSource.UNIPROT))
{
peptides.add(proteinSeq);
}
dnaToProteinMapping.setTo(proteinSeq);
- proteinSeq.addDBRef(proteinToDnaRef);
+ dnaToProteinMapping.setMappedFromId(proteinId);
+ proteinSeq.addDBRef(proteinDbRef);
ref.setMap(dnaToProteinMapping);
}
hasUniprotDbref = true;
/*
* copy feature dbref to our protein product
*/
- DBRefEntry pref = proteinToDnaRef;
+ DBRefEntry pref = proteinDbRef;
pref.setMap(null); // reference is direct
product.addDBRef(pref);
// Add converse mapping reference
DBRefEntry dnaToEmblProteinRef = new DBRefEntry(
DBRefSource.EMBLCDSProduct, getSequenceVersion(), proteinId);
dnaToEmblProteinRef.setMap(dnaToProteinMapping);
+ dnaToProteinMapping.setMappedFromId(proteinId);
dna.addDBRef(dnaToEmblProteinRef);
}
}