X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fdatamodel%2Fxdb%2Fembl%2FEmblEntry.java;h=56b132508ce5ad76c91fef93ce9885864fb02443;hb=c14d9e8c9ff59de1857d4834ee70e80abf623415;hp=67d70595c9cb50ab9d52df716a94705248567057;hpb=10083e0731425dedd9c2e4f3677dbdee2db8bc33;p=jalview.git diff --git a/src/jalview/datamodel/xdb/embl/EmblEntry.java b/src/jalview/datamodel/xdb/embl/EmblEntry.java index 67d7059..56b1325 100644 --- a/src/jalview/datamodel/xdb/embl/EmblEntry.java +++ b/src/jalview/datamodel/xdb/embl/EmblEntry.java @@ -187,12 +187,12 @@ public class EmblEntry */ public SequenceI getSequence(String sourceDb, List peptides) { - SequenceI dna = new Sequence(sourceDb + "|" + accession, - sequence.getSequence()); + SequenceI dna = makeSequence(sourceDb); dna.setDescription(description); DBRefEntry retrievedref = new DBRefEntry(sourceDb, getSequenceVersion(), accession); dna.addDBRef(retrievedref); + dna.setSourceDBRef(retrievedref); // add map to indicate the sequence is a valid coordinate frame for the // dbref retrievedref.setMap(new Mapping(null, new int[] { 1, dna.getLength() }, @@ -235,6 +235,17 @@ public class EmblEntry } /** + * @param sourceDb + * @return + */ + SequenceI makeSequence(String sourceDb) + { + SequenceI dna = new Sequence(sourceDb + "|" + accession, + sequence.getSequence()); + return dna; + } + + /** * Extracts coding region and product from a CDS feature and properly decorate * it with annotations. * @@ -322,13 +333,15 @@ public class EmblEntry Mapping dnaToProteinMapping = null; if (translation != null && proteinName != null && proteinId != null) { + int translationLength = translation.length(); + /* * look for product in peptides list, if not found, add it */ product = matcher.findIdMatch(proteinId); if (product == null) { - product = new Sequence(proteinId, translation, 1, translation.length()); + product = new Sequence(proteinId, translation, 1, translationLength); product.setDescription(((proteinName.length() == 0) ? "Protein Product from " + sourceDb : proteinName)); @@ -340,30 +353,32 @@ public class EmblEntry // sequence if (exons == null || exons.length == 0) { + /* + * workaround until we handle dna location for CDS sequence + * e.g. location="X53828.1:60..1058" correctly + */ System.err .println("Implementation Notice: EMBLCDS records not properly supported yet - Making up the CDNA region of this sequence... may be incorrect (" + sourceDb + ":" + getAccession() + ")"); - if (translation.length() * 3 == (1 - codonStart + dna.getSequence().length)) + if (translationLength * 3 == (1 - codonStart + dna.getSequence().length)) { System.err .println("Not allowing for additional stop codon at end of cDNA fragment... !"); - // this might occur for CDS sequences where no features are - // marked. + // this might occur for CDS sequences where no features are marked exons = new int[] { dna.getStart() + (codonStart - 1), dna.getEnd() }; dnaToProteinMapping = new Mapping(product, exons, new int[] { 1, - translation.length() }, - 3, 1); + translationLength }, 3, 1); } - if ((translation.length() + 1) * 3 == (1 - codonStart + dna.getSequence().length)) + if ((translationLength + 1) * 3 == (1 - codonStart + dna + .getSequence().length)) { System.err .println("Allowing for additional stop codon at end of cDNA fragment... will probably cause an error in VAMSAs!"); exons = new int[] { dna.getStart() + (codonStart - 1), dna.getEnd() - 3 }; dnaToProteinMapping = new Mapping(product, exons, new int[] { 1, - translation.length() }, - 3, 1); + translationLength }, 3, 1); } } else @@ -382,28 +397,37 @@ public class EmblEntry else { // final product length truncation check - int[] cdsRanges = adjustForProteinLength(translation.length(), exons); - dnaToProteinMapping = new Mapping(product, cdsRanges, new int[] { 1, - translation.length() }, 3, 1); + int[] cdsRanges = adjustForProteinLength(translationLength, exons); + dnaToProteinMapping = new Mapping(product, cdsRanges, new int[] { + 1, translationLength }, 3, 1); if (product != null) { /* + * make xref with mapping from protein to EMBL dna + */ + DBRefEntry proteinToEmblRef = new DBRefEntry(DBRefSource.EMBL, + getSequenceVersion(), proteinId, new Mapping( + dnaToProteinMapping.getMap().getInverse())); + product.addDBRef(proteinToEmblRef); + + /* * make xref from protein to EMBLCDS; we assume here that the * CDS sequence version is same as dna sequence (?!) */ MapList proteinToCdsMapList = new MapList(new int[] { 1, - translation.length() }, new int[] { 1 + (codonStart - 1), - (codonStart - 1) + 3 * translation.length() }, 1, 3); + translationLength }, new int[] { 1 + (codonStart - 1), + (codonStart - 1) + 3 * translationLength }, 1, 3); DBRefEntry proteinToEmblCdsRef = new DBRefEntry( DBRefSource.EMBLCDS, getSequenceVersion(), proteinId, new Mapping(proteinToCdsMapList)); product.addDBRef(proteinToEmblCdsRef); /* - * make xref from protein to EMBLCDSPROTEIN + * make 'direct' xref from protein to EMBLCDSPROTEIN */ proteinToEmblProteinRef = new DBRefEntry(proteinToEmblCdsRef); proteinToEmblProteinRef.setSource(DBRefSource.EMBLCDSProduct); + proteinToEmblProteinRef.setMap(null); product.addDBRef(proteinToEmblProteinRef); } } @@ -414,8 +438,8 @@ public class EmblEntry */ for (int xint = 0; exons != null && xint < exons.length; xint += 2) { - SequenceFeature sf = makeCdsFeature(exons, xint, proteinName, proteinId, vals, - codonStart); + SequenceFeature sf = makeCdsFeature(exons, xint, proteinName, + proteinId, vals, codonStart); sf.setType(feature.getName()); // "CDS" sf.setEnaLocation(feature.getLocation()); sf.setFeatureGroup(sourceDb); @@ -437,7 +461,7 @@ public class EmblEntry */ String source = DBRefUtils.getCanonicalName(ref.getSource()); ref.setSource(source); - DBRefEntry proteinToDnaRef = new DBRefEntry(ref.getSource(), ref.getVersion(), ref + DBRefEntry proteinDbRef = new DBRefEntry(ref.getSource(), ref.getVersion(), ref .getAccessionId()); if (source.equals(DBRefSource.UNIPROT)) { @@ -469,7 +493,9 @@ public class EmblEntry peptides.add(proteinSeq); } dnaToProteinMapping.setTo(proteinSeq); - proteinSeq.addDBRef(proteinToDnaRef); + dnaToProteinMapping.setMappedFromId(proteinId); + proteinSeq.addDBRef(proteinDbRef); + proteinSeq.setSourceDBRef(proteinDbRef); ref.setMap(dnaToProteinMapping); } hasUniprotDbref = true; @@ -479,7 +505,7 @@ public class EmblEntry /* * copy feature dbref to our protein product */ - DBRefEntry pref = proteinToDnaRef; + DBRefEntry pref = proteinDbRef; pref.setMap(null); // reference is direct product.addDBRef(pref); // Add converse mapping reference @@ -514,6 +540,7 @@ public class EmblEntry DBRefSource.EMBLCDSProduct, getSequenceVersion(), proteinId); } product.addDBRef(proteinToEmblProteinRef); + product.setSourceDBRef(proteinToEmblProteinRef); if (dnaToProteinMapping != null && dnaToProteinMapping.getTo() != null) @@ -521,6 +548,7 @@ public class EmblEntry DBRefEntry dnaToEmblProteinRef = new DBRefEntry( DBRefSource.EMBLCDSProduct, getSequenceVersion(), proteinId); dnaToEmblProteinRef.setMap(dnaToProteinMapping); + dnaToProteinMapping.setMappedFromId(proteinId); dna.addDBRef(dnaToEmblProteinRef); } }