X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fdatamodel%2Fxdb%2Fembl%2FEmblEntry.java;h=42419906b6c0810aa9753ec1127407f17e75717c;hb=ebadc5463ca35daa153cb20c3537db740fb5f707;hp=1a24415efa34a2e0a71d73564be3fe53019b6333;hpb=ad15cff29620f960119f80176f1fd443da9f6763;p=jalview.git diff --git a/src/jalview/datamodel/xdb/embl/EmblEntry.java b/src/jalview/datamodel/xdb/embl/EmblEntry.java index 1a24415..4241990 100644 --- a/src/jalview/datamodel/xdb/embl/EmblEntry.java +++ b/src/jalview/datamodel/xdb/embl/EmblEntry.java @@ -27,12 +27,21 @@ import jalview.datamodel.Mapping; import jalview.datamodel.Sequence; import jalview.datamodel.SequenceFeature; import jalview.datamodel.SequenceI; +import jalview.util.DBRefUtils; +import jalview.util.MapList; -import java.util.Enumeration; import java.util.Hashtable; -import java.util.Iterator; +import java.util.Map.Entry; import java.util.Vector; +/** + * Data model for one entry returned from an EMBL query, as marshalled by a + * Castor binding file + * + * For example: http://www.ebi.ac.uk/Tools/dbfetch/dbfetch/embl/x53828/emblxml + * + * @see embl_mapping.xml + */ public class EmblEntry { String accession; @@ -49,13 +58,11 @@ public class EmblEntry String lastUpdated; - Vector keywords; - - Vector refs; + Vector keywords; - Vector dbRefs; + Vector dbRefs; - Vector features; + Vector features; EmblSequence sequence; @@ -79,7 +86,7 @@ public class EmblEntry /** * @return the dbRefs */ - public Vector getDbRefs() + public Vector getDbRefs() { return dbRefs; } @@ -88,7 +95,7 @@ public class EmblEntry * @param dbRefs * the dbRefs to set */ - public void setDbRefs(Vector dbRefs) + public void setDbRefs(Vector dbRefs) { this.dbRefs = dbRefs; } @@ -113,7 +120,7 @@ public class EmblEntry /** * @return the features */ - public Vector getFeatures() + public Vector getFeatures() { return features; } @@ -122,7 +129,7 @@ public class EmblEntry * @param features * the features to set */ - public void setFeatures(Vector features) + public void setFeatures(Vector features) { this.features = features; } @@ -130,7 +137,7 @@ public class EmblEntry /** * @return the keywords */ - public Vector getKeywords() + public Vector getKeywords() { return keywords; } @@ -139,7 +146,7 @@ public class EmblEntry * @param keywords * the keywords to set */ - public void setKeywords(Vector keywords) + public void setKeywords(Vector keywords) { this.keywords = keywords; } @@ -162,23 +169,6 @@ public class EmblEntry } /** - * @return the refs - */ - public Vector getRefs() - { - return refs; - } - - /** - * @param refs - * the refs to set - */ - public void setRefs(Vector refs) - { - this.refs = refs; - } - - /** * @return the releaseCreated */ public String getRCreated() @@ -190,7 +180,7 @@ public class EmblEntry * @param releaseCreated * the releaseCreated to set */ - public void setRcreated(String releaseCreated) + public void setRCreated(String releaseCreated) { this.rCreated = releaseCreated; } @@ -404,11 +394,11 @@ public class EmblEntry * don't return any translated protein sequences marked in features * @return dataset sequences with DBRefs and features - DNA always comes first */ - public jalview.datamodel.SequenceI[] getSequences(boolean noNa, + public SequenceI[] getSequences(boolean noNa, boolean noPeptide, String sourceDb) { // TODO: ensure emblEntry.getSequences behaves correctly for returning all // cases of noNa and noPeptide - Vector seqs = new Vector(); + Vector seqs = new Vector(); Sequence dna = null; if (!noNa) { @@ -425,22 +415,25 @@ public class EmblEntry { 1, dna.getLength() }, 1, 1)); // TODO: transform EMBL Database refs to canonical form if (dbRefs != null) - for (Iterator i = dbRefs.iterator(); i.hasNext(); dna - .addDBRef((DBRefEntry) i.next())) - ; + { + for (DBRefEntry dbref : dbRefs) + { + dna.addDBRef(dbref); + } + } } try { - for (Iterator i = features.iterator(); i.hasNext();) + for (EmblFeature feature: features) { - EmblFeature feature = (EmblFeature) i.next(); if (!noNa) { - if (feature.dbRefs != null && feature.dbRefs.size() > 0) + if (feature.dbRefs != null) { - for (Iterator dbr = feature.dbRefs.iterator(); dbr.hasNext(); dna - .addDBRef((DBRefEntry) dbr.next())) - ; + for (DBRefEntry dbref : feature.dbRefs) + { + dna.addDBRef(dbref); + } } } if (FeatureProperties.isCodingFeature(sourceDb, feature.getName())) @@ -450,13 +443,15 @@ public class EmblEntry else { // General feature type. + // TODO this is just duplicated code ?? if (!noNa) { - if (feature.dbRefs != null && feature.dbRefs.size() > 0) + if (feature.dbRefs != null) { - for (Iterator dbr = feature.dbRefs.iterator(); dbr.hasNext(); dna - .addDBRef((DBRefEntry) dbr.next())) - ; + for (DBRefEntry dbref : feature.dbRefs) + { + dna.addDBRef(dbref); + } } } } @@ -477,7 +472,7 @@ public class EmblEntry SequenceI[] sqs = new SequenceI[seqs.size()]; for (int i = 0, j = seqs.size(); i < j; i++) { - sqs[i] = (SequenceI) seqs.elementAt(i); + sqs[i] = seqs.elementAt(i); seqs.set(i, null); } return sqs; @@ -499,19 +494,16 @@ public class EmblEntry * flag for generation of Peptide sequence objects */ private void parseCodingFeature(EmblFeature feature, String sourceDb, - Vector seqs, Sequence dna, boolean noPeptide) + Vector seqs, Sequence dna, boolean noPeptide) { boolean isEmblCdna = sourceDb.equals(DBRefSource.EMBLCDS); // extract coding region(s) - jalview.datamodel.Mapping map = null; + Mapping map = null; int[] exon = null; - if (feature.locations != null && feature.locations.size() > 0) + if (feature.locations != null) { - for (Enumeration locs = feature.locations.elements(); locs - .hasMoreElements();) + for (EmblFeatureLocations loc : feature.locations) { - EmblFeatureLocations loc = (EmblFeatureLocations) locs - .nextElement(); int[] se = loc.getElementRanges(accession); if (exon == null) { @@ -529,19 +521,17 @@ public class EmblEntry String prseq = null; String prname = new String(); String prid = null; - Hashtable vals = new Hashtable(); + Hashtable vals = new Hashtable(); int prstart = 1; // get qualifiers - if (feature.getQualifiers() != null - && feature.getQualifiers().size() > 0) + if (feature.getQualifiers() != null) { - for (Iterator quals = feature.getQualifiers().iterator(); quals - .hasNext();) + for (Qualifier q : feature.getQualifiers()) { - Qualifier q = (Qualifier) quals.next(); - if (q.getName().equals("translation")) + String qname = q.getName(); + if (qname.equals("translation")) { - StringBuffer prsq = new StringBuffer(q.getValues()[0]); + StringBuilder prsq = new StringBuilder(q.getValues()[0]); int p = prsq.indexOf(" "); while (p > -1) { @@ -552,15 +542,15 @@ public class EmblEntry prsq = null; } - else if (q.getName().equals("protein_id")) + else if (qname.equals("protein_id")) { prid = q.getValues()[0]; } - else if (q.getName().equals("codon_start")) + else if (qname.equals("codon_start")) { prstart = Integer.parseInt(q.getValues()[0]); } - else if (q.getName().equals("product")) + else if (qname.equals("product")) { prname = q.getValues()[0]; } @@ -568,7 +558,7 @@ public class EmblEntry { // throw anything else into the additional properties hash String[] s = q.getValues(); - StringBuffer sb = new StringBuffer(); + StringBuilder sb = new StringBuilder(); if (s != null) { for (int i = 0; i < s.length; i++) @@ -577,12 +567,14 @@ public class EmblEntry sb.append("\n"); } } - vals.put(q.getName(), sb.toString()); + vals.put(qname, sb.toString()); } } } Sequence product = null; + DBRefEntry protEMBLCDS = null; exon = adjustForPrStart(prstart, exon); + boolean noProteinDbref = true; if (prseq != null && prname != null && prid != null) { @@ -611,7 +603,7 @@ public class EmblEntry // marked. exon = new int[] { dna.getStart() + (prstart - 1), dna.getEnd() }; - map = new jalview.datamodel.Mapping(product, exon, new int[] + map = new Mapping(product, exon, new int[] { 1, prseq.length() }, 3, 1); } if ((prseq.length() + 1) * 3 == (1 - prstart + dna.getSequence().length)) @@ -620,7 +612,7 @@ public class EmblEntry .println("Allowing for additional stop codon at end of cDNA fragment... will probably cause an error in VAMSAs!"); exon = new int[] { dna.getStart() + (prstart - 1), dna.getEnd() - 3 }; - map = new jalview.datamodel.Mapping(product, exon, new int[] + map = new Mapping(product, exon, new int[] { 1, prseq.length() }, 3, 1); } } @@ -641,7 +633,7 @@ public class EmblEntry { // final product length trunctation check - map = new jalview.datamodel.Mapping(product, + map = new Mapping(product, adjustForProteinLength(prseq.length(), exon), new int[] { 1, prseq.length() }, 3, 1); // reconstruct the EMBLCDS entry @@ -651,7 +643,7 @@ public class EmblEntry pcdnaref.setAccessionId(prid); pcdnaref.setSource(DBRefSource.EMBLCDS); pcdnaref.setVersion(getVersion()); // same as parent EMBL version. - jalview.util.MapList mp = new jalview.util.MapList(new int[] + MapList mp = new MapList(new int[] { 1, prseq.length() }, new int[] { 1 + (prstart - 1), (prstart - 1) + 3 * prseq.length() }, 1, 3); // { 1 + (prstart - 1) * 3, @@ -659,7 +651,13 @@ public class EmblEntry // { 1prstart, prstart + prseq.length() - 1 }, 3, 1); pcdnaref.setMap(new Mapping(mp)); if (product != null) + { product.addDBRef(pcdnaref); + protEMBLCDS = new DBRefEntry(pcdnaref); + protEMBLCDS.setSource(DBRefSource.EMBLCDSProduct); + product.addDBRef(protEMBLCDS); + + } } } @@ -671,33 +669,29 @@ public class EmblEntry sf.setEnd(exon[xint + 1]); sf.setType(feature.getName()); sf.setFeatureGroup(sourceDb); - sf.setDescription("Exon " + (1 + (int) (xint / 2)) - + " for protein '" + prname + "' EMBLCDS:" + prid); + sf.setDescription("Exon " + (1 + xint / 2) + " for protein '" + + prname + "' EMBLCDS:" + prid); sf.setValue(FeatureProperties.EXONPOS, new Integer(1 + xint)); sf.setValue(FeatureProperties.EXONPRODUCT, prname); - if (vals != null && vals.size() > 0) + if (vals != null) { - Enumeration kv = vals.elements(); - while (kv.hasMoreElements()) + for (Entry val : vals.entrySet()) { - Object key = kv.nextElement(); - if (key != null) - sf.setValue(key.toString(), vals.get(key)); + sf.setValue(val.getKey(), val.getValue()); } } dna.addSequenceFeature(sf); } } // add dbRefs to sequence - if (feature.dbRefs != null && feature.dbRefs.size() > 0) + if (feature.dbRefs != null) { - for (Iterator dbr = feature.dbRefs.iterator(); dbr.hasNext();) + for (DBRefEntry ref : feature.dbRefs) { - DBRefEntry ref = (DBRefEntry) dbr.next(); - ref.setSource(jalview.util.DBRefUtils.getCanonicalName(ref + ref.setSource(DBRefUtils.getCanonicalName(ref .getSource())); // Hard code the kind of protein product accessions that EMBL cite - if (ref.getSource().equals(jalview.datamodel.DBRefSource.UNIPROT)) + if (ref.getSource().equals(DBRefSource.UNIPROT)) { ref.setMap(map); if (map != null && map.getTo() != null) @@ -708,10 +702,11 @@ public class EmblEntry if (map.getTo().getName().indexOf(prid) == 0) { map.getTo().setName( - jalview.datamodel.DBRefSource.UNIPROT + "|" + DBRefSource.UNIPROT + "|" + ref.getAccessionId()); } } + noProteinDbref = false; } if (product != null) { @@ -734,6 +729,33 @@ public class EmblEntry } dna.addDBRef(ref); } + if (noProteinDbref && product != null) + { + // add protein coding reference to dna sequence so xref matches + if (protEMBLCDS == null) + { + protEMBLCDS = new DBRefEntry(); + protEMBLCDS.setAccessionId(prid); + protEMBLCDS.setSource(DBRefSource.EMBLCDSProduct); + protEMBLCDS.setVersion(getVersion()); + protEMBLCDS + .setMap(new Mapping(product, map.getMap().getInverse())); + } + product.addDBRef(protEMBLCDS); + + // Add converse mapping reference + if (map != null) + { + Mapping pmap = new Mapping(product, protEMBLCDS.getMap().getMap() + .getInverse()); + DBRefEntry ncMap = new DBRefEntry(protEMBLCDS); + ncMap.setMap(pmap); + if (map.getTo() != null) + { + dna.addDBRef(ncMap); + } + } + } } }