X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fdatamodel%2Fxdb%2Fembl%2FEmblEntry.java;h=5409d5b177066c80db96805504fd76308f129c5e;hb=0ac97c219bf88278f77306a5695e8bd9d9ca9179;hp=50a262fd93ef67541be7d9890b7aeeb74a261062;hpb=884952b9d0bf7e07ff8c8f70ed1601bfe20ac554;p=jalview.git diff --git a/src/jalview/datamodel/xdb/embl/EmblEntry.java b/src/jalview/datamodel/xdb/embl/EmblEntry.java index 50a262f..5409d5b 100644 --- a/src/jalview/datamodel/xdb/embl/EmblEntry.java +++ b/src/jalview/datamodel/xdb/embl/EmblEntry.java @@ -21,6 +21,7 @@ package jalview.datamodel.xdb.embl; import jalview.analysis.SequenceIdMatcher; +import jalview.bin.Cache; import jalview.datamodel.DBRefEntry; import jalview.datamodel.DBRefSource; import jalview.datamodel.FeatureProperties; @@ -34,6 +35,7 @@ import jalview.util.MapList; import jalview.util.MappingUtils; import jalview.util.StringUtils; +import java.text.ParseException; import java.util.Arrays; import java.util.Hashtable; import java.util.List; @@ -62,21 +64,25 @@ public class EmblEntry String sequenceVersion; + String dataClass; + String moleculeType; String topology; String sequenceLength; - String taxDivision; + String taxonomicDivision; + + String description; - String desc; + String firstPublicDate; - String rCreated; + String firstPublicRelease; - String rLastUpdated; + String lastUpdatedDate; - String lastUpdated; + String lastUpdatedRelease; Vector keywords; @@ -121,23 +127,6 @@ public class EmblEntry } /** - * @return the desc - */ - public String getDesc() - { - return desc; - } - - /** - * @param desc - * the desc to set - */ - public void setDesc(String desc) - { - this.desc = desc; - } - - /** * @return the features */ public Vector getFeatures() @@ -172,57 +161,6 @@ public class EmblEntry } /** - * @return the lastUpdated - */ - public String getLastUpdated() - { - return lastUpdated; - } - - /** - * @param lastUpdated - * the lastUpdated to set - */ - public void setLastUpdated(String lastUpdated) - { - this.lastUpdated = lastUpdated; - } - - /** - * @return the releaseCreated - */ - public String getRCreated() - { - return rCreated; - } - - /** - * @param releaseCreated - * the releaseCreated to set - */ - public void setRCreated(String releaseCreated) - { - this.rCreated = releaseCreated; - } - - /** - * @return the releaseLastUpdated - */ - public String getRLastUpdated() - { - return rLastUpdated; - } - - /** - * @param releaseLastUpdated - * the releaseLastUpdated to set - */ - public void setRLastUpdated(String releaseLastUpdated) - { - this.rLastUpdated = releaseLastUpdated; - } - - /** * @return the sequence */ public EmblSequence getSequence() @@ -240,40 +178,6 @@ public class EmblEntry } /** - * @return the taxDivision - */ - public String getTaxDivision() - { - return taxDivision; - } - - /** - * @param taxDivision - * the taxDivision to set - */ - public void setTaxDivision(String taxDivision) - { - this.taxDivision = taxDivision; - } - - /** - * @return the entry version - */ - public String getEntryVersion() - { - return entryVersion; - } - - /** - * @param version - * the version to set - */ - public void setEntryVersion(String version) - { - this.entryVersion = version; - } - - /** * Recover annotated sequences from EMBL file * * @param sourceDb @@ -285,7 +189,7 @@ public class EmblEntry { SequenceI dna = new Sequence(sourceDb + "|" + accession, sequence.getSequence()); - dna.setDescription(desc); + dna.setDescription(description); DBRefEntry retrievedref = new DBRefEntry(sourceDb, getSequenceVersion(), accession); dna.addDBRef(retrievedref); @@ -293,29 +197,28 @@ public class EmblEntry // dbref retrievedref.setMap(new Mapping(null, new int[] { 1, dna.getLength() }, new int[] { 1, dna.getLength() }, 1, 1)); - // TODO: transform EMBL Database refs to canonical form + + + /* + * transform EMBL Database refs to canonical form + */ if (dbRefs != null) { for (DBRefEntry dbref : dbRefs) { + dbref.setSource(DBRefUtils.getCanonicalName(dbref.getSource())); dna.addDBRef(dbref); } } + SequenceIdMatcher matcher = new SequenceIdMatcher(peptides); try { for (EmblFeature feature : features) { - if (feature.dbRefs != null) - { - for (DBRefEntry dbref : feature.dbRefs) - { - dna.addDBRef(dbref); - } - } if (FeatureProperties.isCodingFeature(sourceDb, feature.getName())) { - parseCodingFeature(feature, sourceDb, dna, peptides); + parseCodingFeature(feature, sourceDb, dna, peptides, matcher); } } } catch (Exception e) @@ -343,9 +246,11 @@ public class EmblEntry * parent dna sequence for this record * @param peptides * list of protein product sequences for Embl entry + * @param matcher + * helper to match xrefs in already retrieved sequences */ void parseCodingFeature(EmblFeature feature, String sourceDb, - SequenceI dna, List peptides) + SequenceI dna, List peptides, SequenceIdMatcher matcher) { boolean isEmblCdna = sourceDb.equals(DBRefSource.EMBLCDS); @@ -355,7 +260,6 @@ public class EmblEntry String prname = ""; String prid = null; Map vals = new Hashtable(); - SequenceIdMatcher matcher = new SequenceIdMatcher(peptides); /* * codon_start 1/2/3 in EMBL corresponds to phase 0/1/2 in CDS @@ -379,13 +283,13 @@ public class EmblEntry } else if (qname.equals("protein_id")) { - prid = q.getValues()[0]; + prid = q.getValues()[0].trim(); } else if (qname.equals("codon_start")) { try { - codonStart = Integer.parseInt(q.getValues()[0]); + codonStart = Integer.parseInt(q.getValues()[0].trim()); } catch (NumberFormatException e) { System.err.println("Invalid codon_start in XML for " @@ -395,7 +299,7 @@ public class EmblEntry else if (qname.equals("product")) { // sometimes name is returned e.g. for V00488 - prname = q.getValues()[0]; + prname = q.getValues()[0].trim(); } else { @@ -524,6 +428,9 @@ public class EmblEntry boolean mappingUsed = false; for (DBRefEntry ref : feature.dbRefs) { + /* + * ensure UniProtKB/Swiss-Prot converted to UNIPROT + */ ref.setSource(DBRefUtils.getCanonicalName(ref.getSource())); if (ref.getSource().equals(DBRefSource.UNIPROT)) { @@ -662,7 +569,7 @@ public class EmblEntry } /** - * Returns the CDS positions as a list of [start, end, start, end...] + * Returns the CDS positions as a single array of [start, end, start, end...] * positions. If on the reverse strand, these will be in descending order. * * @param feature @@ -674,8 +581,18 @@ public class EmblEntry { return new int[] {}; } - List ranges = DnaUtils.parseLocation(feature.location); - return ranges == null ? new int[] {} : listToArray(ranges); + + try + { + List ranges = DnaUtils.parseLocation(feature.location); + return listToArray(ranges); + } catch (ParseException e) + { + Cache.log.warn(String.format( + "Not parsing inexact CDS location %s in ENA %s", + feature.location, this.accession)); + return new int[] {}; + } } /** @@ -780,6 +697,26 @@ public class EmblEntry this.sequenceVersion = sequenceVersion; } + public String getSequenceLength() + { + return sequenceLength; + } + + public void setSequenceLength(String sequenceLength) + { + this.sequenceLength = sequenceLength; + } + + public String getEntryVersion() + { + return entryVersion; + } + + public void setEntryVersion(String entryVersion) + { + this.entryVersion = entryVersion; + } + public String getMoleculeType() { return moleculeType; @@ -800,33 +737,73 @@ public class EmblEntry this.topology = topology; } - public String getSequenceLength() + public String getTaxonomicDivision() { - return sequenceLength; + return taxonomicDivision; } - public void setSequenceLength(String sequenceLength) + public void setTaxonomicDivision(String taxonomicDivision) { - this.sequenceLength = sequenceLength; + this.taxonomicDivision = taxonomicDivision; + } + + public String getDescription() + { + return description; + } + + public void setDescription(String description) + { + this.description = description; + } + + public String getFirstPublicDate() + { + return firstPublicDate; + } + + public void setFirstPublicDate(String firstPublicDate) + { + this.firstPublicDate = firstPublicDate; + } + + public String getFirstPublicRelease() + { + return firstPublicRelease; + } + + public void setFirstPublicRelease(String firstPublicRelease) + { + this.firstPublicRelease = firstPublicRelease; + } + + public String getLastUpdatedDate() + { + return lastUpdatedDate; + } + + public void setLastUpdatedDate(String lastUpdatedDate) + { + this.lastUpdatedDate = lastUpdatedDate; } - public String getrCreated() + public String getLastUpdatedRelease() { - return rCreated; + return lastUpdatedRelease; } - public void setrCreated(String rCreated) + public void setLastUpdatedRelease(String lastUpdatedRelease) { - this.rCreated = rCreated; + this.lastUpdatedRelease = lastUpdatedRelease; } - public String getrLastUpdated() + public String getDataClass() { - return rLastUpdated; + return dataClass; } - public void setrLastUpdated(String rLastUpdated) + public void setDataClass(String dataClass) { - this.rLastUpdated = rLastUpdated; + this.dataClass = dataClass; } }