X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fdatamodel%2Fxdb%2Fembl%2FEmblEntry.java;h=5409d5b177066c80db96805504fd76308f129c5e;hb=0ac97c219bf88278f77306a5695e8bd9d9ca9179;hp=b7509735547418c703c2ac950cfbd48c18d167a3;hpb=627bafea52e4d5702bed00725afca1e0d17f8f60;p=jalview.git diff --git a/src/jalview/datamodel/xdb/embl/EmblEntry.java b/src/jalview/datamodel/xdb/embl/EmblEntry.java index b750973..5409d5b 100644 --- a/src/jalview/datamodel/xdb/embl/EmblEntry.java +++ b/src/jalview/datamodel/xdb/embl/EmblEntry.java @@ -197,7 +197,11 @@ public class EmblEntry // dbref retrievedref.setMap(new Mapping(null, new int[] { 1, dna.getLength() }, new int[] { 1, dna.getLength() }, 1, 1)); - // TODO: transform EMBL Database refs to canonical form + + + /* + * transform EMBL Database refs to canonical form + */ if (dbRefs != null) { for (DBRefEntry dbref : dbRefs) @@ -207,24 +211,14 @@ public class EmblEntry } } + SequenceIdMatcher matcher = new SequenceIdMatcher(peptides); try { for (EmblFeature feature : features) { - if (feature.dbRefs != null) - { - for (DBRefEntry dbref : feature.dbRefs) - { - /* - * convert UniProtKB/Swiss-Prot to UNIPROT - */ - dbref.setSource(DBRefUtils.getCanonicalName(dbref.getSource())); - dna.addDBRef(dbref); - } - } if (FeatureProperties.isCodingFeature(sourceDb, feature.getName())) { - parseCodingFeature(feature, sourceDb, dna, peptides); + parseCodingFeature(feature, sourceDb, dna, peptides, matcher); } } } catch (Exception e) @@ -252,9 +246,11 @@ public class EmblEntry * parent dna sequence for this record * @param peptides * list of protein product sequences for Embl entry + * @param matcher + * helper to match xrefs in already retrieved sequences */ void parseCodingFeature(EmblFeature feature, String sourceDb, - SequenceI dna, List peptides) + SequenceI dna, List peptides, SequenceIdMatcher matcher) { boolean isEmblCdna = sourceDb.equals(DBRefSource.EMBLCDS); @@ -264,7 +260,6 @@ public class EmblEntry String prname = ""; String prid = null; Map vals = new Hashtable(); - SequenceIdMatcher matcher = new SequenceIdMatcher(peptides); /* * codon_start 1/2/3 in EMBL corresponds to phase 0/1/2 in CDS @@ -288,13 +283,13 @@ public class EmblEntry } else if (qname.equals("protein_id")) { - prid = q.getValues()[0]; + prid = q.getValues()[0].trim(); } else if (qname.equals("codon_start")) { try { - codonStart = Integer.parseInt(q.getValues()[0]); + codonStart = Integer.parseInt(q.getValues()[0].trim()); } catch (NumberFormatException e) { System.err.println("Invalid codon_start in XML for " @@ -304,7 +299,7 @@ public class EmblEntry else if (qname.equals("product")) { // sometimes name is returned e.g. for V00488 - prname = q.getValues()[0]; + prname = q.getValues()[0].trim(); } else { @@ -426,13 +421,17 @@ public class EmblEntry } /* - * add mappings for Uniprot xrefs + * add dbRefs to sequence, and mappings for Uniprot xrefs */ if (feature.dbRefs != null) { boolean mappingUsed = false; for (DBRefEntry ref : feature.dbRefs) { + /* + * ensure UniProtKB/Swiss-Prot converted to UNIPROT + */ + ref.setSource(DBRefUtils.getCanonicalName(ref.getSource())); if (ref.getSource().equals(DBRefSource.UNIPROT)) { String proteinSeqName = DBRefSource.UNIPROT + "|" @@ -488,6 +487,7 @@ public class EmblEntry } } } + dna.addDBRef(ref); } if (noProteinDbref && product != null) {