From bac4802486edabc43143c926c0615f8fb577b1b5 Mon Sep 17 00:00:00 2001 From: Jim Procter Date: Sat, 1 Oct 2016 13:21:29 +0100 Subject: [PATCH] JAL-2210 factored out code for processing cross-referenced sequences retrieved via a dbref on source sequence --- src/jalview/analysis/CrossRef.java | 205 +++++++++++++++++++----------------- 1 file changed, 108 insertions(+), 97 deletions(-) diff --git a/src/jalview/analysis/CrossRef.java b/src/jalview/analysis/CrossRef.java index 05814c2..6779b87 100644 --- a/src/jalview/analysis/CrossRef.java +++ b/src/jalview/analysis/CrossRef.java @@ -428,8 +428,16 @@ public class CrossRef // try: Ensembl -> Nuc->Ensembl, Nuc->Uniprot-->Protein->EMBL-> SequenceI retrievedDss = retrievedSequence.getDatasetSequence() == null ? retrievedSequence : retrievedSequence.getDatasetSequence(); - DBRefEntry[] dbr = retrievedSequence.getDBRefs(); - if (dbr != null) + importCrossRefSeq(cf, dss, retrievedDss); + rseqs.add(retrievedDss); + if (dataset.findIndex(retrievedDss) == -1) + { + dataset.addSequence(retrievedDss); + matcher.add(retrievedDss); + } + } + } + } /** * Search dataset for sequences with a primary reference contained in @@ -467,122 +475,125 @@ public class CrossRef } } + /** + * process sequence retrieved via a dbref on source sequence to resolve and + * transfer data + * + * @param cf + * @param sourceSequence + * @param retrievedSequence + */ + private void importCrossRefSeq(AlignedCodonFrame cf, + SequenceI sourceSequence, SequenceI retrievedSequence) + { + DBRefEntry[] dbr = retrievedSequence.getDBRefs(); + if (dbr != null) + { + for (DBRefEntry dbref : dbr) + { + // find any entry where we should put in the sequence being + // cross-referenced into the map + Mapping map = dbref.getMap(); + if (map != null) { - for (DBRefEntry dbref : dbr) + if (map.getTo() != null && map.getMap() != null) { - // find any entry where we should put in the sequence being - // cross-referenced into the map - Mapping map = dbref.getMap(); - if (map != null) + // TODO findInDataset requires exact sequence match but + // 'congruent' test is only for the mapped part + // maybe not a problem in practice since only ENA provide a + // mapping and it is to the full protein translation of CDS + SequenceI matched = findInDataset(dbref); + // matcher.findIdMatch(map.getTo()); + if (matched != null) { - if (map.getTo() != null && map.getMap() != null) + /* + * already got an xref to this sequence; update this + * map to point to the same sequence, and add + * any new dbrefs to it + */ + DBRefEntry[] toRefs = map.getTo().getDBRefs(); + if (toRefs != null) { - // TODO findInDataset requires exact sequence match but - // 'congruent' test is only for the mapped part - // maybe not a problem in practice since only ENA provide a - // mapping and it is to the full protein translation of CDS - SequenceI matched = findInDataset(dbref); - // matcher.findIdMatch(map.getTo()); - if (matched != null) + for (DBRefEntry ref : toRefs) { - /* - * already got an xref to this sequence; update this - * map to point to the same sequence, and add - * any new dbrefs to it - */ - DBRefEntry[] toRefs = map.getTo().getDBRefs(); - if (toRefs != null) - { - for (DBRefEntry ref : toRefs) - { - matched.addDBRef(ref); // add or update mapping - } - } - map.setTo(matched); + matched.addDBRef(ref); // add or update mapping } - else - { - if (dataset.findIndex(map.getTo()) == -1) - { - dataset.addSequence(map.getTo()); - matcher.add(map.getTo()); - } - } - try + } + map.setTo(matched); + } + else + { + if (dataset.findIndex(map.getTo()) == -1) + { + dataset.addSequence(map.getTo()); + matcher.add(map.getTo()); + } + } + + try + { + // compare ms with dss and replace with dss in mapping + // if map is congruent + SequenceI ms = map.getTo(); + int sf = map.getMap().getToLowest(); + int st = map.getMap().getToHighest(); + SequenceI mappedrg = ms.getSubSequence(sf, st); + if (mappedrg.getLength() > 0 + && ms.getSequenceAsString().equals( + sourceSequence.getSequenceAsString())) + { + String msg = "Mapping updated from " + ms.getName() + + " to retrieved crossreference " + + sourceSequence.getName(); + System.out.println(msg); + map.setTo(sourceSequence); + + /* + * give the reverse reference the inverse mapping + * (if it doesn't have one already) + */ + setReverseMapping(sourceSequence, dbref, cf); + + /* + * copy sequence features as well, avoiding + * duplication (e.g. same variation from two + * transcripts) + */ + SequenceFeature[] sfs = ms.getSequenceFeatures(); + if (sfs != null) { - // compare ms with dss and replace with dss in mapping - // if map is congruent - SequenceI ms = map.getTo(); - int sf = map.getMap().getToLowest(); - int st = map.getMap().getToHighest(); - SequenceI mappedrg = ms.getSubSequence(sf, st); - // SequenceI loc = dss.getSubSequence(sf, st); - if (mappedrg.getLength() > 0 - && ms.getSequenceAsString().equals( - dss.getSequenceAsString())) - // && mappedrg.getSequenceAsString().equals( - // loc.getSequenceAsString())) + for (SequenceFeature feat : sfs) { - String msg = "Mapping updated from " + ms.getName() - + " to retrieved crossreference " - + dss.getName(); - System.out.println(msg); - map.setTo(dss); - - /* - * give the reverse reference the inverse mapping - * (if it doesn't have one already) - */ - setReverseMapping(dss, dbref, cf); - /* - * copy sequence features as well, avoiding - * duplication (e.g. same variation from two - * transcripts) + * make a flyweight feature object which ignores Parent + * attribute in equality test; this avoids creating many + * otherwise duplicate exon features on genomic sequence */ - SequenceFeature[] sfs = ms.getSequenceFeatures(); - if (sfs != null) + SequenceFeature newFeature = new SequenceFeature( + feat) { - for (SequenceFeature feat : sfs) + @Override + public boolean equals(Object o) { - /* - * make a flyweight feature object which ignores Parent - * attribute in equality test; this avoids creating many - * otherwise duplicate exon features on genomic sequence - */ - SequenceFeature newFeature = new SequenceFeature( - feat) - { - @Override - public boolean equals(Object o) - { - return super.equals(o, true); - } - }; - dss.addSequenceFeature(newFeature); + return super.equals(o, true); } - } + }; + sourceSequence.addSequenceFeature(newFeature); } - cf.addMap(retrievedDss, map.getTo(), map.getMap()); - } catch (Exception e) - { - System.err - .println("Exception when consolidating Mapped sequence set..."); - e.printStackTrace(System.err); } } + cf.addMap(retrievedSequence, map.getTo(), map.getMap()); + } catch (Exception e) + { + System.err + .println("Exception when consolidating Mapped sequence set..."); + e.printStackTrace(System.err); } } } - retrievedSequence.updatePDBIds(); - rseqs.add(retrievedDss); - if (dataset.findIndex(retrievedDss) == -1) - { - dataset.addSequence(retrievedDss); - matcher.add(retrievedDss); - } } } + retrievedSequence.updatePDBIds(); } /** * Sets the inverse sequence mapping in the corresponding dbref of the mapped -- 1.7.10.2