X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fanalysis%2FCrossRef.java;h=288d60e68121b23a569cdbb149696a711f7befea;hb=76bbf0af9eb3e35013ff1516713b356c76cf95a5;hp=7dcaa17d9e6b1509f6a29e8fedc79536d63437d2;hpb=dfa980fc8e8dd186278e5aa1fcf356964afdf61a;p=jalview.git diff --git a/src/jalview/analysis/CrossRef.java b/src/jalview/analysis/CrossRef.java index 7dcaa17..288d60e 100644 --- a/src/jalview/analysis/CrossRef.java +++ b/src/jalview/analysis/CrossRef.java @@ -68,11 +68,6 @@ public class CrossRef List rseqs; /** - * mappings constructed - */ - AlignedCodonFrame cf; - - /** * Constructor * * @param seqs @@ -125,6 +120,9 @@ public class CrossRef * * @param seq * the sequence whose dbrefs we are searching against + * @param fromDna + * when true, context is DNA - so sources identifying protein + * products will be returned. * @param sources * a list of sources to add matches to */ @@ -142,18 +140,18 @@ public class CrossRef * find sequence's direct (dna-to-dna, peptide-to-peptide) xrefs */ DBRefEntry[] lrfs = DBRefUtils.selectDbRefs(fromDna, seq.getDBRefs()); - List rseqs = new ArrayList(); + List foundSeqs = new ArrayList(); /* * find sequences in the alignment which xref one of these DBRefs * i.e. is xref-ed to a common sequence identifier */ - searchDatasetXrefs(fromDna, seq, lrfs, rseqs, null); + searchDatasetXrefs(fromDna, seq, lrfs, foundSeqs, null); /* * add those sequences' (dna-to-peptide or peptide-to-dna) dbref sources */ - for (SequenceI rs : rseqs) + for (SequenceI rs : foundSeqs) { DBRefEntry[] xrs = DBRefUtils .selectDbRefs(!fromDna, rs.getDBRefs()); @@ -210,7 +208,7 @@ public class CrossRef { rseqs = new ArrayList(); - cf = new AlignedCodonFrame(); + AlignedCodonFrame cf = new AlignedCodonFrame(); matcher = new SequenceIdMatcher( dataset.getSequences()); @@ -292,12 +290,14 @@ public class CrossRef if (fromDna) { // map is from dna seq to a protein product - cf.addMap(dss, rsq, xref.getMap().getMap()); + cf.addMap(dss, rsq, xref.getMap().getMap(), xref.getMap() + .getMappedFromId()); } else { // map should be from protein seq to its coding dna - cf.addMap(rsq, dss, xref.getMap().getMap().getInverse()); + cf.addMap(rsq, dss, xref.getMap().getMap().getInverse(), + xref.getMap().getMappedFromId()); } } } @@ -333,7 +333,7 @@ public class CrossRef */ if (!sourceRefs.isEmpty()) { - retrieveCrossRef(sourceRefs, seq, xrfs, fromDna); + retrieveCrossRef(sourceRefs, seq, xrfs, fromDna, cf); } } @@ -350,11 +350,12 @@ public class CrossRef } private void retrieveCrossRef(List sourceRefs, SequenceI seq, - DBRefEntry[] xrfs, boolean fromDna) + DBRefEntry[] xrfs, boolean fromDna, AlignedCodonFrame cf) { ASequenceFetcher sftch = SequenceFetcherFactory.getSequenceFetcher(); SequenceI[] retrieved = null; - SequenceI dss = null; + SequenceI dss = seq.getDatasetSequence() == null ? seq : seq + .getDatasetSequence(); try { retrieved = sftch.getSequences(sourceRefs, !fromDna); @@ -617,14 +618,14 @@ public class CrossRef void updateDbrefMappings(SequenceI mapFrom, DBRefEntry[] xrefs, SequenceI[] retrieved, AlignedCodonFrame acf, boolean fromDna) { - SequenceIdMatcher matcher = new SequenceIdMatcher(retrieved); + SequenceIdMatcher idMatcher = new SequenceIdMatcher(retrieved); for (DBRefEntry xref : xrefs) { if (!xref.hasMap()) { String targetSeqName = xref.getSource() + "|" + xref.getAccessionId(); - SequenceI[] matches = matcher.findAllIdMatches(targetSeqName); + SequenceI[] matches = idMatcher.findAllIdMatches(targetSeqName); if (matches == null) { return; @@ -701,6 +702,20 @@ public class CrossRef return false; } xref.setMap(new Mapping(mapTo, mapping)); + + /* + * and add a reverse DbRef with the inverse mapping + */ + if (mapFrom.getDatasetSequence() != null + && mapFrom.getDatasetSequence().getSourceDBRef() != null) + { + DBRefEntry dbref = new DBRefEntry(mapFrom.getDatasetSequence() + .getSourceDBRef()); + dbref.setMap(new Mapping(mapFrom.getDatasetSequence(), mapping + .getInverse())); + mapTo.addDBRef(dbref); + } + if (fromDna) { AlignmentUtils.computeProteinFeatures(mapFrom, mapTo, mapping); @@ -724,11 +739,11 @@ public class CrossRef * context was searching from Protein sequences * @param sequenceI * @param lrfs - * @param rseqs + * @param foundSeqs * @return true if matches were found. */ private boolean searchDatasetXrefs(boolean fromDna, SequenceI sequenceI, - DBRefEntry[] lrfs, List rseqs, AlignedCodonFrame cf) + DBRefEntry[] lrfs, List foundSeqs, AlignedCodonFrame cf) { boolean found = false; if (lrfs == null) @@ -741,7 +756,7 @@ public class CrossRef // add in wildcards xref.setVersion(null); xref.setMap(null); - found |= searchDataset(fromDna, sequenceI, xref, rseqs, cf, false); + found |= searchDataset(fromDna, sequenceI, xref, foundSeqs, cf, false); } return found; } @@ -753,20 +768,29 @@ public class CrossRef * @param fromDna * true if context was searching for refs *from* dna sequence, false * if context was searching for refs *from* protein sequence - * @param sequenceI + * @param fromSeq * a sequence to ignore (start point of search) * @param xrf * a cross-reference to try to match - * @param rseqs + * @param foundSeqs * result list to add to - * @param cf + * @param mappings * a set of sequence mappings to add to * @param direct - * - search all references or only subset + * - indicates the type of relationship between returned sequences, + * xrf, and sequenceI that is required. + *
    + *
  • direct implies xrf is a primary reference for sequenceI AND + * the sequences to be located (eg a uniprot ID for a protein + * sequence, and a uniprot ref on a transcript sequence).
  • + *
  • indirect means xrf is a cross reference with respect to + * sequenceI or all the returned sequences (eg a genomic reference + * associated with a locus and one or more transcripts)
  • + *
* @return true if relationship found and sequence added. */ - boolean searchDataset(boolean fromDna, SequenceI sequenceI, - DBRefEntry xrf, List rseqs, AlignedCodonFrame cf, + boolean searchDataset(boolean fromDna, SequenceI fromSeq, + DBRefEntry xrf, List foundSeqs, AlignedCodonFrame mappings, boolean direct) { boolean found = false; @@ -789,9 +813,13 @@ public class CrossRef if (nxt.getDatasetSequence() != null) { System.err - .println("Implementation warning: getProducts passed a dataset alignment without dataset sequences in it!"); + .println("Implementation warning: CrossRef initialised with a dataset alignment with non-dataset sequences in it! (" + + nxt.getDisplayId(true) + + " has ds reference " + + nxt.getDatasetSequence().getDisplayId(true) + + ")"); } - if (nxt == sequenceI || nxt == sequenceI.getDatasetSequence()) + if (nxt == fromSeq || nxt == fromSeq.getDatasetSequence()) { continue; } @@ -811,30 +839,29 @@ public class CrossRef // look for direct or indirect references in common DBRefEntry[] poss = nxt.getDBRefs(); List cands = null; - /* - * TODO does this make any sense? - * if 'direct', search the dbrefs for xrf - * else, filter the dbrefs by type and then search for xrf - * - the result is the same isn't it? - */ - if (direct) - { - cands = DBRefUtils.searchRefs(poss, xrf); - } - else - { - poss = DBRefUtils.selectDbRefs(!fromDna, poss); - cands = DBRefUtils.searchRefs(poss, xrf); - } + + // todo: indirect specifies we select either direct references to nxt + // that match xrf which is indirect to sequenceI, or indirect + // references to nxt that match xrf which is direct to sequenceI + cands = DBRefUtils.searchRefs(poss, xrf); + // else + // { + // poss = DBRefUtils.selectDbRefs(nxt.isProtein()!fromDna, poss); + // cands = DBRefUtils.searchRefs(poss, xrf); + // } if (!cands.isEmpty()) { - if (!rseqs.contains(nxt)) + if (!foundSeqs.contains(nxt)) { found = true; - rseqs.add(nxt); - if (cf != null) + foundSeqs.add(nxt); + if (mappings != null && !direct) { - // don't search if we aren't given a codon map object + /* + * if the matched sequence has mapped dbrefs to + * protein product / cdna, add equivalent mappings to + * our source sequence + */ for (DBRefEntry candidate : cands) { Mapping mapping = candidate.getMap(); @@ -844,23 +871,21 @@ public class CrossRef if (mapping.getTo() != null && map.getFromRatio() != map.getToRatio()) { - // get sense of map correct for adding to product - // alignment. - if (fromDna) + /* + * add a mapping, as from dna to peptide sequence + */ + if (map.getFromRatio() == 3) { - // map is from dna seq to a protein product - cf.addMap(sequenceI, nxt, map); + mappings.addMap(nxt, fromSeq, map); } else { - // map should be from protein seq to its coding dna - cf.addMap(nxt, sequenceI, map.getInverse()); + mappings.addMap(nxt, fromSeq, map.getInverse()); } } } } } - // TODO: add mapping between sequences if necessary } } }