X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fanalysis%2FCrossRef.java;h=f355d1f09c6d139e8c82344e91e46c97de635cd7;hb=30bc1314d4206cafea412dcd9decdea7e318481f;hp=f475ecb4cc6c9fa5b96446ab44e562a42d2eb9b0;hpb=6906bad46d9a65df68c979fc2afe7a1c73cbcec5;p=jalview.git diff --git a/src/jalview/analysis/CrossRef.java b/src/jalview/analysis/CrossRef.java index f475ecb..f355d1f 100644 --- a/src/jalview/analysis/CrossRef.java +++ b/src/jalview/analysis/CrossRef.java @@ -11,8 +11,8 @@ import jalview.datamodel.DBRefSource; import jalview.datamodel.DBRefEntry; import jalview.datamodel.Sequence; import jalview.datamodel.SequenceI; -import jalview.ws.ASequenceFetcher; import jalview.ws.SequenceFetcher; +import jalview.ws.seqfetcher.ASequenceFetcher; /** * Functions for cross-referencing sequence databases. user must first specify @@ -39,27 +39,29 @@ public class CrossRef else { rfs = jalview.util.DBRefUtils.selectRefs(rfs, - DBRefSource.DNACODINGDBS); // could attempt to find other cross refs and return here - ie PDB xrefs (not dna, not protein seq) + DBRefSource.DNACODINGDBS); // could attempt to find other cross + // refs and return here - ie PDB xrefs + // (not dna, not protein seq) } return rfs; } - public static Hashtable classifyDbRefs(DBRefEntry[] rfs) { Hashtable classes = new Hashtable(); - classes.put(DBRefSource.PROTEINDBS, jalview.util.DBRefUtils.selectRefs(rfs, DBRefSource.PROTEINDBS)); - classes.put(DBRefSource.DNACODINGDBS, jalview.util.DBRefUtils.selectRefs(rfs, - DBRefSource.DNACODINGDBS)); - classes.put(DBRefSource.DOMAINDBS, jalview.util.DBRefUtils.selectRefs(rfs, - DBRefSource.DOMAINDBS)); + classes.put(DBRefSource.PROTEINDBS, jalview.util.DBRefUtils.selectRefs( + rfs, DBRefSource.PROTEINDBS)); + classes.put(DBRefSource.DNACODINGDBS, jalview.util.DBRefUtils + .selectRefs(rfs, DBRefSource.DNACODINGDBS)); + classes.put(DBRefSource.DOMAINDBS, jalview.util.DBRefUtils.selectRefs( + rfs, DBRefSource.DOMAINDBS)); // classes.put(OTHER, ) return classes; } /** * @param dna - * true if seqs are DNA seqs + * true if seqs are DNA seqs * @param seqs * @return a list of sequence database cross reference source types */ @@ -67,22 +69,25 @@ public class CrossRef { return findSequenceXrefTypes(dna, seqs, null); } + /** - * Indirect references are references from other sequences from the dataset to any of the direct - * DBRefEntrys on the given sequences. + * Indirect references are references from other sequences from the dataset to + * any of the direct DBRefEntrys on the given sequences. + * * @param dna - * true if seqs are DNA seqs + * true if seqs are DNA seqs * @param seqs * @return a list of sequence database cross reference source types */ - public static String[] findSequenceXrefTypes(boolean dna, SequenceI[] seqs, AlignmentI dataset) + public static String[] findSequenceXrefTypes(boolean dna, + SequenceI[] seqs, AlignmentI dataset) { String[] dbrefs = null; Vector refs = new Vector(); for (int s = 0; s < seqs.length; s++) { SequenceI dss = seqs[s]; - while (dss.getDatasetSequence()!=null) + while (dss.getDatasetSequence() != null) { dss = dss.getDatasetSequence(); } @@ -94,18 +99,19 @@ public class CrossRef refs.addElement(rfs[r].getSource()); } } - if (dataset!=null) + if (dataset != null) { // search for references to this sequence's direct references. DBRefEntry[] lrfs = CrossRef.findXDbRefs(!dna, seqs[s].getDBRef()); Vector rseqs = new Vector(); - CrossRef.searchDatasetXrefs(seqs[s], !dna, lrfs, dataset, rseqs, null); // don't need to specify codon frame for mapping here + CrossRef.searchDatasetXrefs(seqs[s], !dna, lrfs, dataset, rseqs, + null); // don't need to specify codon frame for mapping here Enumeration lr = rseqs.elements(); while (lr.hasMoreElements()) { SequenceI rs = (SequenceI) lr.nextElement(); DBRefEntry[] xrs = findXDbRefs(dna, rs.getDBRef()); - for (int r=0; rfs != null && r < rfs.length; r++) + for (int r = 0; rfs != null && r < rfs.length; r++) { if (!refs.contains(rfs[r].getSource())) { @@ -188,7 +194,7 @@ public class CrossRef * @param dna * @param source * @param dataset - * alignment to search for product sequences. + * alignment to search for product sequences. * @return products (as dataset sequences) */ public static Alignment findXrefSequences(SequenceI[] seqs, boolean dna, @@ -196,24 +202,32 @@ public class CrossRef { Vector rseqs = new Vector(); Alignment ral = null; - AlignedCodonFrame cf=new AlignedCodonFrame(0); // nominal width + AlignedCodonFrame cf = new AlignedCodonFrame(0); // nominal width for (int s = 0; s < seqs.length; s++) { SequenceI dss = seqs[s]; - while (dss.getDatasetSequence()!=null) + while (dss.getDatasetSequence() != null) { dss = dss.getDatasetSequence(); } boolean found = false; DBRefEntry[] xrfs = CrossRef.findXDbRefs(dna, dss.getDBRef()); - if ((xrfs == null || xrfs.length == 0) && dataset!=null) + if ((xrfs == null || xrfs.length == 0) && dataset != null) { System.out.println("Attempting to find ds Xrefs refs."); - DBRefEntry[] lrfs = CrossRef.findXDbRefs(!dna, seqs[s].getDBRef()); // less ambiguous would be a 'find primary dbRefEntry' method. + DBRefEntry[] lrfs = CrossRef.findXDbRefs(!dna, seqs[s].getDBRef()); // less + // ambiguous + // would + // be a + // 'find + // primary + // dbRefEntry' + // method. // filter for desired source xref here - found = CrossRef.searchDatasetXrefs(dss, !dna, lrfs, dataset, rseqs, cf); + found = CrossRef.searchDatasetXrefs(dss, !dna, lrfs, dataset, + rseqs, cf); } - for (int r = 0; xrfs!=null && r < xrfs.length; r++) + for (int r = 0; xrfs != null && r < xrfs.length; r++) { if (source != null && !source.equals(xrfs[r].getSource())) continue; @@ -223,14 +237,17 @@ public class CrossRef { Sequence rsq = new Sequence(xrfs[r].getMap().getTo()); rseqs.addElement(rsq); - if (xrfs[r].getMap().getMap().getFromRatio()!=xrfs[r].getMap().getMap().getToRatio()) + if (xrfs[r].getMap().getMap().getFromRatio() != xrfs[r] + .getMap().getMap().getToRatio()) { // get sense of map correct for adding to product alignment. if (dna) { // map is from dna seq to a protein product cf.addMap(dss, rsq, xrfs[r].getMap().getMap()); - } else { + } + else + { // map should be from protein seq to its coding dna cf.addMap(rsq, dss, xrfs[r].getMap().getMap().getInverse()); } @@ -238,13 +255,13 @@ public class CrossRef found = true; } } - else + if (!found) { // do a bit more work - search for sequences with references matching // xrefs on this sequence. if (dataset != null) { - found = searchDataset(dss, xrfs[r], dataset, rseqs, cf); + found |= searchDataset(dss, xrfs[r], dataset, rseqs, cf); if (found) xrfs[r] = null; // we've recovered seqs for this one. } @@ -265,8 +282,10 @@ public class CrossRef for (int r = 0; r < xrfs.length; r++) { // filter out any irrelevant or irretrievable references - if (xrfs[r]==null || ((source != null && !source.equals(xrfs[r].getSource())) - || !sftch.isFetchable(xrfs[r].getSource()))) + if (xrfs[r] == null + || ((source != null && !source.equals(xrfs[r] + .getSource())) || !sftch.isFetchable(xrfs[r] + .getSource()))) { l--; xrfs[r] = null; @@ -275,7 +294,7 @@ public class CrossRef if (l > 0) { System.out - .println("Attempting to retrieve cross referenced sequences."); + .println("Attempting to retrieve cross referenced sequences."); DBRefEntry[] t = new DBRefEntry[l]; l = 0; for (int r = 0; r < xrfs.length; r++) @@ -286,18 +305,58 @@ public class CrossRef xrfs = t; try { - retrieved = sftch.getSequences(xrfs); + retrieved = sftch.getSequences(xrfs); // problem here is we don't know which of xrfs resulted in which retrieved element } catch (Exception e) { System.err - .println("Problem whilst retrieving cross references for Sequence : " - + seqs[s].getName()); + .println("Problem whilst retrieving cross references for Sequence : " + + seqs[s].getName()); e.printStackTrace(); } if (retrieved != null) { for (int rs = 0; rs < retrieved.length; rs++) { + // TODO: examine each sequence for 'redundancy' + jalview.datamodel.DBRefEntry[] dbr = retrieved[rs].getDBRef(); + if (dbr != null && dbr.length > 0) + { + for (int di = 0; di < dbr.length; di++) + { + // find any entry where we should put in the sequence being cross-referenced into the map + jalview.datamodel.Mapping map = dbr[di].getMap(); + if (map != null) + { + if (map.getTo() != null && map.getMap() != null) + { + // should search the local dataset to find any existing candidates for To ! + try + { + // compare ms with dss and replace with dss in mapping if map is congruent + SequenceI ms = map.getTo(); + int sf = map.getMap().getToLowest(); + int st = map.getMap().getToHighest(); + SequenceI mappedrg = ms.getSubSequence(sf, st); + SequenceI loc = dss.getSubSequence(sf, st); + if (mappedrg.getLength()>0 && mappedrg.getSequenceAsString().equals( + loc.getSequenceAsString())) + { + System.err + .println("Mapping updated for retrieved crossreference"); + // method to update all refs of existing To on retrieved sequence with dss and merge any props on To onto dss. + map.setTo(dss); + } + } catch (Exception e) + { + System.err + .println("Exception when consolidating Mapped sequence set..."); + e.printStackTrace(System.err); + } + } + } + } + } + retrieved[rs].updatePDBIds(); rseqs.addElement(retrieved[rs]); } } @@ -310,7 +369,7 @@ public class CrossRef SequenceI[] rsqs = new SequenceI[rseqs.size()]; rseqs.copyInto(rsqs); ral = new Alignment(rsqs); - if (cf!=null && cf.getProtMappings()!=null) + if (cf != null && cf.getProtMappings() != null) { ral.addCodonFrame(cf); } @@ -319,20 +378,24 @@ public class CrossRef } /** - * find references to lrfs in the cross-reference set of each sequence in dataset (that is not equal to sequenceI) - * Identifies matching DBRefEntry based on source and accession string only - Map and Version are nulled. + * find references to lrfs in the cross-reference set of each sequence in + * dataset (that is not equal to sequenceI) Identifies matching DBRefEntry + * based on source and accession string only - Map and Version are nulled. + * * @param sequenceI * @param lrfs * @param dataset * @param rseqs * @return true if matches were found. */ - private static boolean searchDatasetXrefs(SequenceI sequenceI, boolean dna, DBRefEntry[] lrfs, AlignmentI dataset, Vector rseqs, AlignedCodonFrame cf) + private static boolean searchDatasetXrefs(SequenceI sequenceI, + boolean dna, DBRefEntry[] lrfs, AlignmentI dataset, Vector rseqs, + AlignedCodonFrame cf) { - boolean found=false; - if (lrfs==null) + boolean found = false; + if (lrfs == null) return false; - for (int i=0;i