X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fanalysis%2FCrossRef.java;h=1c14240e49f3b49414cec522c5f2d41ee372d53e;hb=153dd62dc91da13ae732600e6ea55ddbe15eab39;hp=d2f03589bedd499eee9f96356e095dbe1d7faa36;hpb=006226fa282af3eed4c9770a20073b0f4fa990d4;p=jalview.git diff --git a/src/jalview/analysis/CrossRef.java b/src/jalview/analysis/CrossRef.java index d2f0358..1c14240 100644 --- a/src/jalview/analysis/CrossRef.java +++ b/src/jalview/analysis/CrossRef.java @@ -1,3 +1,20 @@ +/* + * Jalview - A Sequence Alignment Editor and Viewer (Version 2.6) + * Copyright (C) 2010 J Procter, AM Waterhouse, G Barton, M Clamp, S Searle + * + * This file is part of Jalview. + * + * Jalview is free software: you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. + * + * Jalview is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along with Jalview. If not, see <http://www.gnu.org/licenses/>. + */ package jalview.analysis; import java.util.Enumeration; @@ -11,8 +28,8 @@ import jalview.datamodel.DBRefSource; import jalview.datamodel.DBRefEntry; import jalview.datamodel.Sequence; import jalview.datamodel.SequenceI; -import jalview.ws.ASequenceFetcher; import jalview.ws.SequenceFetcher; +import jalview.ws.seqfetcher.ASequenceFetcher; /** * Functions for cross-referencing sequence databases. 
user must first specify @@ -40,8 +57,8 @@ public class CrossRef { rfs = jalview.util.DBRefUtils.selectRefs(rfs, DBRefSource.DNACODINGDBS); // could attempt to find other cross - // refs and return here - ie PDB xrefs - // (not dna, not protein seq) + // refs and return here - ie PDB xrefs + // (not dna, not protein seq) } return rfs; } @@ -61,7 +78,7 @@ public class CrossRef /** * @param dna - * true if seqs are DNA seqs + * true if seqs are DNA seqs * @param seqs * @return a list of sequence database cross reference source types */ @@ -75,7 +92,7 @@ public class CrossRef * any of the direct DBRefEntrys on the given sequences. * * @param dna - * true if seqs are DNA seqs + * true if seqs are DNA seqs * @param seqs * @return a list of sequence database cross reference source types */ @@ -86,36 +103,41 @@ public class CrossRef Vector refs = new Vector(); for (int s = 0; s < seqs.length; s++) { - SequenceI dss = seqs[s]; - while (dss.getDatasetSequence() != null) - { - dss = dss.getDatasetSequence(); - } - DBRefEntry[] rfs = findXDbRefs(dna, dss.getDBRef()); - for (int r = 0; rfs != null && r < rfs.length; r++) + if (seqs[s] != null) { - if (!refs.contains(rfs[r].getSource())) + + SequenceI dss = seqs[s]; + while (dss.getDatasetSequence() != null) { - refs.addElement(rfs[r].getSource()); + dss = dss.getDatasetSequence(); } - } - if (dataset != null) - { - // search for references to this sequence's direct references. 
- DBRefEntry[] lrfs = CrossRef.findXDbRefs(!dna, seqs[s].getDBRef()); - Vector rseqs = new Vector(); - CrossRef.searchDatasetXrefs(seqs[s], !dna, lrfs, dataset, rseqs, - null); // don't need to specify codon frame for mapping here - Enumeration lr = rseqs.elements(); - while (lr.hasMoreElements()) + DBRefEntry[] rfs = findXDbRefs(dna, dss.getDBRef()); + for (int r = 0; rfs != null && r < rfs.length; r++) { - SequenceI rs = (SequenceI) lr.nextElement(); - DBRefEntry[] xrs = findXDbRefs(dna, rs.getDBRef()); - for (int r = 0; rfs != null && r < rfs.length; r++) + if (!refs.contains(rfs[r].getSource())) { - if (!refs.contains(rfs[r].getSource())) + refs.addElement(rfs[r].getSource()); + } + } + if (dataset != null) + { + // search for references to this sequence's direct references. + DBRefEntry[] lrfs = CrossRef + .findXDbRefs(!dna, seqs[s].getDBRef()); + Vector rseqs = new Vector(); + CrossRef.searchDatasetXrefs(seqs[s], !dna, lrfs, dataset, rseqs, + null); // don't need to specify codon frame for mapping here + Enumeration lr = rseqs.elements(); + while (lr.hasMoreElements()) + { + SequenceI rs = (SequenceI) lr.nextElement(); + DBRefEntry[] xrs = findXDbRefs(dna, rs.getDBRef()); + for (int r = 0; rfs != null && r < rfs.length; r++) { - refs.addElement(rfs[r].getSource()); + if (!refs.contains(rfs[r].getSource())) + { + refs.addElement(rfs[r].getSource()); + } } } } @@ -158,7 +180,9 @@ public class CrossRef { if (cdna[c].getSource().equals(DBRefSource.EMBLCDS)) { - // retrieve CDS dataset sequences + System.err + .println("TODO: unimplemented sequence retrieval for coding region sequence."); + // TODO: retrieve CDS dataset sequences // need global dataset sequence retriever/resolver to reuse refs // and construct Mapping entry. // insert gaps in CDS according to peptide gaps. @@ -194,7 +218,7 @@ public class CrossRef * @param dna * @param source * @param dataset - * alignment to search for product sequences. + * alignment to search for product sequences. 
* @return products (as dataset sequences) */ public static Alignment findXrefSequences(SequenceI[] seqs, boolean dna, @@ -216,13 +240,13 @@ public class CrossRef { System.out.println("Attempting to find ds Xrefs refs."); DBRefEntry[] lrfs = CrossRef.findXDbRefs(!dna, seqs[s].getDBRef()); // less - // ambiguous - // would - // be a - // 'find - // primary - // dbRefEntry' - // method. + // ambiguous + // would + // be a + // 'find + // primary + // dbRefEntry' + // method. // filter for desired source xref here found = CrossRef.searchDatasetXrefs(dss, !dna, lrfs, dataset, rseqs, cf); @@ -261,7 +285,7 @@ public class CrossRef // xrefs on this sequence. if (dataset != null) { - found |= searchDataset(dss, xrfs[r], dataset, rseqs, cf); + found |= searchDataset(dss, xrfs[r], dataset, rseqs, cf); // ,false,!dna); if (found) xrfs[r] = null; // we've recovered seqs for this one. } @@ -305,7 +329,10 @@ public class CrossRef xrfs = t; try { - retrieved = sftch.getSequences(xrfs); + retrieved = sftch.getSequences(xrfs); // problem here is we don't + // know which of xrfs + // resulted in which + // retrieved element } catch (Exception e) { System.err @@ -317,6 +344,53 @@ public class CrossRef { for (int rs = 0; rs < retrieved.length; rs++) { + // TODO: examine each sequence for 'redundancy' + jalview.datamodel.DBRefEntry[] dbr = retrieved[rs] + .getDBRef(); + if (dbr != null && dbr.length > 0) + { + for (int di = 0; di < dbr.length; di++) + { + // find any entry where we should put in the sequence being + // cross-referenced into the map + jalview.datamodel.Mapping map = dbr[di].getMap(); + if (map != null) + { + if (map.getTo() != null && map.getMap() != null) + { + // should search the local dataset to find any existing + // candidates for To ! 
+ try + { + // compare ms with dss and replace with dss in mapping + // if map is congruent + SequenceI ms = map.getTo(); + int sf = map.getMap().getToLowest(); + int st = map.getMap().getToHighest(); + SequenceI mappedrg = ms.getSubSequence(sf, st); + SequenceI loc = dss.getSubSequence(sf, st); + if (mappedrg.getLength() > 0 + && mappedrg.getSequenceAsString().equals( + loc.getSequenceAsString())) + { + System.err + .println("Mapping updated for retrieved crossreference"); + // method to update all refs of existing To on + // retrieved sequence with dss and merge any props + // on To onto dss. + map.setTo(dss); + } + } catch (Exception e) + { + System.err + .println("Exception when consolidating Mapped sequence set..."); + e.printStackTrace(System.err); + } + } + } + } + } + retrieved[rs].updatePDBIds(); rseqs.addElement(retrieved[rs]); } } @@ -374,7 +448,7 @@ public class CrossRef * @param xrf * @param dataset * @param rseqs - * set of unique sequences + * set of unique sequences * @param cf * @return true if one or more unique sequences were found and added */ @@ -393,10 +467,10 @@ public class CrossRef * @param xrf * @param dataset * @param rseqs - * @param direct - - * search all references or only subset + * @param direct + * - search all references or only subset * @param dna - * search dna or protein xrefs (if direct=false) + * search dna or protein xrefs (if direct=false) * @return true if relationship found and sequence added. 
*/ public static boolean searchDataset(SequenceI sequenceI, DBRefEntry xrf, @@ -404,6 +478,7 @@ public class CrossRef boolean direct, boolean dna) { boolean found = false; + SequenceI[] typer = new SequenceI[1]; if (dataset == null) return false; if (dataset.getSequences() == null) @@ -424,17 +499,27 @@ public class CrossRef } if (nxt != sequenceI && nxt != sequenceI.getDatasetSequence()) { + // check if this is the correct sequence type + { + typer[0] = nxt; + boolean isDna = jalview.util.Comparison.isNucleotide(typer); + if ((direct && isDna == dna) || (!direct && isDna != dna)) + { + // skip this sequence because it is same molecule type + continue; + } + } + // look for direct or indirect references in common - DBRefEntry[] poss = null, cands = null; + DBRefEntry[] poss = nxt.getDBRef(), cands = null; if (direct) { - cands = jalview.util.DBRefUtils.searchRefs(poss = nxt - .getDBRef(), xrf); + cands = jalview.util.DBRefUtils.searchRefs(poss, xrf); } else { - cands = jalview.util.DBRefUtils.searchRefs(poss = CrossRef - .findXDbRefs(dna, nxt.getDBRef()), xrf); + poss = CrossRef.findXDbRefs(dna, poss); // + cands = jalview.util.DBRefUtils.searchRefs(poss, xrf); } if (cands != null) { @@ -442,7 +527,7 @@ public class CrossRef { rseqs.addElement(nxt); boolean foundmap = cf != null; // don't search if we aren't given - // a codon map object + // a codon map object for (int r = 0; foundmap && r < cands.length; r++) { if (cands[r].hasMap()) @@ -485,8 +570,8 @@ public class CrossRef * @param dna * @param seqs * @param dataset - * @param fake - - * don't actually build lists - just get types + * @param fake + * - don't actually build lists - just get types * @return public static Object[] buildXProductsList(boolean dna, SequenceI[] * seqs, AlignmentI dataset, boolean fake) { String types[] = * jalview.analysis.CrossRef.findSequenceXrefTypes( dna, seqs, @@ -495,18 +580,19 @@ public class CrossRef * System.out.println("Type: " + types[t]); SequenceI[] prod = * 
jalview.analysis.CrossRef.findXrefSequences(seqs, dna, types[t]); * System.out.println("Found " + ((prod == null) ? "no" : "" + - * prod.length) + " products"); if (prod!=null) { for (int p=0; p