X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fanalysis%2FCrossRef.java;h=1c14240e49f3b49414cec522c5f2d41ee372d53e;hb=153dd62dc91da13ae732600e6ea55ddbe15eab39;hp=d2f03589bedd499eee9f96356e095dbe1d7faa36;hpb=006226fa282af3eed4c9770a20073b0f4fa990d4;p=jalview.git
diff --git a/src/jalview/analysis/CrossRef.java b/src/jalview/analysis/CrossRef.java
index d2f0358..1c14240 100644
--- a/src/jalview/analysis/CrossRef.java
+++ b/src/jalview/analysis/CrossRef.java
@@ -1,3 +1,20 @@
+/*
+ * Jalview - A Sequence Alignment Editor and Viewer (Version 2.6)
+ * Copyright (C) 2010 J Procter, AM Waterhouse, G Barton, M Clamp, S Searle
+ *
+ * This file is part of Jalview.
+ *
+ * Jalview is free software: you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
+ *
+ * Jalview is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ * PURPOSE. See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along with Jalview. If not, see <http://www.gnu.org/licenses/>.
+ */
package jalview.analysis;
import java.util.Enumeration;
@@ -11,8 +28,8 @@ import jalview.datamodel.DBRefSource;
import jalview.datamodel.DBRefEntry;
import jalview.datamodel.Sequence;
import jalview.datamodel.SequenceI;
-import jalview.ws.ASequenceFetcher;
import jalview.ws.SequenceFetcher;
+import jalview.ws.seqfetcher.ASequenceFetcher;
/**
* Functions for cross-referencing sequence databases. user must first specify
@@ -40,8 +57,8 @@ public class CrossRef
{
rfs = jalview.util.DBRefUtils.selectRefs(rfs,
DBRefSource.DNACODINGDBS); // could attempt to find other cross
- // refs and return here - ie PDB xrefs
- // (not dna, not protein seq)
+ // refs and return here - ie PDB xrefs
+ // (not dna, not protein seq)
}
return rfs;
}
@@ -61,7 +78,7 @@ public class CrossRef
/**
* @param dna
- * true if seqs are DNA seqs
+ * true if seqs are DNA seqs
* @param seqs
* @return a list of sequence database cross reference source types
*/
@@ -75,7 +92,7 @@ public class CrossRef
* any of the direct DBRefEntrys on the given sequences.
*
* @param dna
- * true if seqs are DNA seqs
+ * true if seqs are DNA seqs
* @param seqs
* @return a list of sequence database cross reference source types
*/
@@ -86,36 +103,41 @@ public class CrossRef
Vector refs = new Vector();
for (int s = 0; s < seqs.length; s++)
{
- SequenceI dss = seqs[s];
- while (dss.getDatasetSequence() != null)
- {
- dss = dss.getDatasetSequence();
- }
- DBRefEntry[] rfs = findXDbRefs(dna, dss.getDBRef());
- for (int r = 0; rfs != null && r < rfs.length; r++)
+ if (seqs[s] != null)
{
- if (!refs.contains(rfs[r].getSource()))
+
+ SequenceI dss = seqs[s];
+ while (dss.getDatasetSequence() != null)
{
- refs.addElement(rfs[r].getSource());
+ dss = dss.getDatasetSequence();
}
- }
- if (dataset != null)
- {
- // search for references to this sequence's direct references.
- DBRefEntry[] lrfs = CrossRef.findXDbRefs(!dna, seqs[s].getDBRef());
- Vector rseqs = new Vector();
- CrossRef.searchDatasetXrefs(seqs[s], !dna, lrfs, dataset, rseqs,
- null); // don't need to specify codon frame for mapping here
- Enumeration lr = rseqs.elements();
- while (lr.hasMoreElements())
+ DBRefEntry[] rfs = findXDbRefs(dna, dss.getDBRef());
+ for (int r = 0; rfs != null && r < rfs.length; r++)
{
- SequenceI rs = (SequenceI) lr.nextElement();
- DBRefEntry[] xrs = findXDbRefs(dna, rs.getDBRef());
- for (int r = 0; rfs != null && r < rfs.length; r++)
+ if (!refs.contains(rfs[r].getSource()))
{
- if (!refs.contains(rfs[r].getSource()))
+ refs.addElement(rfs[r].getSource());
+ }
+ }
+ if (dataset != null)
+ {
+ // search for references to this sequence's direct references.
+ DBRefEntry[] lrfs = CrossRef
+ .findXDbRefs(!dna, seqs[s].getDBRef());
+ Vector rseqs = new Vector();
+ CrossRef.searchDatasetXrefs(seqs[s], !dna, lrfs, dataset, rseqs,
+ null); // don't need to specify codon frame for mapping here
+ Enumeration lr = rseqs.elements();
+ while (lr.hasMoreElements())
+ {
+ SequenceI rs = (SequenceI) lr.nextElement();
+ DBRefEntry[] xrs = findXDbRefs(dna, rs.getDBRef());
+ for (int r = 0; rfs != null && r < rfs.length; r++)
{
- refs.addElement(rfs[r].getSource());
+ if (!refs.contains(rfs[r].getSource()))
+ {
+ refs.addElement(rfs[r].getSource());
+ }
}
}
}
@@ -158,7 +180,9 @@ public class CrossRef
{
if (cdna[c].getSource().equals(DBRefSource.EMBLCDS))
{
- // retrieve CDS dataset sequences
+ System.err
+ .println("TODO: unimplemented sequence retrieval for coding region sequence.");
+ // TODO: retrieve CDS dataset sequences
// need global dataset sequence retriever/resolver to reuse refs
// and construct Mapping entry.
// insert gaps in CDS according to peptide gaps.
@@ -194,7 +218,7 @@ public class CrossRef
* @param dna
* @param source
* @param dataset
- * alignment to search for product sequences.
+ * alignment to search for product sequences.
* @return products (as dataset sequences)
*/
public static Alignment findXrefSequences(SequenceI[] seqs, boolean dna,
@@ -216,13 +240,13 @@ public class CrossRef
{
System.out.println("Attempting to find ds Xrefs refs.");
DBRefEntry[] lrfs = CrossRef.findXDbRefs(!dna, seqs[s].getDBRef()); // less
- // ambiguous
- // would
- // be a
- // 'find
- // primary
- // dbRefEntry'
- // method.
+ // ambiguous
+ // would
+ // be a
+ // 'find
+ // primary
+ // dbRefEntry'
+ // method.
// filter for desired source xref here
found = CrossRef.searchDatasetXrefs(dss, !dna, lrfs, dataset,
rseqs, cf);
@@ -261,7 +285,7 @@ public class CrossRef
// xrefs on this sequence.
if (dataset != null)
{
- found |= searchDataset(dss, xrfs[r], dataset, rseqs, cf);
+ found |= searchDataset(dss, xrfs[r], dataset, rseqs, cf); // ,false,!dna);
if (found)
xrfs[r] = null; // we've recovered seqs for this one.
}
@@ -305,7 +329,10 @@ public class CrossRef
xrfs = t;
try
{
- retrieved = sftch.getSequences(xrfs);
+ retrieved = sftch.getSequences(xrfs); // problem here is we don't
+ // know which of xrfs
+ // resulted in which
+ // retrieved element
} catch (Exception e)
{
System.err
@@ -317,6 +344,53 @@ public class CrossRef
{
for (int rs = 0; rs < retrieved.length; rs++)
{
+ // TODO: examine each sequence for 'redundancy'
+ jalview.datamodel.DBRefEntry[] dbr = retrieved[rs]
+ .getDBRef();
+ if (dbr != null && dbr.length > 0)
+ {
+ for (int di = 0; di < dbr.length; di++)
+ {
+ // find any entry where we should put in the sequence being
+ // cross-referenced into the map
+ jalview.datamodel.Mapping map = dbr[di].getMap();
+ if (map != null)
+ {
+ if (map.getTo() != null && map.getMap() != null)
+ {
+ // should search the local dataset to find any existing
+ // candidates for To !
+ try
+ {
+ // compare ms with dss and replace with dss in mapping
+ // if map is congruent
+ SequenceI ms = map.getTo();
+ int sf = map.getMap().getToLowest();
+ int st = map.getMap().getToHighest();
+ SequenceI mappedrg = ms.getSubSequence(sf, st);
+ SequenceI loc = dss.getSubSequence(sf, st);
+ if (mappedrg.getLength() > 0
+ && mappedrg.getSequenceAsString().equals(
+ loc.getSequenceAsString()))
+ {
+ System.err
+ .println("Mapping updated for retrieved crossreference");
+ // method to update all refs of existing To on
+ // retrieved sequence with dss and merge any props
+ // on To onto dss.
+ map.setTo(dss);
+ }
+ } catch (Exception e)
+ {
+ System.err
+ .println("Exception when consolidating Mapped sequence set...");
+ e.printStackTrace(System.err);
+ }
+ }
+ }
+ }
+ }
+ retrieved[rs].updatePDBIds();
rseqs.addElement(retrieved[rs]);
}
}
@@ -374,7 +448,7 @@ public class CrossRef
* @param xrf
* @param dataset
* @param rseqs
- * set of unique sequences
+ * set of unique sequences
* @param cf
* @return true if one or more unique sequences were found and added
*/
@@ -393,10 +467,10 @@ public class CrossRef
* @param xrf
* @param dataset
* @param rseqs
- * @param direct -
- * search all references or only subset
+ * @param direct
+ * - search all references or only subset
* @param dna
- * search dna or protein xrefs (if direct=false)
+ * search dna or protein xrefs (if direct=false)
* @return true if relationship found and sequence added.
*/
public static boolean searchDataset(SequenceI sequenceI, DBRefEntry xrf,
@@ -404,6 +478,7 @@ public class CrossRef
boolean direct, boolean dna)
{
boolean found = false;
+ SequenceI[] typer = new SequenceI[1];
if (dataset == null)
return false;
if (dataset.getSequences() == null)
@@ -424,17 +499,27 @@ public class CrossRef
}
if (nxt != sequenceI && nxt != sequenceI.getDatasetSequence())
{
+ // check if this is the correct sequence type
+ {
+ typer[0] = nxt;
+ boolean isDna = jalview.util.Comparison.isNucleotide(typer);
+ if ((direct && isDna == dna) || (!direct && isDna != dna))
+ {
+ // skip this sequence because it is same molecule type
+ continue;
+ }
+ }
+
// look for direct or indirect references in common
- DBRefEntry[] poss = null, cands = null;
+ DBRefEntry[] poss = nxt.getDBRef(), cands = null;
if (direct)
{
- cands = jalview.util.DBRefUtils.searchRefs(poss = nxt
- .getDBRef(), xrf);
+ cands = jalview.util.DBRefUtils.searchRefs(poss, xrf);
}
else
{
- cands = jalview.util.DBRefUtils.searchRefs(poss = CrossRef
- .findXDbRefs(dna, nxt.getDBRef()), xrf);
+ poss = CrossRef.findXDbRefs(dna, poss); //
+ cands = jalview.util.DBRefUtils.searchRefs(poss, xrf);
}
if (cands != null)
{
@@ -442,7 +527,7 @@ public class CrossRef
{
rseqs.addElement(nxt);
boolean foundmap = cf != null; // don't search if we aren't given
- // a codon map object
+ // a codon map object
for (int r = 0; foundmap && r < cands.length; r++)
{
if (cands[r].hasMap())
@@ -485,8 +570,8 @@ public class CrossRef
* @param dna
* @param seqs
* @param dataset
- * @param fake -
- * don't actually build lists - just get types
+ * @param fake
+ * - don't actually build lists - just get types
* @return public static Object[] buildXProductsList(boolean dna, SequenceI[]
* seqs, AlignmentI dataset, boolean fake) { String types[] =
* jalview.analysis.CrossRef.findSequenceXrefTypes( dna, seqs,
@@ -495,18 +580,19 @@ public class CrossRef
* System.out.println("Type: " + types[t]); SequenceI[] prod =
* jalview.analysis.CrossRef.findXrefSequences(seqs, dna, types[t]);
* System.out.println("Found " + ((prod == null) ? "no" : "" +
- * prod.length) + " products"); if (prod!=null) { for (int p=0; p