X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;ds=sidebyside;f=src%2Fjalview%2Fanalysis%2FCrossRef.java;h=629850511fefc57400d34c9e07753c938e442c3e;hb=797df64fa2a0a30773d0f48f5494d4155e5a8be3;hp=f475ecb4cc6c9fa5b96446ab44e562a42d2eb9b0;hpb=6906bad46d9a65df68c979fc2afe7a1c73cbcec5;p=jalview.git
diff --git a/src/jalview/analysis/CrossRef.java b/src/jalview/analysis/CrossRef.java
index f475ecb..6298505 100644
--- a/src/jalview/analysis/CrossRef.java
+++ b/src/jalview/analysis/CrossRef.java
@@ -1,3 +1,20 @@
+/*
+ * Jalview - A Sequence Alignment Editor and Viewer (Version 2.7)
+ * Copyright (C) 2011 J Procter, AM Waterhouse, J Engelhardt, LM Lui, G Barton, M Clamp, S Searle
+ *
+ * This file is part of Jalview.
+ *
+ * Jalview is free software: you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
+ *
+ * Jalview is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ * PURPOSE. See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along with Jalview. If not, see <http://www.gnu.org/licenses/>.
+ */
package jalview.analysis;
import java.util.Enumeration;
@@ -11,8 +28,8 @@ import jalview.datamodel.DBRefSource;
import jalview.datamodel.DBRefEntry;
import jalview.datamodel.Sequence;
import jalview.datamodel.SequenceI;
-import jalview.ws.ASequenceFetcher;
import jalview.ws.SequenceFetcher;
+import jalview.ws.seqfetcher.ASequenceFetcher;
/**
* Functions for cross-referencing sequence databases. user must first specify
@@ -39,20 +56,22 @@ public class CrossRef
else
{
rfs = jalview.util.DBRefUtils.selectRefs(rfs,
- DBRefSource.DNACODINGDBS); // could attempt to find other cross refs and return here - ie PDB xrefs (not dna, not protein seq)
+ DBRefSource.DNACODINGDBS); // could attempt to find other cross
+ // refs and return here - ie PDB xrefs
+ // (not dna, not protein seq)
}
return rfs;
}
-
public static Hashtable classifyDbRefs(DBRefEntry[] rfs)
{
Hashtable classes = new Hashtable();
- classes.put(DBRefSource.PROTEINDBS, jalview.util.DBRefUtils.selectRefs(rfs, DBRefSource.PROTEINDBS));
- classes.put(DBRefSource.DNACODINGDBS, jalview.util.DBRefUtils.selectRefs(rfs,
- DBRefSource.DNACODINGDBS));
- classes.put(DBRefSource.DOMAINDBS, jalview.util.DBRefUtils.selectRefs(rfs,
- DBRefSource.DOMAINDBS));
+ classes.put(DBRefSource.PROTEINDBS,
+ jalview.util.DBRefUtils.selectRefs(rfs, DBRefSource.PROTEINDBS));
+ classes.put(DBRefSource.DNACODINGDBS, jalview.util.DBRefUtils
+ .selectRefs(rfs, DBRefSource.DNACODINGDBS));
+ classes.put(DBRefSource.DOMAINDBS,
+ jalview.util.DBRefUtils.selectRefs(rfs, DBRefSource.DOMAINDBS));
// classes.put(OTHER, )
return classes;
}
@@ -67,49 +86,58 @@ public class CrossRef
{
return findSequenceXrefTypes(dna, seqs, null);
}
+
/**
- * Indirect references are references from other sequences from the dataset to any of the direct
- * DBRefEntrys on the given sequences.
+ * Indirect references are references from other sequences from the dataset to
+ * any of the direct DBRefEntrys on the given sequences.
+ *
* @param dna
* true if seqs are DNA seqs
* @param seqs
* @return a list of sequence database cross reference source types
*/
- public static String[] findSequenceXrefTypes(boolean dna, SequenceI[] seqs, AlignmentI dataset)
+ public static String[] findSequenceXrefTypes(boolean dna,
+ SequenceI[] seqs, AlignmentI dataset)
{
String[] dbrefs = null;
Vector refs = new Vector();
for (int s = 0; s < seqs.length; s++)
{
- SequenceI dss = seqs[s];
- while (dss.getDatasetSequence()!=null)
- {
- dss = dss.getDatasetSequence();
- }
- DBRefEntry[] rfs = findXDbRefs(dna, dss.getDBRef());
- for (int r = 0; rfs != null && r < rfs.length; r++)
+ if (seqs[s] != null)
{
- if (!refs.contains(rfs[r].getSource()))
+
+ SequenceI dss = seqs[s];
+ while (dss.getDatasetSequence() != null)
{
- refs.addElement(rfs[r].getSource());
+ dss = dss.getDatasetSequence();
}
- }
- if (dataset!=null)
- {
- // search for references to this sequence's direct references.
- DBRefEntry[] lrfs = CrossRef.findXDbRefs(!dna, seqs[s].getDBRef());
- Vector rseqs = new Vector();
- CrossRef.searchDatasetXrefs(seqs[s], !dna, lrfs, dataset, rseqs, null); // don't need to specify codon frame for mapping here
- Enumeration lr = rseqs.elements();
- while (lr.hasMoreElements())
+ DBRefEntry[] rfs = findXDbRefs(dna, dss.getDBRef());
+ for (int r = 0; rfs != null && r < rfs.length; r++)
{
- SequenceI rs = (SequenceI) lr.nextElement();
- DBRefEntry[] xrs = findXDbRefs(dna, rs.getDBRef());
- for (int r=0; rfs != null && r < rfs.length; r++)
+ if (!refs.contains(rfs[r].getSource()))
{
- if (!refs.contains(rfs[r].getSource()))
+ refs.addElement(rfs[r].getSource());
+ }
+ }
+ if (dataset != null)
+ {
+ // search for references to this sequence's direct references.
+ DBRefEntry[] lrfs = CrossRef
+ .findXDbRefs(!dna, seqs[s].getDBRef());
+ Vector rseqs = new Vector();
+ CrossRef.searchDatasetXrefs(seqs[s], !dna, lrfs, dataset, rseqs,
+ null); // don't need to specify codon frame for mapping here
+ Enumeration lr = rseqs.elements();
+ while (lr.hasMoreElements())
+ {
+ SequenceI rs = (SequenceI) lr.nextElement();
+ DBRefEntry[] xrs = findXDbRefs(dna, rs.getDBRef());
+ for (int r = 0; rfs != null && r < rfs.length; r++)
{
- refs.addElement(rfs[r].getSource());
+ if (!refs.contains(rfs[r].getSource()))
+ {
+ refs.addElement(rfs[r].getSource());
+ }
}
}
}
@@ -152,7 +180,9 @@ public class CrossRef
{
if (cdna[c].getSource().equals(DBRefSource.EMBLCDS))
{
- // retrieve CDS dataset sequences
+ System.err
+ .println("TODO: unimplemented sequence retrieval for coding region sequence.");
+ // TODO: retrieve CDS dataset sequences
// need global dataset sequence retriever/resolver to reuse refs
// and construct Mapping entry.
// insert gaps in CDS according to peptide gaps.
@@ -196,24 +226,32 @@ public class CrossRef
{
Vector rseqs = new Vector();
Alignment ral = null;
- AlignedCodonFrame cf=new AlignedCodonFrame(0); // nominal width
+ AlignedCodonFrame cf = new AlignedCodonFrame(0); // nominal width
for (int s = 0; s < seqs.length; s++)
{
SequenceI dss = seqs[s];
- while (dss.getDatasetSequence()!=null)
+ while (dss.getDatasetSequence() != null)
{
dss = dss.getDatasetSequence();
}
boolean found = false;
DBRefEntry[] xrfs = CrossRef.findXDbRefs(dna, dss.getDBRef());
- if ((xrfs == null || xrfs.length == 0) && dataset!=null)
+ if ((xrfs == null || xrfs.length == 0) && dataset != null)
{
System.out.println("Attempting to find ds Xrefs refs.");
- DBRefEntry[] lrfs = CrossRef.findXDbRefs(!dna, seqs[s].getDBRef()); // less ambiguous would be a 'find primary dbRefEntry' method.
+ DBRefEntry[] lrfs = CrossRef.findXDbRefs(!dna, seqs[s].getDBRef()); // less
+ // ambiguous
+ // would
+ // be a
+ // 'find
+ // primary
+ // dbRefEntry'
+ // method.
// filter for desired source xref here
- found = CrossRef.searchDatasetXrefs(dss, !dna, lrfs, dataset, rseqs, cf);
+ found = CrossRef.searchDatasetXrefs(dss, !dna, lrfs, dataset,
+ rseqs, cf);
}
- for (int r = 0; xrfs!=null && r < xrfs.length; r++)
+ for (int r = 0; xrfs != null && r < xrfs.length; r++)
{
if (source != null && !source.equals(xrfs[r].getSource()))
continue;
@@ -223,14 +261,17 @@ public class CrossRef
{
Sequence rsq = new Sequence(xrfs[r].getMap().getTo());
rseqs.addElement(rsq);
- if (xrfs[r].getMap().getMap().getFromRatio()!=xrfs[r].getMap().getMap().getToRatio())
+ if (xrfs[r].getMap().getMap().getFromRatio() != xrfs[r]
+ .getMap().getMap().getToRatio())
{
// get sense of map correct for adding to product alignment.
if (dna)
{
// map is from dna seq to a protein product
cf.addMap(dss, rsq, xrfs[r].getMap().getMap());
- } else {
+ }
+ else
+ {
// map should be from protein seq to its coding dna
cf.addMap(rsq, dss, xrfs[r].getMap().getMap().getInverse());
}
@@ -238,13 +279,13 @@ public class CrossRef
found = true;
}
}
- else
+ if (!found)
{
// do a bit more work - search for sequences with references matching
// xrefs on this sequence.
if (dataset != null)
{
- found = searchDataset(dss, xrfs[r], dataset, rseqs, cf);
+ found |= searchDataset(dss, xrfs[r], dataset, rseqs, cf); // ,false,!dna);
if (found)
xrfs[r] = null; // we've recovered seqs for this one.
}
@@ -265,8 +306,10 @@ public class CrossRef
for (int r = 0; r < xrfs.length; r++)
{
// filter out any irrelevant or irretrievable references
- if (xrfs[r]==null || ((source != null && !source.equals(xrfs[r].getSource()))
- || !sftch.isFetchable(xrfs[r].getSource())))
+ if (xrfs[r] == null
+ || ((source != null && !source.equals(xrfs[r]
+ .getSource())) || !sftch.isFetchable(xrfs[r]
+ .getSource())))
{
l--;
xrfs[r] = null;
@@ -275,7 +318,7 @@ public class CrossRef
if (l > 0)
{
System.out
- .println("Attempting to retrieve cross referenced sequences.");
+ .println("Attempting to retrieve cross referenced sequences.");
DBRefEntry[] t = new DBRefEntry[l];
l = 0;
for (int r = 0; r < xrfs.length; r++)
@@ -286,18 +329,68 @@ public class CrossRef
xrfs = t;
try
{
- retrieved = sftch.getSequences(xrfs);
+ retrieved = sftch.getSequences(xrfs); // problem here is we don't
+ // know which of xrfs
+ // resulted in which
+ // retrieved element
} catch (Exception e)
{
System.err
- .println("Problem whilst retrieving cross references for Sequence : "
- + seqs[s].getName());
+ .println("Problem whilst retrieving cross references for Sequence : "
+ + seqs[s].getName());
e.printStackTrace();
}
if (retrieved != null)
{
for (int rs = 0; rs < retrieved.length; rs++)
{
+ // TODO: examine each sequence for 'redundancy'
+ jalview.datamodel.DBRefEntry[] dbr = retrieved[rs]
+ .getDBRef();
+ if (dbr != null && dbr.length > 0)
+ {
+ for (int di = 0; di < dbr.length; di++)
+ {
+ // find any entry where we should put in the sequence being
+ // cross-referenced into the map
+ jalview.datamodel.Mapping map = dbr[di].getMap();
+ if (map != null)
+ {
+ if (map.getTo() != null && map.getMap() != null)
+ {
+ // should search the local dataset to find any existing
+ // candidates for To !
+ try
+ {
+ // compare ms with dss and replace with dss in mapping
+ // if map is congruent
+ SequenceI ms = map.getTo();
+ int sf = map.getMap().getToLowest();
+ int st = map.getMap().getToHighest();
+ SequenceI mappedrg = ms.getSubSequence(sf, st);
+ SequenceI loc = dss.getSubSequence(sf, st);
+ if (mappedrg.getLength() > 0
+ && mappedrg.getSequenceAsString().equals(
+ loc.getSequenceAsString()))
+ {
+ System.err
+ .println("Mapping updated for retrieved crossreference");
+ // method to update all refs of existing To on
+ // retrieved sequence with dss and merge any props
+ // on To onto dss.
+ map.setTo(dss);
+ }
+ } catch (Exception e)
+ {
+ System.err
+ .println("Exception when consolidating Mapped sequence set...");
+ e.printStackTrace(System.err);
+ }
+ }
+ }
+ }
+ }
+ retrieved[rs].updatePDBIds();
rseqs.addElement(retrieved[rs]);
}
}
@@ -310,7 +403,7 @@ public class CrossRef
SequenceI[] rsqs = new SequenceI[rseqs.size()];
rseqs.copyInto(rsqs);
ral = new Alignment(rsqs);
- if (cf!=null && cf.getProtMappings()!=null)
+ if (cf != null && cf.getProtMappings() != null)
{
ral.addCodonFrame(cf);
}
@@ -319,20 +412,24 @@ public class CrossRef
}
/**
- * find references to lrfs in the cross-reference set of each sequence in dataset (that is not equal to sequenceI)
- * Identifies matching DBRefEntry based on source and accession string only - Map and Version are nulled.
+ * find references to lrfs in the cross-reference set of each sequence in
+ * dataset (that is not equal to sequenceI) Identifies matching DBRefEntry
+ * based on source and accession string only - Map and Version are nulled.
+ *
* @param sequenceI
* @param lrfs
* @param dataset
* @param rseqs
* @return true if matches were found.
*/
- private static boolean searchDatasetXrefs(SequenceI sequenceI, boolean dna, DBRefEntry[] lrfs, AlignmentI dataset, Vector rseqs, AlignedCodonFrame cf)
+ private static boolean searchDatasetXrefs(SequenceI sequenceI,
+ boolean dna, DBRefEntry[] lrfs, AlignmentI dataset, Vector rseqs,
+ AlignedCodonFrame cf)
{
- boolean found=false;
- if (lrfs==null)
+ boolean found = false;
+ if (lrfs == null)
return false;
- for (int i=0;i