package jalview.analysis;
+import jalview.analysis.CrossRef.MySequenceFeature;
import jalview.datamodel.AlignedCodonFrame;
import jalview.datamodel.Alignment;
import jalview.datamodel.AlignmentI;
public class CrossRefs
{
+ /*
+ * A sub-class that ignores Parent attribute when comparing sequence
+ * features. This avoids 'duplicate' CDS features that only
+ * differ in their parent Transcript ids.
+ *
+ * The instance wraps the feature passed to its constructor and forwards
+ * equals(Object) to that feature's two-argument equals(o, true).
+ * NOTE(review): the 'true' flag presumably means "ignore Parent" - confirm
+ * against SequenceFeature.equals(Object, boolean).
+ * NOTE(review): equals is overridden without a matching hashCode, so
+ * instances are only suitable for equals-based lookups such as
+ * List.contains, not as members of hash-based collections.
+ * NOTE(review): this is a non-static inner class, so creating one needs an
+ * enclosing instance (see the 'me.new MySequenceFeature(feat)' usage).
+ * NOTE(review): the file's import list names
+ * jalview.analysis.CrossRef.MySequenceFeature while the enclosing class
+ * here is CrossRefs - check that import.
+ */
+ class MySequenceFeature extends SequenceFeature
+ {
+ private SequenceFeature feat;
+
+ MySequenceFeature(SequenceFeature sf)
+ {
+ this.feat = sf;
+ }
+
+ @Override
+ public boolean equals(Object o)
+ {
+ return feat.equals(o, true);
+ }
+ }
+
/**
* Finds cross-references for sequences from a specified source database.
* These may be found in four ways:
public static AlignmentI findXrefSequences(SequenceI[] seqs, boolean dna,
String source, AlignmentI dataset)
{
- List<SequenceI> foundSeqs = new ArrayList<SequenceI>();
- AlignedCodonFrame mappings = new AlignedCodonFrame();
-
- List<DBRefEntry> sourceRefs = new ArrayList<DBRefEntry>();
-
+ /*
+ * filter to only those sequences of the right type (nucleotide/protein)
+ * i.e. keep a sequence only when Comparison.isNucleotide(seq) agrees
+ * with the 'dna' flag; the filtered list is then handed to the
+ * List-based overload of findXrefSequences, whose result is returned
+ */
+ List<SequenceI> fromSeqs = new ArrayList<SequenceI>();
for (SequenceI seq : seqs)
{
- if (dna != Comparison.isNucleotide(seq))
+ if (dna == Comparison.isNucleotide(seq))
{
- /*
- * mixed alignment, and this sequence is of the wrong type
- */
- continue;
+ fromSeqs.add(seq);
}
+ }
+ return findXrefSequences(fromSeqs, dna, source, dataset);
+ }
+
+ /**
+ * Finds cross-references for sequences from a specified source database.
+ * These may be found in four ways:
+ * <ul>
+ * <li>as a DBRefEntry on the known sequence, which has a mapped-to sequence</li>
+ * <li>a sequence of complementary type in the alignment dataset, which has a
+ * DBRefEntry to one of the known sequence's 'direct' DBRefs</li>
+ * <li>a sequence of complementary type in the alignment, which has a
+ * DBRefEntry to one of the known sequence's 'cross-ref' DBRefs</li>
+ * <li>by fetching the accession from the remote database</li>
+ * </ul>
+ *
+ * @param fromSeqs
+ * the sequences whose cross-references we are searching for,
+ * filtered to only those which are of the type denoted by 'dna'
+ * @param dna
+ * true if the sequences are from a nucleotide alignment, else false
+ * @param source
+ * the database source we want cross-references to
+ * @param dataset
+ * the alignment dataset the sequences belong to
+ * @return an alignment containing cross-reference sequences, or null if none
+ * found
+ */
+ static AlignmentI findXrefSequences(List<SequenceI> fromSeqs,
+ boolean dna, String source, AlignmentI dataset)
+ {
+ List<SequenceI> foundSeqs = new ArrayList<SequenceI>();
+ AlignedCodonFrame mappings = new AlignedCodonFrame();
- /*
- * get this sequence's dbrefs to source database (if any)
- */
- List<DBRefEntry> seqSourceRefs = DBRefUtils.searchRefsForSource(
- seq.getDBRefs(), source);
+ List<DBRefEntry> unresolvedRefs = new ArrayList<DBRefEntry>();
- /*
- * first extract any mapped sequences from sourceRefs
- */
- findMappedDbrefs(seq, seqSourceRefs, foundSeqs, mappings);
+ /*
+ * first extract any mapped sequences from sourceRefs
+ * if successful, sequence is removed from fromSeqs
+ * if unsuccessful, dbrefs are added to unresolvedRefs
+ */
+ findMappedDbrefs(fromSeqs, source, foundSeqs,
+ unresolvedRefs, mappings);
- /*
- * for remaining sourceRefs, try to match a
- * complementary sequence in the dataset
- */
- findIndirectCrossReferences(seq, source, seqSourceRefs, dataset,
- foundSeqs, mappings);
- }
+ /*
+ * then search the alignment dataset for dbref resolutions
+ */
+ findIndirectCrossReferences(fromSeqs, source, dataset, foundSeqs,
+ unresolvedRefs, mappings);
/*
* fetch any remaining sourceRefs from the source database
*/
- fetchCrossReferences(sourceRefs, foundSeqs, mappings, dna, dataset);
+ fetchCrossReferences(fromSeqs, unresolvedRefs, foundSeqs, mappings,
+ dna, dataset);
if (foundSeqs.isEmpty())
{
/**
* Looks for DBRefEntrys to 'source' which have a mapping to a sequence. If
* found, adds the sequence to foundSeqs and removes the dbref from the list.
+ * DBRefs with no mapping are added to the 'unresolvedRefs' list (setting
+ * version number to 0 i.e. use source and accession only).
+ * <p>
+ * Only dbrefs whose source equals 'source' are considered, and sequences
+ * with no dbrefs are skipped. A sequence is removed from fromSeqs as soon
+ * as any of its matching dbrefs carries a mapping. Codon mappings are
+ * registered against the sequence's dataset sequence when it has one,
+ * otherwise against the sequence itself.
+ * <p>
+ * NOTE(review): a dbref that has a mapping but no mapped-to sequence still
+ * marks the sequence as resolved, although nothing is added to foundSeqs -
+ * confirm this is intended.
*
- * @param seq
- * the dataset sequence we are searching from
- * @param sourceRefs
- * the sequence's dbrefs to 'source'
+ * @param fromSeqs
+ * the dataset sequences we are searching from
+ * @param source
+ * the database source we are searching dbrefs for
* @param foundSeqs
- * a list of cross-references to add to
+ * a list of found sequences to add to
+ * @param unresolvedRefs
+ * a list of unresolved cross-references to add to
* @param mappings
* a set of sequence mappings to add to
* @return
*/
- static void findMappedDbrefs(SequenceI seq, List<DBRefEntry> sourceRefs,
- List<SequenceI> foundSeqs, AlignedCodonFrame mappings)
+ static void findMappedDbrefs(List<SequenceI> fromSeqs, String source,
+ List<SequenceI> foundSeqs, List<DBRefEntry> unresolvedRefs,
+ AlignedCodonFrame mappings)
{
- Iterator<DBRefEntry> refs = sourceRefs.iterator();
- while (refs.hasNext())
+ Iterator<SequenceI> it = fromSeqs.iterator();
+ while (it.hasNext())
{
- DBRefEntry dbref = refs.next();
- Mapping map = dbref.getMap();
- if (map != null)
+ SequenceI seq = it.next();
+ SequenceI dss = seq.getDatasetSequence();
+ dss = dss == null ? seq : dss;
+
+ DBRefEntry[] dbRefs = seq.getDBRefs();
+ if (dbRefs == null)
+ {
+ continue;
+ }
+ boolean resolved = false;
+ for (DBRefEntry dbref : dbRefs)
{
- SequenceI mappedTo = map.getTo();
- if (mappedTo != null)
+ if (!source.equals(dbref.getSource()))
{
- foundSeqs.add(new Sequence(mappedTo));
- refs.remove();
-
- /*
- * check mapping is not 'direct' (it shouldn't be if we reach here)
- * and add mapping (dna-to-peptide or vice versa) to the set
- */
- MapList mapList = map.getMap();
- int fromRatio = mapList.getFromRatio();
- int toRatio = mapList.getToRatio();
- if (fromRatio != toRatio)
+ continue;
+ }
+ DBRefEntry todo = new DBRefEntry(dbref.getSource(), "0",
+ dbref.getAccessionId());
+ Mapping map = dbref.getMap();
+ if (map != null)
+ {
+ unresolvedRefs.remove(todo);
+ resolved = true;
+ SequenceI mappedTo = map.getTo();
+ if (mappedTo != null)
{
- if (fromRatio == 3)
- {
- mappings.addMap(seq, mappedTo, mapList);
- }
- else
+ foundSeqs.add(new Sequence(mappedTo));
+
+ /*
+ * check mapping is not 'direct' (it shouldn't be if we reach here)
+ * and add mapping (dna-to-peptide or vice versa) to the set
+ */
+ MapList mapList = map.getMap();
+ int fromRatio = mapList.getFromRatio();
+ int toRatio = mapList.getToRatio();
+ if (fromRatio != toRatio)
{
- mappings.addMap(mappedTo, seq, mapList.getInverse());
+ if (fromRatio == 3)
+ {
+ mappings.addMap(dss, mappedTo, mapList);
+ }
+ else
+ {
+ mappings.addMap(mappedTo, dss, mapList.getInverse());
+ }
}
}
}
+ else
+ {
+ /*
+ * no mapping to resolve dbref - add source+accession to list to resolve
+ */
+ if (!unresolvedRefs.contains(todo))
+ {
+ unresolvedRefs.add(todo);
+ }
+ }
+ }
+ if (resolved)
+ {
+ it.remove();
}
}
}
* to the foundSeqs list. If found, tries to make a mapping between seq and
* the retrieved sequence and insert it into the database reference.
*
- * @param seq
+ * @param fromSeqs
* @param sourceRefs
* @param foundSeqs
* @param mappings
* @param dna
*/
- static void fetchCrossReferences(SequenceI seq,
+ static void fetchCrossReferences(List<SequenceI> fromSeqs,
List<DBRefEntry> sourceRefs, List<SequenceI> foundSeqs,
AlignedCodonFrame mappings, boolean dna, AlignmentI dataset)
{
retrieved = sftch.getSequences(sourceRefs, !dna);
} catch (Exception e)
{
- System.err
- .println("Problem whilst retrieving cross references for Sequence : "
- + seq.getName());
+ System.err.println("Problem whilst retrieving cross references: "
+ + e.getMessage());
e.printStackTrace();
return;
}
- if (retrieved != null)
+ if (retrieved == null)
{
- updateDbrefMappings(dna, seq, sourceRefs, retrieved, mappings);
+ return;
+ }
+ updateDbrefMappings(dna, fromSeqs, sourceRefs, retrieved, mappings);
- SequenceIdMatcher matcher = new SequenceIdMatcher(
- dataset.getSequences());
- List<SequenceFeature> copiedFeatures = new ArrayList<SequenceFeature>();
- CrossRef me = new CrossRef();
- for (int rs = 0; rs < retrieved.length; rs++)
+ SequenceIdMatcher matcher = new SequenceIdMatcher(
+ dataset.getSequences());
+ List<SequenceFeature> copiedFeatures = new ArrayList<SequenceFeature>();
+ CrossRefs me = new CrossRefs();
+ for (int rs = 0; rs < retrieved.length; rs++)
+ {
+ // TODO: examine each sequence for 'redundancy'
+ DBRefEntry[] dbr = retrieved[rs].getDBRefs();
+ if (dbr != null && dbr.length > 0)
{
- // TODO: examine each sequence for 'redundancy'
- DBRefEntry[] dbr = retrieved[rs].getDBRefs();
- if (dbr != null && dbr.length > 0)
+ for (int di = 0; di < dbr.length; di++)
{
- for (int di = 0; di < dbr.length; di++)
+ // find any entry where we should put in the sequence being
+ // cross-referenced into the map
+ Mapping map = dbr[di].getMap();
+ if (map != null)
{
- // find any entry where we should put in the sequence being
- // cross-referenced into the map
- Mapping map = dbr[di].getMap();
- if (map != null)
+ if (map.getTo() != null && map.getMap() != null)
{
- if (map.getTo() != null && map.getMap() != null)
+ SequenceI matched = matcher.findIdMatch(map.getTo());
+ if (matched != null)
{
- SequenceI matched = matcher.findIdMatch(map.getTo());
- if (matched != null)
- {
- /*
- * already got an xref to this sequence; update this
- * map to point to the same sequence, and add
- * any new dbrefs to it
- */
- for (DBRefEntry ref : map.getTo().getDBRefs())
- {
- matched.addDBRef(ref); // add or update mapping
- }
- map.setTo(matched);
- }
- else
+ /*
+ * already got an xref to this sequence; update this
+ * map to point to the same sequence, and add
+ * any new dbrefs to it
+ */
+ for (DBRefEntry ref : map.getTo().getDBRefs())
{
- matcher.add(map.getTo());
+ matched.addDBRef(ref); // add or update mapping
}
- try
+ map.setTo(matched);
+ }
+ else
+ {
+ matcher.add(map.getTo());
+ }
+ try
+ {
+ // compare ms with dss and replace with dss in mapping
+ // if map is congruent
+ SequenceI ms = map.getTo();
+ int sf = map.getMap().getToLowest();
+ int st = map.getMap().getToHighest();
+ SequenceI mappedrg = ms.getSubSequence(sf, st);
+ // SequenceI loc = dss.getSubSequence(sf, st);
+ if (mappedrg.getLength() > 0
+ && ms.getSequenceAsString().equals(
+ fromSeqs.getSequenceAsString()))
+ // && mappedrg.getSequenceAsString().equals(
+ // loc.getSequenceAsString()))
{
- // compare ms with dss and replace with dss in mapping
- // if map is congruent
- SequenceI ms = map.getTo();
- int sf = map.getMap().getToLowest();
- int st = map.getMap().getToHighest();
- SequenceI mappedrg = ms.getSubSequence(sf, st);
- // SequenceI loc = dss.getSubSequence(sf, st);
- if (mappedrg.getLength() > 0
- && ms.getSequenceAsString().equals(
- seq.getSequenceAsString()))
- // && mappedrg.getSequenceAsString().equals(
- // loc.getSequenceAsString()))
+ String msg = "Mapping updated from " + ms.getName()
+ + " to retrieved crossreference "
+ + fromSeqs.getName();
+ System.out.println(msg);
+ // method to update all refs of existing To on
+ // retrieved sequence with dss and merge any props
+ // on To onto dss.
+ map.setTo(fromSeqs);
+ /*
+ * copy sequence features as well, avoiding
+ * duplication (e.g. same variation from 2
+ * transcripts)
+ */
+ SequenceFeature[] sfs = ms.getSequenceFeatures();
+ if (sfs != null)
{
- String msg = "Mapping updated from " + ms.getName()
- + " to retrieved crossreference "
- + seq.getName();
- System.out.println(msg);
- // method to update all refs of existing To on
- // retrieved sequence with dss and merge any props
- // on To onto dss.
- map.setTo(seq);
- /*
- * copy sequence features as well, avoiding
- * duplication (e.g. same variation from 2
- * transcripts)
- */
- SequenceFeature[] sfs = ms.getSequenceFeatures();
- if (sfs != null)
+ for (SequenceFeature feat : sfs)
{
- for (SequenceFeature feat : sfs)
+ /*
+ * we override SequenceFeature.equals here (but
+ * not elsewhere) to ignore Parent attribute
+ * TODO not quite working yet!
+ */
+ if (!copiedFeatures
+ .contains(me.new MySequenceFeature(feat)))
{
- /*
- * we override SequenceFeature.equals here (but
- * not elsewhere) to ignore Parent attribute
- * TODO not quite working yet!
- */
- if (!copiedFeatures
- .contains(me.new MySequenceFeature(feat)))
- {
- seq.addSequenceFeature(feat);
- copiedFeatures.add(feat);
- }
+ fromSeqs.addSequenceFeature(feat);
+ copiedFeatures.add(feat);
}
}
}
- mappings.addMap(retrieved[rs].getDatasetSequence(),
- map.getTo(), map.getMap());
- } catch (Exception e)
- {
- System.err
- .println("Exception when consolidating Mapped sequence set...");
- e.printStackTrace(System.err);
}
+ mappings.addMap(retrieved[rs].getDatasetSequence(),
+ map.getTo(), map.getMap());
+ } catch (Exception e)
+ {
+ System.err
+ .println("Exception when consolidating Mapped sequence set...");
+ e.printStackTrace(System.err);
}
}
}
}
- retrieved[rs].updatePDBIds();
- foundSeqs.add(retrieved[rs]);
}
+ retrieved[rs].updatePDBIds();
+ foundSeqs.add(retrieved[rs]);
}
}
* shares a DBRefEntry with it. If found, adds the sequence to foundSeqs and
* removes the resolved sourceRef from the search list.
*
- * @param seq
+ * @param fromSeqs
* @param source
- * @param sourceRefs
- * @param dataset
+ * @param dataset
* @param foundSeqs
+ * @param unresolvedRefs
* @param mappings
* @return
*/
- static void findIndirectCrossReferences(SequenceI seq, String source,
- List<DBRefEntry> sourceRefs, AlignmentI dataset,
- List<SequenceI> foundSeqs, AlignedCodonFrame mappings)
+ static void findIndirectCrossReferences(List<SequenceI> fromSeqs,
+ String source, AlignmentI dataset,
+ List<SequenceI> foundSeqs, List<DBRefEntry> unresolvedRefs,
+ AlignedCodonFrame mappings)
{
- Iterator<DBRefEntry> refs = sourceRefs.iterator();
+ Iterator<DBRefEntry> refs = unresolvedRefs.iterator();
while (refs.hasNext())
{
DBRefEntry dbref = refs.next();
- boolean found = searchDatasetForCrossReference(seq, dbref, dataset,
- foundSeqs, mappings);
+ boolean found = false;
+ // boolean found = searchDatasetForCrossReference(fromSeqs, dbref,
+ // foundSeqs,
+ // unresolvedRefs, mappings);
if (found)
{
refs.remove();
* AlignedCodonFrame
*
* @param dna
- * @param mapFrom
+ * @param fromSeqs
* @param xrefs
* @param retrieved
* @param mappings
*/
- static void updateDbrefMappings(boolean dna, SequenceI mapFrom,
+ static void updateDbrefMappings(boolean dna, List<SequenceI> fromSeqs,
List<DBRefEntry> xrefs, SequenceI[] retrieved,
AlignedCodonFrame mappings)
{
MapList mapping = null;
if (dna)
{
- mapping = AlignmentUtils.mapCdnaToProtein(seq, mapFrom);
+ mapping = AlignmentUtils.mapCdnaToProtein(seq, fromSeqs);
}
else
{
- mapping = AlignmentUtils.mapCdnaToProtein(mapFrom, seq);
+ mapping = AlignmentUtils.mapCdnaToProtein(fromSeqs, seq);
if (mapping != null)
{
mapping = mapping.getInverse();
xref.setMap(new Mapping(seq, mapping));
if (dna)
{
- AlignmentUtils.computeProteinFeatures(mapFrom, seq, mapping);
+ AlignmentUtils.computeProteinFeatures(fromSeqs, seq, mapping);
}
if (dna)
{
- mappings.addMap(mapFrom, seq, mapping);
+ mappings.addMap(fromSeqs, seq, mapping);
}
else
{
- mappings.addMap(seq, mapFrom, mapping.getInverse());
+ mappings.addMap(seq, fromSeqs, mapping.getInverse());
}
continue;
}