boolean found = false;
DBRefEntry[] xrfs = DBRefUtils
.selectDbRefs(!fromDna, dss.getDBRefs());
+ // ENST & ENSP come in to both protein and nucleotide, so we need to
+ // filter them
+ // out later.
if ((xrfs == null || xrfs.length == 0) && dataset != null)
{
/*
List<DBRefEntry> sourceRefs = DBRefUtils.searchRefsForSource(xrfs,
source);
Iterator<DBRefEntry> refIterator = sourceRefs.iterator();
+ // At this point, if we are retrieving Ensembl, we still don't filter out
+ // ENST when looking for protein crossrefs.
while (refIterator.hasNext())
{
DBRefEntry xref = refIterator.next();
found = false;
- if (xref.hasMap())
+ // we're only interested in coding cross-references, not
+ // locus->transcript
+ if (xref.hasMap() && xref.getMap().getMap().isTripletMap())
{
SequenceI mappedTo = xref.getMap().getTo();
if (mappedTo != null)
* but findInDataset() matches ENSP when looking for Uniprot...
*/
SequenceI matchInDataset = findInDataset(xref);
+ if (matchInDataset != null && xref.getMap().getTo() != null
+ && matchInDataset != xref.getMap().getTo())
+ {
+ System.err
+ .println("Implementation problem (reopen JAL-2154): CrossRef.findInDataset seems to have recovered a different sequence than the one explicitly mapped for xref."
+ + "Found:"
+ + matchInDataset
+ + "\nExpected:"
+ + xref.getMap().getTo()
+ + "\nFor xref:"
+ + xref);
+ }
/*matcher.findIdMatch(mappedTo);*/
if (matchInDataset != null)
{
{
rseqs.add(matchInDataset);
}
+ // even if rseqs contained matchInDataset - check mappings between
+ // these seqs are added
+ // need to try harder to only add unique mappings
+ if (xref.getMap().getMap().isTripletMap()
+ && dataset.getMapping(seq, matchInDataset) == null
+ && cf.getMappingBetween(seq, matchInDataset) == null)
+ {
+ // materialise a mapping for highlighting between these
+ // sequences
+ if (fromDna)
+ {
+ cf.addMap(dss, matchInDataset, xref.getMap().getMap(),
+ xref.getMap().getMappedFromId());
+ }
+ else
+ {
+ cf.addMap(matchInDataset, dss, xref.getMap().getMap()
+ .getInverse(), xref.getMap().getMappedFromId());
+ }
+ }
+
refIterator.remove();
continue;
}
+ // TODO: need to determine if this should be a deriveSequence
SequenceI rsq = new Sequence(mappedTo);
rseqs.add(rsq);
- if (xref.getMap().getMap().getFromRatio() != xref.getMap()
- .getMap().getToRatio())
+ if (xref.getMap().getMap().isTripletMap())
{
// get sense of map correct for adding to product alignment.
if (fromDna)
{
SequenceI matchedSeq = matcher.findIdMatch(xref.getSource() + "|"
+ xref.getAccessionId());
- if (matchedSeq != null)
+ // if there was a match, check it's at least the right type of
+ // molecule!
+ if (matchedSeq != null && matchedSeq.isProtein() == fromDna)
{
if (constructMapping(seq, matchedSeq, xref, cf, fromDna))
{
SequenceI[] retrieved = null;
SequenceI dss = seq.getDatasetSequence() == null ? seq : seq
.getDatasetSequence();
+ // first filter in case we are retrieving crossrefs that have already been
+ // retrieved. this happens for cases where a database record doesn't yield
+ // protein products for CDS
+ removeAlreadyRetrievedSeqs(sourceRefs, fromDna);
+ if (sourceRefs.size() == 0)
+ {
+ // no more work to do! We already had all requested sequence records in
+ // the dataset.
+ return;
+ }
try
{
retrieved = sftch.getSequences(sourceRefs, !fromDna);
: retrievedSequence.getDatasetSequence();
DBRefEntry[] dbr = retrievedSequence.getDBRefs();
if (dbr != null)
+
+ /**
+  * Filters sourceRefs in place: every entry that DBRefUtils.searchRefs
+  * matches against a primary DBRef of a qualifying dataset sequence is
+  * removed from the list.
+  *
+  * @param sourceRefs
+  *          - list of references to filter; modified by this call.
+  * @param fromDna
+  *          - only dataset sequences whose isProtein() value equals this
+  *          flag are examined for matching primary references.
+  */
+ private void removeAlreadyRetrievedSeqs(List<DBRefEntry> sourceRefs,
+         boolean fromDna)
+ {
+   // array snapshot of the references still outstanding
+   DBRefEntry[] outstanding = sourceRefs.toArray(new DBRefEntry[0]);
+   for (SequenceI datasetSeq : dataset.getSequences())
+   {
+     // when fromDna is false this keeps only nucleotide (non-protein)
+     // sequences, and vice versa
+     if (datasetSeq.isProtein() != fromDna)
+     {
+       continue;
+     }
+     boolean removedAny = false;
+     for (DBRefEntry primaryRef : datasetSeq.getPrimaryDBRefs())
+     {
+       for (DBRefEntry match : DBRefUtils.searchRefs(outstanding,
+               primaryRef))
+       {
+         sourceRefs.remove(match);
+         removedAny = true;
+       }
+     }
+     if (removedAny)
+     {
+       // refresh the snapshot once per sequence, after all of its primary
+       // refs have been checked, so later sequences search the reduced list
+       outstanding = sourceRefs.toArray(new DBRefEntry[0]);
+     }
+   }
+ }
+
+
{
for (DBRefEntry dbref : dbr)
{
}
else
{
- matcher.add(map.getTo());
+ if (dataset.findIndex(map.getTo()) == -1)
+ {
+ dataset.addSequence(map.getTo());
+ matcher.add(map.getTo());
+ }
}
try
{
}
retrievedSequence.updatePDBIds();
rseqs.add(retrievedDss);
- dataset.addSequence(retrievedDss);
- matcher.add(retrievedDss);
+ if (dataset.findIndex(retrievedDss) == -1)
+ {
+ dataset.addSequence(retrievedDss);
+ matcher.add(retrievedDss);
+ }
}
}
}
String name2 = xref.getSource() + "|" + name;
SequenceI dss = mapsTo.getDatasetSequence() == null ? mapsTo : mapsTo
.getDatasetSequence();
+ // first check ds if ds is directly referenced
+ if (dataset.findIndex(dss) > -1)
+ {
+ return dss;
+ }
+ ;
for (SequenceI seq : dataset.getSequences())
{
/*
DBRefEntry xref, AlignedCodonFrame mappings, boolean fromDna)
{
MapList mapping = null;
-
+ SequenceI dsmapFrom = mapFrom.getDatasetSequence() == null ? mapFrom
+ : mapFrom.getDatasetSequence();
+ SequenceI dsmapTo = mapTo.getDatasetSequence() == null ? mapTo
+ : mapTo.getDatasetSequence();
/*
- * look for a reverse mapping, if found make its inverse
+ * look for a reverse mapping, if found make its inverse.
+ * Note - we do this on dataset sequences only.
*/
- if (mapTo.getDBRefs() != null)
+ if (dsmapTo.getDBRefs() != null)
{
- for (DBRefEntry dbref : mapTo.getDBRefs())
+ for (DBRefEntry dbref : dsmapTo.getDBRefs())
{
String name = dbref.getSource() + "|" + dbref.getAccessionId();
- if (dbref.hasMap() && mapFrom.getName().startsWith(name))
+ if (dbref.hasMap() && dsmapFrom.getName().startsWith(name))
{
/*
* looks like we've found a map from 'mapTo' to 'mapFrom'
* - invert it to make the mapping the other way
*/
MapList reverse = dbref.getMap().getMap().getInverse();
- xref.setMap(new Mapping(mapTo, reverse));
- mappings.addMap(mapFrom, mapTo, reverse);
+ xref.setMap(new Mapping(dsmapTo, reverse));
+ mappings.addMap(mapFrom, dsmapTo, reverse);
return true;
}
}
* </ul>
* @return true if relationship found and sequence added.
*/
- boolean searchDataset(boolean fromDna, SequenceI fromSeq,
- DBRefEntry xrf, List<SequenceI> foundSeqs, AlignedCodonFrame mappings,
+ boolean searchDataset(boolean fromDna, SequenceI fromSeq, DBRefEntry xrf,
+ List<SequenceI> foundSeqs, AlignedCodonFrame mappings,
boolean direct)
{
boolean found = false;
// }
if (!cands.isEmpty())
{
- if (!foundSeqs.contains(nxt))
+ if (foundSeqs.contains(nxt))
{
- found = true;
- foundSeqs.add(nxt);
- if (mappings != null && !direct)
+ continue;
+ }
+ found = true;
+ foundSeqs.add(nxt);
+ if (mappings != null && !direct)
+ {
+ /*
+ * if the matched sequence has mapped dbrefs to
+ * protein product / cdna, add equivalent mappings to
+ * our source sequence
+ */
+ for (DBRefEntry candidate : cands)
{
- /*
- * if the matched sequence has mapped dbrefs to
- * protein product / cdna, add equivalent mappings to
- * our source sequence
- */
- for (DBRefEntry candidate : cands)
+ Mapping mapping = candidate.getMap();
+ if (mapping != null)
{
- Mapping mapping = candidate.getMap();
- if (mapping != null)
+ MapList map = mapping.getMap();
+ if (mapping.getTo() != null
+ && map.getFromRatio() != map.getToRatio())
{
- MapList map = mapping.getMap();
- if (mapping.getTo() != null
- && map.getFromRatio() != map.getToRatio())
+ /*
+ * add a mapping, as from dna to peptide sequence
+ */
+ if (map.getFromRatio() == 3)
{
- /*
- * add a mapping, as from dna to peptide sequence
- */
- if (map.getFromRatio() == 3)
- {
- mappings.addMap(nxt, fromSeq, map);
- }
- else
- {
- mappings.addMap(nxt, fromSeq, map.getInverse());
- }
+ mappings.addMap(nxt, fromSeq, map);
+ }
+ else
+ {
+ mappings.addMap(nxt, fromSeq, map.getInverse());
}
}
}