1 package jalview.analysis;
3 import jalview.datamodel.AlignedCodonFrame;
4 import jalview.datamodel.Alignment;
5 import jalview.datamodel.AlignmentI;
6 import jalview.datamodel.DBRefEntry;
7 import jalview.datamodel.Mapping;
8 import jalview.datamodel.Sequence;
9 import jalview.datamodel.SequenceFeature;
10 import jalview.datamodel.SequenceI;
11 import jalview.util.Comparison;
12 import jalview.util.DBRefUtils;
13 import jalview.util.MapList;
14 import jalview.ws.SequenceFetcherFactory;
15 import jalview.ws.seqfetcher.ASequenceFetcher;
17 import java.util.ArrayList;
18 import java.util.Iterator;
19 import java.util.List;
21 public class CrossRefs
24 * Finds cross-references for sequences from a specified source database.
25 * These may be found in four ways:
27 * <li>as a DBRefEntry on the known sequence, which has a mapped-to sequence</li>
28 * <li>a sequence of complementary type in the alignment dataset, which has a
29 * DBRefEntry to one of the known sequence's 'direct' DBRefs</li>
30 * <li>a sequence of complementary type in the alignment, which has a
31 * DBRefEntry to one of the known sequence's 'cross-ref' DBRefs</li>
32 * <li>by fetching the accession from the remote database</li>
36 * the sequences whose cross-references we are searching for
38 * true if the sequences are from a nucleotide alignment, else false
40 * the database source we want cross-references to
42 * the alignment dataset the sequences belong to
43 * @return an alignment containing cross-reference sequences, or null if none
// Entry point of the cross-reference search: for each element of seqs it looks
// up that sequence's dbrefs to 'source', hands them to findMappedDbrefs and
// findIndirectCrossReferences, and finally wraps whatever accumulated in
// foundSeqs in a new Alignment with the collected mappings attached.
// NOTE(review): this listing omits lines (the embedded line numbers skip), so
// braces, the skip/return statements and some continuations are not visible.
46 public static AlignmentI findXrefSequences(SequenceI[] seqs, boolean dna,
47 String source, AlignmentI dataset)
// accumulators shared by all the helper calls below
49 List<SequenceI> foundSeqs = new ArrayList<SequenceI>();
50 AlignedCodonFrame mappings = new AlignedCodonFrame();
// NOTE(review): no visible line adds to sourceRefs, yet it is the list passed
// to fetchCrossReferences below; the per-sequence list is seqSourceRefs.
// Confirm which list is intended.
52 List<DBRefEntry> sourceRefs = new ArrayList<DBRefEntry>();
54 for (SequenceI seq : seqs)
// compare the caller's 'dna' flag with the molecule type detected for seq
56 if (dna != Comparison.isNucleotide(seq))
59 * mixed alignment, and this sequence is of the wrong type
65 * get this sequence's dbrefs to source database (if any)
67 List<DBRefEntry> seqSourceRefs = DBRefUtils.searchRefsForSource(
68 seq.getDBRefs(), source);
71 * first extract any mapped sequences from sourceRefs
73 findMappedDbrefs(seq, seqSourceRefs, foundSeqs, mappings);
76 * for remaining sourceRefs, try to match a
77 * complementary sequence in the dataset
79 findIndirectCrossReferences(seq, source, seqSourceRefs, dataset,
84 * fetch any remaining sourceRefs from the source database
// NOTE(review): five arguments are passed here, but the fetchCrossReferences
// signature visible in this file declares six (a leading SequenceI seq).
// Confirm whether an overload exists or the call needs the sequence argument.
86 fetchCrossReferences(sourceRefs, foundSeqs, mappings, dna, dataset);
// the header comment says null is returned when nothing was found; the
// statement guarded by this check is not visible in this listing
88 if (foundSeqs.isEmpty())
// build the result alignment from the found sequences and attach the mappings
92 AlignmentI crossRefs = new Alignment(
93 foundSeqs.toArray(new SequenceI[foundSeqs.size()]));
94 crossRefs.addCodonFrame(mappings);
99 * Looks for DBRefEntrys to 'source' which have a mapping to a sequence. If
100 * found, adds the sequence to foundSeqs and removes the dbref from the list.
103 * the dataset sequence we are searching from
105 * the sequence's dbrefs to 'source'
107 * a list of cross-references to add to
109 * a set of sequence mappings to add to
// Walks sourceRefs with an explicit iterator; for each dbref it reads the
// attached Mapping and the sequence that mapping points to, adds a copy of
// that sequence to foundSeqs and records a mapping in 'mappings'.
// NOTE(review): the header comment says resolved dbrefs are removed from the
// list; the removal call and the null check on 'map' are not among the lines
// visible in this listing.
112 static void findMappedDbrefs(SequenceI seq, List<DBRefEntry> sourceRefs,
113 List<SequenceI> foundSeqs, AlignedCodonFrame mappings)
115 Iterator<DBRefEntry> refs = sourceRefs.iterator();
116 while (refs.hasNext())
118 DBRefEntry dbref = refs.next();
119 Mapping map = dbref.getMap();
122 SequenceI mappedTo = map.getTo();
123 if (mappedTo != null)
// a new Sequence constructed from mappedTo is added, not mappedTo itself
125 foundSeqs.add(new Sequence(mappedTo));
129 * check mapping is not 'direct' (it shouldn't be if we reach here)
130 * and add mapping (dna-to-peptide or vice versa) to the set
132 MapList mapList = map.getMap();
133 int fromRatio = mapList.getFromRatio();
134 int toRatio = mapList.getToRatio();
// unequal ratios are what this code treats as a non-'direct' mapping
135 if (fromRatio != toRatio)
// NOTE(review): two alternative registrations follow (seq -> mappedTo with the
// map as is, or mappedTo -> seq with the inverse map); the condition choosing
// between them is not visible here.
139 mappings.addMap(seq, mappedTo, mapList);
143 mappings.addMap(mappedTo, seq, mapList.getInverse());
152 * Tries to fetch seq's database references to 'source' database, and add them
153 * to the foundSeqs list. If found, tries to make a mapping between seq and
154 * the retrieved sequence and insert it into the database reference.
// Retrieves sequences for sourceRefs through the ASequenceFetcher obtained
// from SequenceFetcherFactory, then post-processes every retrieved sequence:
// its dbref mappings are reconciled against sequences already in the dataset,
// sequence features may be copied onto seq, mappings are recorded, and the
// retrieved sequence is appended to foundSeqs.
// NOTE(review): try headers, several guards and the System.err receivers are
// missing from this listing, so exact nesting cannot be confirmed from here.
162 static void fetchCrossReferences(SequenceI seq,
163 List<DBRefEntry> sourceRefs, List<SequenceI> foundSeqs,
164 AlignedCodonFrame mappings, boolean dna, AlignmentI dataset)
166 ASequenceFetcher sftch = SequenceFetcherFactory.getSequenceFetcher();
167 SequenceI[] retrieved;
// second argument is the negation of 'dna' - presumably a request for the
// complementary molecule type; confirm against ASequenceFetcher.getSequences
170 retrieved = sftch.getSequences(sourceRefs, !dna);
171 } catch (Exception e)
174 .println("Problem whilst retrieving cross references for Sequence : "
180 if (retrieved != null)
// fill in mappings on the cross-references using the retrieved sequences
182 updateDbrefMappings(dna, seq, sourceRefs, retrieved, mappings);
// matcher is built over the dataset's sequences so mapped-to sequences can be
// looked up by id
184 SequenceIdMatcher matcher = new SequenceIdMatcher(
185 dataset.getSequences());
// features already copied onto seq, consulted below to avoid duplicates
186 List<SequenceFeature> copiedFeatures = new ArrayList<SequenceFeature>();
// NOTE(review): the enclosing class is declared as CrossRefs, but this line
// instantiates CrossRef; the instance is only used for the inner-class
// creation me.new MySequenceFeature(...) below. Confirm that type exists.
187 CrossRef me = new CrossRef();
188 for (int rs = 0; rs < retrieved.length; rs++)
190 // TODO: examine each sequence for 'redundancy'
191 DBRefEntry[] dbr = retrieved[rs].getDBRefs();
192 if (dbr != null && dbr.length > 0)
194 for (int di = 0; di < dbr.length; di++)
196 // find any entry where we should put in the sequence being
197 // cross-referenced into the map
198 Mapping map = dbr[di].getMap();
// only mappings that carry both a target sequence and a MapList are processed
201 if (map.getTo() != null && map.getMap() != null)
// look for a dataset sequence whose id matches the mapped-to sequence
203 SequenceI matched = matcher.findIdMatch(map.getTo());
207 * already got an xref to this sequence; update this
208 * map to point to the same sequence, and add
209 * any new dbrefs to it
211 for (DBRefEntry ref : map.getTo().getDBRefs())
213 matched.addDBRef(ref); // add or update mapping
// register the mapped-to sequence with the matcher (the guard selecting this
// branch is not visible in this listing)
219 matcher.add(map.getTo());
223 // compare ms with dss and replace with dss in mapping
224 // if map is congruent
225 SequenceI ms = map.getTo();
// sf/st: lowest and highest 'to' positions of the map, used to cut the mapped
// sub-sequence out of ms
226 int sf = map.getMap().getToLowest();
227 int st = map.getMap().getToHighest();
228 SequenceI mappedrg = ms.getSubSequence(sf, st);
229 // SequenceI loc = dss.getSubSequence(sf, st);
// proceed only when the mapped range is non-empty and ms has exactly the same
// sequence string as seq
230 if (mappedrg.getLength() > 0
231 && ms.getSequenceAsString().equals(
232 seq.getSequenceAsString()))
233 // && mappedrg.getSequenceAsString().equals(
234 // loc.getSequenceAsString()))
236 String msg = "Mapping updated from " + ms.getName()
237 + " to retrieved crossreference "
239 System.out.println(msg);
240 // method to update all refs of existing To on
241 // retrieved sequence with dss and merge any props
245 * copy sequence features as well, avoiding
246 * duplication (e.g. same variation from 2
249 SequenceFeature[] sfs = ms.getSequenceFeatures();
252 for (SequenceFeature feat : sfs)
255 * we override SequenceFeature.equals here (but
256 * not elsewhere) to ignore Parent attribute
257 * TODO not quite working yet!
260 .contains(me.new MySequenceFeature(feat)))
// the feature is added to seq and remembered in copiedFeatures
262 seq.addSequenceFeature(feat);
263 copiedFeatures.add(feat);
// record a mapping from the retrieved sequence's dataset sequence to the
// mapped-to sequence
268 mappings.addMap(retrieved[rs].getDatasetSequence(),
269 map.getTo(), map.getMap());
270 } catch (Exception e)
273 .println("Exception when consolidating Mapped sequence set...");
274 e.printStackTrace(System.err);
// refresh the PDB ids of the retrieved sequence, then add it to the results
280 retrieved[rs].updatePDBIds();
281 foundSeqs.add(retrieved[rs]);
287 * Searches the alignment for a sequence of complementary type to 'seq' which
288 * shares a DBRefEntry with it. If found, adds the sequence to foundSeqs and
289 * removes the resolved sourceRef from the search list.
// Iterates over sourceRefs and delegates each dbref, together with seq, the
// dataset and the two accumulators, to searchDatasetForCrossReference.
// NOTE(review): the 'source' parameter is not used in any visible line of
// this method.
299 static void findIndirectCrossReferences(SequenceI seq, String source,
300 List<DBRefEntry> sourceRefs, AlignmentI dataset,
301 List<SequenceI> foundSeqs, AlignedCodonFrame mappings)
// an explicit Iterator is used rather than a for-each loop
303 Iterator<DBRefEntry> refs = sourceRefs.iterator();
304 while (refs.hasNext())
306 DBRefEntry dbref = refs.next();
// NOTE(review): what is done with 'found' is not visible in this listing; the
// header comment says a resolved sourceRef is removed from the search list
307 boolean found = searchDatasetForCrossReference(seq, dbref, dataset,
308 foundSeqs, mappings);
317 * Searches the dataset for a sequence of opposite type to 'excluding', which
318 * has a cross-reference matching dbref. If found, adds the sequence to
319 * foundSeqs and removes dbref from the search list.
322 * a sequence to ignore (start point of search)
324 * a cross-reference to try to match
326 * sequences to search in
328 * result list to add to
330 * a set of sequence mappings to add to
331 * @return true if relationship found and sequence added
// Scans the sequences of 'dataset' for ones whose molecule type differs from
// that of 'excluding' and which carry a dbref matching 'dbref'; for matching
// candidates that have a suitable mapping, a mapping between 'excluding' and
// the candidate sequence is recorded in 'mappings'.
// NOTE(review): the declaration of 'ds', the skip statements, the update of
// 'found' / foundSeqs and the return are not among the visible lines.
333 static boolean searchDatasetForCrossReference(SequenceI excluding,
334 DBRefEntry dbref, AlignmentI dataset, List<SequenceI> foundSeqs,
335 AlignedCodonFrame mappings)
// molecule type of the start sequence, compared against each candidate below
337 boolean fromNucleotide = Comparison.isNucleotide(excluding);
338 boolean found = false;
343 if (dataset.getSequences() == null)
// the loop runs while holding the monitor of the list returned by
// dataset.getSequences(), which is also assigned to ds
348 synchronized (ds = dataset.getSequences())
350 for (SequenceI nxt : ds)
// a non-null dataset sequence on an element triggers the warning printed below
354 if (nxt.getDatasetSequence() != null)
357 .println("Implementation warning: getProducts passed a dataset alignment without dataset sequences in it!");
// reference-identity test against the start sequence and its dataset sequence
359 if (nxt == excluding || nxt == excluding.getDatasetSequence())
363 if (foundSeqs.contains(nxt))
366 * already added this sequence to cross-refs
370 boolean isDna = Comparison.isNucleotide(nxt);
371 if (isDna == fromNucleotide)
374 * skip this sequence - wrong molecule type
380 * check if this sequence has any dbref matching source and accession
381 * (version and mapping may differ)
383 List<DBRefEntry> candidates = DBRefUtils.searchRefs(
384 nxt.getDBRefs(), dbref);
386 if (candidates.isEmpty())
392 if (mappings != null)
394 // don't search if we aren't given a codon map object
395 for (DBRefEntry candidate : candidates)
397 if (candidate.hasMap())
399 Mapping mapping = candidate.getMap();
400 MapList map = mapping.getMap();
// only candidates with a mapped-to sequence and unequal from/to ratios
// contribute a mapping
401 if (mapping.getTo() != null
402 && map.getFromRatio() != map.getToRatio())
// NOTE(review): the condition choosing between the next two calls is not
// visible; per the inline comments it depends on which side is nucleotide
406 // map is from dna seq to a protein product
407 mappings.addMap(excluding, nxt, map);
411 // map is from protein seq to its coding dna
412 mappings.addMap(nxt, excluding, map.getInverse());
425 * Updates any empty mappings in the cross-references with one to a compatible
426 * retrieved sequence if found, and adds any new mappings to the
// For each cross-reference in xrefs, looks up the retrieved sequences whose id
// matches "<source>|<accession>", computes a cDNA/protein MapList between
// mapFrom and each match, stores it on the xref as a new Mapping and records
// it in 'mappings'.
// NOTE(review): the branches on 'dna', any null checks on the computed
// mapping, and the guards around the statements below are not visible in this
// listing.
435 static void updateDbrefMappings(boolean dna, SequenceI mapFrom,
436 List<DBRefEntry> xrefs, SequenceI[] retrieved,
437 AlignedCodonFrame mappings)
// matcher indexes the retrieved sequences for lookup by id
439 SequenceIdMatcher matcher = new SequenceIdMatcher(retrieved);
440 for (DBRefEntry xref : xrefs)
// lookup key: the xref's source and accession id joined by '|'
444 String targetSeqName = xref.getSource() + "|"
445 + xref.getAccessionId();
446 SequenceI[] matches = matcher.findAllIdMatches(targetSeqName);
451 for (SequenceI seq : matches)
453 MapList mapping = null;
// the two calls below differ in argument order; the result of the second is
// replaced by its inverse - presumably one path per molecule type of mapFrom
456 mapping = AlignmentUtils.mapCdnaToProtein(seq, mapFrom);
460 mapping = AlignmentUtils.mapCdnaToProtein(mapFrom, seq);
463 mapping = mapping.getInverse();
// attach the computed mapping, targeting the matched sequence, to the xref
468 xref.setMap(new Mapping(seq, mapping));
471 AlignmentUtils.computeProteinFeatures(mapFrom, seq, mapping);
// two alternative registrations: mapFrom -> seq with the mapping as is, or
// seq -> mapFrom with the inverse mapping
475 mappings.addMap(mapFrom, seq, mapping);
479 mappings.addMap(seq, mapFrom, mapping.getInverse());