2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
23 import java.util.ArrayList;
24 import java.util.HashMap;
25 import java.util.List;
29 import jalview.analysis.AlignmentUtils;
30 import jalview.analysis.CrossRef;
31 import jalview.api.AlignmentViewPanel;
32 import jalview.api.FeatureSettingsModelI;
33 import jalview.bin.Cache;
34 import jalview.bin.Console;
35 import jalview.datamodel.Alignment;
36 import jalview.datamodel.AlignmentI;
37 import jalview.datamodel.DBRefEntry;
38 import jalview.datamodel.DBRefSource;
39 import jalview.datamodel.GeneLociI;
40 import jalview.datamodel.SequenceI;
41 import jalview.ext.ensembl.EnsemblInfo;
42 import jalview.ext.ensembl.EnsemblMap;
43 import jalview.io.gff.SequenceOntologyI;
44 import jalview.structure.StructureSelectionManager;
45 import jalview.util.DBRefUtils;
46 import jalview.util.IdUtils;
47 import jalview.util.IdUtils.IdType;
48 import jalview.util.MapList;
49 import jalview.util.MappingUtils;
50 import jalview.util.MessageManager;
51 import jalview.viewmodel.seqfeatures.FeatureRendererModel;
52 import jalview.ws.SequenceFetcher;
55 * Factory constructor and runnable for discovering and displaying
56 * cross-references for a set of aligned sequences
// Runnable action that finds cross-referenced sequences for a selection and
// opens them in a new alignment frame, or in a split frame beside a copy of
// the originating sequences.
61 public class CrossRefAction implements Runnable
// frame the action was launched from; its viewport, title, panels and
// progress bar are all used by the action
63 private AlignFrame alignFrame;
// selected sequences handed to CrossRef and to the CDS/copy alignment builders
65 private SequenceI[] sel;
// the fromDna constructor argument; in the lines visible here it is only read
// when composing the "Conflict: showProducts ..." diagnostic
67 private final boolean _odna;
// database source name: passed to findXrefSequences and
// getFeatureColourScheme, and compared against DBRefSource.ENSEMBL
69 private String source;
// panels of the frames opened by this action (top frame first, then bottom
// frame, when a split frame is shown)
71 List<AlignmentViewPanel> xrefViews = new ArrayList<>();
// NOTE(review): the accessor body is not visible in this extract; presumably
// it returns xrefViews - confirm against the upstream file
73 List<AlignmentViewPanel> getXrefViews()
// NOTE(review): the method header is not visible in this extract; given that
// the class implements Runnable this is presumably the body of run() - confirm.
// Visible flow: show a searching status, find cross-references for the
// selection, build an alignment from them, then display it either alone or as
// one half of a split frame, and finally show a finished status.
// NOTE(review): the argument lines that close both formatMessage calls are
// not visible; the progress id is presumably passed there - confirm.
81 final long id = IdUtils.newId(IdType.PROGRESS);
82 alignFrame.setProgressBar(MessageManager.formatMessage(
83 "status.searching_for_sequences_from", new Object[]
// fall back to the alignment itself when it has no separate dataset
87 AlignmentI alignment = alignFrame.getViewport().getAlignment();
88 AlignmentI dataset = alignment.getDataset() == null ? alignment
89 : alignment.getDataset();
90 boolean dna = alignment.isNucleotide();
// diagnostic contrasting the type recorded at construction (_odna) with the
// type of the alignment now (dna); the guarding condition is not visible here
94 .println("Conflict: showProducts for alignment originally "
95 + "thought to be " + (_odna ? "DNA" : "Protein")
96 + " now searching for " + (dna ? "DNA" : "Protein")
// look up cross-references to the requested source for the selection
99 AlignmentI xrefs = new CrossRef(sel, dataset)
100 .findXrefSequences(source, dna);
107 * try to look up chromosomal coordinates for nucleotide
108 * sequences (if not already retrieved)
110 findGeneLoci(xrefs.getSequences());
113 * get display scheme (if any) to apply to features
115 FeatureSettingsModelI featureColourScheme = new SequenceFetcher()
116 .getFeatureColourScheme(source);
// a nucleotide alignment that looks like Ensembl switches to the Ensembl
// feature colour scheme regardless of the requested source
118 if (dna && AlignmentUtils.looksLikeEnsembl(alignment))
120 // override default featureColourScheme so products have Ensembl variant
122 featureColourScheme = new SequenceFetcher()
123 .getFeatureColourScheme(DBRefSource.ENSEMBL);
// NOTE(review): the second argument and the condition guarding the
// makeCdsAlignment call below are on lines not visible here - confirm
126 AlignmentI xrefsAlignment = makeCrossReferencesAlignment(dataset,
130 xrefsAlignment = AlignmentUtils.makeCdsAlignment(
131 xrefsAlignment.getSequencesArray(), dataset, sel);
132 xrefsAlignment.alignAs(alignment);
136 * If we are opening a splitframe, make a copy of this alignment (sharing the same dataset
137 * sequences). If we are DNA, drop introns and update mappings
139 AlignmentI copyAlignment = null;
// split frame preference is read with a default of true
141 if (Cache.getDefault(Preferences.ENABLE_SPLIT_FRAME, true))
143 copyAlignment = copyAlignmentForSplitFrame(alignment, dataset, dna,
144 xrefs, xrefsAlignment);
// NOTE(review): the handling of a null copy is on lines not visible here
145 if (copyAlignment == null)
152 * build AlignFrame(s) according to available alignment data
// frame that shows the cross-referenced sequences
154 AlignFrame newFrame = new AlignFrame(xrefsAlignment,
155 AlignFrame.DEFAULT_WIDTH, AlignFrame.DEFAULT_HEIGHT);
// HIDE_INTRONS is read with a default of true; the meaning of the boolean
// passed to hideFeatureColumns is not visible in this file
156 if (Cache.getDefault("HIDE_INTRONS", true))
158 newFrame.hideFeatureColumns(SequenceOntologyI.EXON, false);
// title reads: proteins-or-nucleotides label, the label.for text, then the
// title of the originating frame (proteins when the source alignment is DNA)
160 String newtitle = String.format("%s %s %s",
161 dna ? MessageManager.getString("label.proteins")
162 : MessageManager.getString("label.nucleotides"),
163 MessageManager.getString("label.for"), alignFrame.getTitle());
164 newFrame.setTitle(newtitle);
// without a copy alignment the new frame is shown on its own and the method
// returns early
166 if (copyAlignment == null)
169 * split frame display is turned off in preferences file
171 Desktop.addInternalFrame(newFrame, newtitle,
172 AlignFrame.DEFAULT_WIDTH, AlignFrame.DEFAULT_HEIGHT);
173 xrefViews.add(newFrame.alignPanel);
174 return; // via finally clause
// second frame for the split view: the copy alignment under the original title
177 AlignFrame copyThis = new AlignFrame(copyAlignment,
178 AlignFrame.DEFAULT_WIDTH, AlignFrame.DEFAULT_HEIGHT);
179 copyThis.setTitle(alignFrame.getTitle());
// both new frames inherit the show-features flag of the originating viewport
181 boolean showSequenceFeatures = alignFrame.getViewport()
182 .isShowSequenceFeatures();
183 newFrame.setShowSeqFeatures(showSequenceFeatures);
184 copyThis.setShowSeqFeatures(showSequenceFeatures);
185 FeatureRendererModel myFeatureStyling = alignFrame.alignPanel
186 .getSeqPanel().seqCanvas.getFeatureRenderer();
189 * copy feature rendering settings to split frame
191 FeatureRendererModel fr1 = newFrame.alignPanel.getSeqPanel().seqCanvas
192 .getFeatureRenderer();
193 fr1.transferSettings(myFeatureStyling);
194 fr1.findAllFeatures(true);
195 FeatureRendererModel fr2 = copyThis.alignPanel.getSeqPanel().seqCanvas
196 .getFeatureRenderer();
197 fr2.transferSettings(myFeatureStyling);
198 fr2.findAllFeatures(true);
201 * apply 'database source' feature configuration
202 * if any - first to the new splitframe view about to be displayed
205 newFrame.getViewport().applyFeaturesStyle(featureColourScheme);
206 copyThis.getViewport().applyFeaturesStyle(featureColourScheme);
209 * and for JAL-3330 also to original alignFrame view(s)
210 * this currently trashes any original settings.
// note the original panels use mergeFeaturesStyle whereas the new frames
// above use applyFeaturesStyle
212 for (AlignmentViewPanel origpanel : alignFrame.getAlignPanels())
214 origpanel.getAlignViewport()
215 .mergeFeaturesStyle(featureColourScheme);
// frame order depends on dna: copyThis first for a DNA source, otherwise
// newFrame first; the same order is used for xrefViews below
218 SplitFrame sf = new SplitFrame(dna ? copyThis : newFrame,
219 dna ? newFrame : copyThis);
221 newFrame.setVisible(true);
222 copyThis.setVisible(true);
223 String linkedTitle = MessageManager
224 .getString("label.linked_view_title");
225 Desktop.addInternalFrame(sf, linkedTitle, -1, -1);
226 sf.adjustInitialLayout();
228 // finally add the top, then bottom frame to the view list
229 xrefViews.add(dna ? copyThis.alignPanel : newFrame.alignPanel);
230 xrefViews.add(!dna ? copyThis.alignPanel : newFrame.alignPanel);
// out of memory gets a dedicated warning; any other Throwable is logged
232 } catch (OutOfMemoryError e)
234 new OOMWarning("whilst fetching crossreferences", e);
235 } catch (Throwable e)
237 Console.error("Error when finding crossreferences", e);
// NOTE(review): given the remark on the early return above, this finished
// status presumably sits in a finally block - confirm
240 alignFrame.setProgressBar(MessageManager.formatMessage(
241 "status.finished_searching_for_sequences_from", new Object[]
247 * Tries to add chromosomal coordinates to any nucleotide sequence which does
248 * not already have them. Coordinates are retrieved from Ensembl given an
249 * Ensembl identifier, either on the sequence itself or on a peptide sequence
250 * it has a reference to.
254 * - fetch EMBLCDS cross-references for Uniprot entry P30419
255 * - the EMBL sequences do not have xrefs to Ensembl
256 * - the Uniprot entry has xrefs to
257 * ENSP00000258960, ENSP00000468424, ENST00000258960, ENST00000592782
258 * - either of the transcript ids can be used to retrieve gene loci e.g.
259 * http://rest.ensembl.org/map/cds/ENST00000592782/1..100000
260 * Example (invertebrate):
261 * - fetch EMBLCDS cross-references for Uniprot entry Q43517 (FER1_SOLLC)
262 * - the Uniprot entry has an xref to ENSEMBLPLANTS Solyc10g044520.1.1
263 * - can retrieve gene loci with
264 * http://rest.ensemblgenomes.org/map/cds/Solyc10g044520.1.1/1..100000
// Calls the per-sequence findGeneLoci for every sequence in the list, sharing
// a single retrievedLoci map across all of the calls.
// NOTE(review): Map is used here but no java.util.Map import is visible in
// this extract - confirm it is present in the upstream file.
// NOTE(review): a null list would fail in the for loop; no guard is visible.
269 public static void findGeneLoci(List<SequenceI> sequences)
// created fresh on every call, so nothing is remembered between invocations
271 Map<DBRefEntry, GeneLociI> retrievedLoci = new HashMap<>();
272 for (SequenceI seq : sequences)
274 findGeneLoci(seq, retrievedLoci);
279 * Tries to find chromosomal coordinates for the sequence, by searching its
280 * direct and indirect cross-references for Ensembl. If the loci have already
281 * been retrieved, just reads them out of the map of retrievedLoci; this is
282 * the case of an alternative transcript for the same protein. Otherwise calls
283 * a REST service to retrieve the loci, and if successful, adds them to the
284 * sequence and to the retrievedLoci.
287 * @param retrievedLoci
// Looks for Ensembl cross-references on one sequence and asks fetchGeneLoci to
// resolve them into gene loci.
//
// Visible steps:
// - guard: the condition tests for a null sequence, a protein, a sequence
//   that already has gene loci, or one with no dbrefs (the guarded action is
//   on lines not visible here; presumably an early return - confirm)
// - the division names from EnsemblInfo are copied into a String array
// - direct refs: DBRefUtils.selectRefs is applied to the sequence dbrefs and
//   fetchGeneLoci is tried on each selected ref
// - indirect refs: for every dbref whose map has a target sequence, the dbrefs
//   of that target are filtered the same way and tried in turn
//
// NOTE(review): the second argument of both selectRefs calls is on a line not
// visible here; presumably divisionsArray - confirm.
// NOTE(review): what happens when fetchGeneLoci answers true is also on lines
// not visible here; presumably the search stops at the first success - confirm.
// NOTE(review): Set is used but no java.util.Set import is visible in this
// extract - confirm it is present upstream.
// retrievedLoci is only forwarded to fetchGeneLoci in the lines visible here.
289 static void findGeneLoci(SequenceI seq,
290 Map<DBRefEntry, GeneLociI> retrievedLoci)
293 * don't replace any existing chromosomal coordinates
295 if (seq == null || seq.isProtein() || seq.getGeneLoci() != null
296 || seq.getDBRefs() == null)
301 Set<String> ensemblDivisions = new EnsemblInfo().getDivisions();
304 * first look for direct dbrefs from sequence to Ensembl
306 String[] divisionsArray = ensemblDivisions
307 .toArray(new String[ensemblDivisions.size()]);
308 List<DBRefEntry> seqRefs = seq.getDBRefs();
309 List<DBRefEntry> directEnsemblRefs = DBRefUtils.selectRefs(seqRefs,
311 if (directEnsemblRefs != null)
313 for (DBRefEntry ensemblRef : directEnsemblRefs)
315 if (fetchGeneLoci(seq, ensemblRef, retrievedLoci))
323 * else look for indirect dbrefs from sequence to Ensembl
325 for (DBRefEntry dbref : seq.getDBRefs())
327 if (dbref.getMap() != null && dbref.getMap().getTo() != null)
329 List<DBRefEntry> dbrefs = dbref.getMap().getTo().getDBRefs();
330 List<DBRefEntry> indirectEnsemblRefs = DBRefUtils.selectRefs(dbrefs,
332 if (indirectEnsemblRefs != null)
334 for (DBRefEntry ensemblRef : indirectEnsemblRefs)
336 if (fetchGeneLoci(seq, ensemblRef, retrievedLoci))
347 * Retrieves chromosomal coordinates for the Ensembl (or EnsemblGenomes)
348 * identifier in dbref. If successful, and the sequence length matches gene
349 * loci length, then add it to the sequence, and to the retrievedLoci map.
350 * Answers true if successful, else false.
354 * @param retrievedLoci
// Asks EnsemblMap for gene loci for the accession in dbref, trying the CDS
// mapping first and then the cDNA mapping. A result is stored on the sequence
// and in retrievedLoci only when the length of its from-ranges equals the
// sequence length.
// NOTE(review): in the lines visible here retrievedLoci is only written to
// (put) and never consulted, although the description of the caller speaks of
// reading previously retrieved loci from it - confirm against upstream.
// NOTE(review): the return statements are on lines not visible here.
357 static boolean fetchGeneLoci(SequenceI seq, DBRefEntry dbref,
358 Map<DBRefEntry, GeneLociI> retrievedLoci)
360 String accession = dbref.getAccessionId();
// the dbref source is what gets passed to the mapper as the division
361 String division = dbref.getSource();
364 * hack: ignore cross-references to Ensembl protein ids
365 * (or use map/translation perhaps?)
366 * todo: is there an equivalent in EnsemblGenomes?
// NOTE(review): accession is dereferenced without a null check
368 if (accession.startsWith("ENSP"))
372 EnsemblMap mapper = new EnsemblMap();
375 * try CDS mapping first
// NOTE(review): the last argument is on a continuation line not visible here
377 GeneLociI geneLoci = mapper.getCdsMapping(division, accession, 1,
379 if (geneLoci != null)
381 MapList map = geneLoci.getMapping();
382 int mappedFromLength = MappingUtils.getLength(map.getFromRanges());
// accept the mapping only when it spans exactly the sequence length
383 if (mappedFromLength == seq.getLength())
385 seq.setGeneLoci(geneLoci.getSpeciesId(), geneLoci.getAssemblyId(),
386 geneLoci.getChromosomeId(), map);
387 retrievedLoci.put(dbref, geneLoci);
393 * else try CDNA mapping
// same acceptance logic as the CDS branch above, duplicated for the cDNA
// lookup; a shared helper would remove the repetition
395 geneLoci = mapper.getCdnaMapping(division, accession, 1,
397 if (geneLoci != null)
399 MapList map = geneLoci.getMapping();
400 int mappedFromLength = MappingUtils.getLength(map.getFromRanges());
401 if (mappedFromLength == seq.getLength())
403 seq.setGeneLoci(geneLoci.getSpeciesId(), geneLoci.getAssemblyId(),
404 geneLoci.getChromosomeId(), map);
405 retrievedLoci.put(dbref, geneLoci);
418 * @param xrefsAlignment
// Builds the alignment shown in the other half of a split frame: either a CDS
// alignment made from the selection, or a plain copy made by
// makeCopyAlignment. Also registers the dataset codon frames with the
// structure selection manager and aligns one alignment to the other.
// NOTE(review): the condition choosing between the two construction paths and
// the statements taken on failure are on lines not visible here; the caller
// tests the result for null, so failure presumably returns null - confirm.
421 protected AlignmentI copyAlignmentForSplitFrame(AlignmentI alignment,
422 AlignmentI dataset, boolean dna, AlignmentI xrefs,
423 AlignmentI xrefsAlignment)
425 AlignmentI copyAlignment;
// records whether the copy was aligned to the original alignment below
426 boolean copyAlignmentIsAligned = false;
429 copyAlignment = AlignmentUtils.makeCdsAlignment(sel, dataset,
430 xrefsAlignment.getSequencesArray());
// an empty CDS alignment is reported both in a dialog and on the error log
431 if (copyAlignment.getHeight() == 0)
433 JvOptionPane.showMessageDialog(alignFrame,
434 MessageManager.getString("label.cant_map_cds"),
435 MessageManager.getString("label.operation_failed"),
436 JvOptionPane.OK_OPTION);
437 jalview.bin.Console.errPrintln("Failed to make CDS alignment");
442 * pending getting Embl transcripts to 'align',
443 * we are only doing this for Ensembl
445 // TODO proper criteria for 'can align as cdna'
446 if (DBRefSource.ENSEMBL.equalsIgnoreCase(source)
447 || AlignmentUtils.looksLikeEnsembl(alignment))
449 copyAlignment.alignAs(alignment);
450 copyAlignmentIsAligned = true;
// alternative construction path: copy built by makeCopyAlignment
455 copyAlignment = AlignmentUtils.makeCopyAlignment(sel,
456 xrefs.getSequencesArray(), dataset);
// keep the gap character of the originating viewport
458 copyAlignment.setGapCharacter(alignFrame.viewport.getGapCharacter());
460 StructureSelectionManager ssm = StructureSelectionManager
461 .getStructureSelectionManager(Desktop.instance);
464 * register any new mappings for sequence mouseover etc
465 * (will not duplicate any previously registered mappings)
467 ssm.registerMappings(dataset.getCodonFrames());
// a copy without sequences is logged against the requested source
469 if (copyAlignment.getHeight() <= 0)
472 .errPrintln("No Sequences generated for xRef type " + source);
477 * align protein to dna
479 if (dna && copyAlignmentIsAligned)
481 xrefsAlignment.alignAs(copyAlignment);
486 * align cdna to protein - currently only if
487 * fetching and aligning Ensembl transcripts!
489 // TODO: generalise for other sources of locus/transcript/cds data
// NOTE(review): if the makeCdsAlignment path above runs whenever dna is true,
// an ENSEMBL source has already set copyAlignmentIsAligned, so this condition
// may never hold when reached - check whether a test on !dna was intended.
490 if (dna && DBRefSource.ENSEMBL.equalsIgnoreCase(source))
492 copyAlignment.alignAs(xrefsAlignment);
496 return copyAlignment;
500 * Makes an alignment containing the given sequences, and adds them to the
501 * given dataset, which is also set as the dataset for the new alignment
503 * TODO: refactor to DatasetI method
// Derives a new sequence from each sequence of seqs, makes sure the dataset
// sequence behind each derived sequence is in the given dataset, and wraps
// the derived sequences in a new Alignment that uses that dataset.
// NOTE(review): the declaration of the seqs parameter and the return
// statement are on lines not visible here; presumably al is returned - confirm.
509 protected AlignmentI makeCrossReferencesAlignment(AlignmentI dataset,
512 SequenceI[] sprods = new SequenceI[seqs.getHeight()];
513 for (int s = 0; s < sprods.length; s++)
515 sprods[s] = (seqs.getSequenceAt(s)).deriveSequence();
// add the dataset sequence only if the dataset does not already contain it
516 if (dataset.getSequences() == null || !dataset.getSequences()
517 .contains(sprods[s].getDatasetSequence()))
519 dataset.addSequence(sprods[s].getDatasetSequence());
521 sprods[s].updatePDBIds();
523 Alignment al = new Alignment(sprods);
524 al.setDataset(dataset);
// Package-private constructor that stores its arguments in the matching fields.
// NOTE(review): the line declaring the dbSource parameter is not visible in
// this extract, and no assignment of seqs to the sel field is visible either;
// presumably both exist upstream - confirm.
536 CrossRefAction(AlignFrame af, SequenceI[] seqs, boolean fromDna,
539 this.alignFrame = af;
541 this._odna = fromDna;
542 this.source = dbSource;
// Static factory: creates a CrossRefAction for the given selection, source
// and frame. Note that the argument order differs from the constructor, which
// takes the frame first.
545 public static CrossRefAction getHandlerFor(final SequenceI[] sel,
546 final boolean fromDna, final String source,
547 final AlignFrame alignFrame)
549 return new CrossRefAction(alignFrame, sel, fromDna, source);