2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
23 import jalview.analysis.AlignmentUtils;
24 import jalview.analysis.CrossRef;
25 import jalview.api.AlignmentViewPanel;
26 import jalview.api.FeatureSettingsModelI;
27 import jalview.bin.Cache;
28 import jalview.datamodel.Alignment;
29 import jalview.datamodel.AlignmentI;
30 import jalview.datamodel.DBRefEntry;
31 import jalview.datamodel.DBRefSource;
32 import jalview.datamodel.GeneLociI;
33 import jalview.datamodel.SequenceI;
34 import jalview.ext.ensembl.EnsemblInfo;
35 import jalview.ext.ensembl.EnsemblMap;
36 import jalview.io.gff.SequenceOntologyI;
37 import jalview.structure.StructureSelectionManager;
38 import jalview.util.DBRefUtils;
39 import jalview.util.MapList;
40 import jalview.util.MappingUtils;
41 import jalview.util.MessageManager;
42 import jalview.ws.SequenceFetcher;
44 import java.util.ArrayList;
45 import java.util.HashMap;
46 import java.util.List;
51 * Factory constructor and runnable for discovering and displaying
52 * cross-references for a set of aligned sequences
// Runnable that searches for cross-references of a set of selected sequences
// and shows the results in new alignment view(s).
57 public class CrossRefAction implements Runnable
// the alignment window this action works on: supplies the viewport, the title
// and the progress bar used while searching
59 private AlignFrame alignFrame;
// the sequences whose cross-references are looked up (passed to CrossRef)
61 private SequenceI[] sel;
// sequence type recorded at construction (true = DNA); compared with the
// alignment's current type when the action runs
63 private final boolean _odna;
// name of the database source to find cross-references for
65 private String source;
// alignment panels of the frame(s) opened by this action; for a split frame
// the top panel is added first, then the bottom one
67 List<AlignmentViewPanel> xrefViews = new ArrayList<>();
// NOTE(review): body not visible in this excerpt - presumably returns xrefViews
69 List<AlignmentViewPanel> getXrefViews()
// Body of the cross-reference search and display; the method header lines are
// not visible in this excerpt (the class implements Runnable).
// The start time is passed to setProgressBar again when the search finishes.
77 final long sttime = System.currentTimeMillis();
78 alignFrame.setProgressBar(MessageManager.formatMessage(
79 "status.searching_for_sequences_from", new Object[]
// search against the alignment's dataset when it has one, otherwise against
// the alignment itself
83 AlignmentI alignment = alignFrame.getViewport().getAlignment();
84 AlignmentI dataset = alignment.getDataset() == null ? alignment
85 : alignment.getDataset();
// current sequence type of the alignment; the message that follows reports a
// conflict with the type recorded at construction (_odna)
// NOTE(review): the guarding condition is not visible in this excerpt
86 boolean dna = alignment.isNucleotide();
90 .println("Conflict: showProducts for alignment originally "
91 + "thought to be " + (_odna ? "DNA" : "Protein")
92 + " now searching for " + (dna ? "DNA" : "Protein")
// find the sequences cross-referenced from the selection for the given source
95 AlignmentI xrefs = new CrossRef(sel, dataset)
96 .findXrefSequences(source, dna);
103 * try to look up chromosomal coordinates for nucleotide
104 * sequences (if not already retrieved)
106 findGeneLoci(xrefs.getSequences());
109 * get display scheme (if any) to apply to features
111 FeatureSettingsModelI featureColourScheme = new SequenceFetcher()
112 .getFeatureColourScheme(source);
// wrap the cross-referenced sequences in a new alignment tied to the dataset
114 AlignmentI xrefsAlignment = makeCrossReferencesAlignment(dataset,
// NOTE(review): the condition guarding this CDS conversion is not visible in
// this excerpt; the result is then aligned to the original alignment
118 xrefsAlignment = AlignmentUtils.makeCdsAlignment(
119 xrefsAlignment.getSequencesArray(), dataset, sel);
120 xrefsAlignment.alignAs(alignment);
124 * If we are opening a splitframe, make a copy of this alignment (sharing the same dataset
125 * sequences). If we are DNA, drop introns and update mappings
127 AlignmentI copyAlignment = null;
// split-frame display is a user preference, enabled by default
129 if (Cache.getDefault(Preferences.ENABLE_SPLIT_FRAME, true))
131 copyAlignment = copyAlignmentForSplitFrame(alignment, dataset, dna,
132 xrefs, xrefsAlignment);
// NOTE(review): the handling of a null copy is not visible in this excerpt
133 if (copyAlignment == null)
140 * build AlignFrame(s) according to available alignment data
142 AlignFrame newFrame = new AlignFrame(xrefsAlignment,
143 AlignFrame.DEFAULT_WIDTH, AlignFrame.DEFAULT_HEIGHT);
// HIDE_INTRONS preference (default true): hide columns using the EXON
// feature type
144 if (Cache.getDefault("HIDE_INTRONS", true))
146 newFrame.hideFeatureColumns(SequenceOntologyI.EXON, false);
// the title names what was fetched: proteins when the original alignment is
// DNA, nucleotides otherwise, followed by the original frame's title
148 String newtitle = String.format("%s %s %s",
149 dna ? MessageManager.getString("label.proteins")
150 : MessageManager.getString("label.nucleotides"),
151 MessageManager.getString("label.for"), alignFrame.getTitle());
152 newFrame.setTitle(newtitle);
// without a copy alignment only the single new frame is shown and recorded
154 if (copyAlignment == null)
157 * split frame display is turned off in preferences file
159 Desktop.addInternalFrame(newFrame, newtitle,
160 AlignFrame.DEFAULT_WIDTH, AlignFrame.DEFAULT_HEIGHT);
161 xrefViews.add(newFrame.alignPanel);
162 return; // via finally clause
// second frame for the split view, built from the copy alignment and titled
// like the original frame
165 AlignFrame copyThis = new AlignFrame(copyAlignment,
166 AlignFrame.DEFAULT_WIDTH, AlignFrame.DEFAULT_HEIGHT);
167 copyThis.setTitle(alignFrame.getTitle());
// both frames take over the original view's show-sequence-features flag
169 boolean showSequenceFeatures = alignFrame.getViewport()
170 .isShowSequenceFeatures();
171 newFrame.setShowSeqFeatures(showSequenceFeatures);
172 copyThis.setShowSeqFeatures(showSequenceFeatures);
173 FeatureRenderer myFeatureStyling = alignFrame.alignPanel
174 .getSeqPanel().seqCanvas.getFeatureRenderer();
177 * copy feature rendering settings to split frame
179 FeatureRenderer fr1 = newFrame.alignPanel.getSeqPanel().seqCanvas
180 .getFeatureRenderer();
181 fr1.transferSettings(myFeatureStyling);
182 fr1.findAllFeatures(true);
183 FeatureRenderer fr2 = copyThis.alignPanel.getSeqPanel().seqCanvas
184 .getFeatureRenderer();
185 fr2.transferSettings(myFeatureStyling);
186 fr2.findAllFeatures(true);
189 * apply 'database source' feature configuration
192 // TODO is this the feature colouring for the original
193 // alignment or the fetched xrefs? either could be Ensembl
194 newFrame.getViewport().applyFeaturesStyle(featureColourScheme);
195 copyThis.getViewport().applyFeaturesStyle(featureColourScheme);
// frame order depends on 'dna': the copy of the original goes first when the
// original is DNA, otherwise the new cross-reference frame goes first
197 SplitFrame sf = new SplitFrame(dna ? copyThis : newFrame,
198 dna ? newFrame : copyThis);
199 newFrame.setVisible(true);
200 copyThis.setVisible(true);
201 String linkedTitle = MessageManager
202 .getString("label.linked_view_title");
203 Desktop.addInternalFrame(sf, linkedTitle, -1, -1);
204 sf.adjustInitialLayout();
206 // finally add the top, then bottom frame to the view list
207 xrefViews.add(dna ? copyThis.alignPanel : newFrame.alignPanel);
208 xrefViews.add(!dna ? copyThis.alignPanel : newFrame.alignPanel);
// out-of-memory is handed to OOMWarning; any other Throwable is logged
210 } catch (OutOfMemoryError e)
212 new OOMWarning("whilst fetching crossreferences", e);
213 } catch (Throwable e)
215 Cache.log.error("Error when finding crossreferences", e);
// report completion on the progress bar, passing the start time captured at
// the top of the method
218 alignFrame.setProgressBar(MessageManager.formatMessage(
219 "status.finished_searching_for_sequences_from", new Object[]
220 { source }), sttime);
225 * Tries to add chromosomal coordinates to any nucleotide sequence which does
226 * not already have them. Coordinates are retrieved from Ensembl given an
227 * Ensembl identifier, either on the sequence itself or on a peptide sequence
228 * it has a reference to.
232 * - fetch EMBLCDS cross-references for Uniprot entry P30419
233 * - the EMBL sequences do not have xrefs to Ensembl
234 * - the Uniprot entry has xrefs to
235 * ENSP00000258960, ENSP00000468424, ENST00000258960, ENST00000592782
236 * - either of the transcript ids can be used to retrieve gene loci e.g.
237 * http://rest.ensembl.org/map/cds/ENST00000592782/1..100000
238 * Example (plant, via EnsemblGenomes):
239 * - fetch EMBLCDS cross-references for Uniprot entry Q43517 (FER1_SOLLC)
240 * - the Uniprot entry has an xref to ENSEMBLPLANTS Solyc10g044520.1.1
241 * - can retrieve gene loci with
242 * http://rest.ensemblgenomes.org/map/cds/Solyc10g044520.1.1/1..100000
// Runs the per-sequence gene loci lookup for every sequence in the list.
247 public static void findGeneLoci(List<SequenceI> sequences)
// a single map of retrieved loci is shared by all per-sequence lookups made
// in this call
249 Map<DBRefEntry, GeneLociI> retrievedLoci = new HashMap<>();
250 for (SequenceI seq : sequences)
252 findGeneLoci(seq, retrievedLoci);
257 * Tries to find chromosomal coordinates for the sequence, by searching its
258 * direct and indirect cross-references for Ensembl. If the loci have already
259 * been retrieved, just reads them out of the map of retrievedLoci; this is
260 * the case of an alternative transcript for the same protein. Otherwise calls
261 * a REST service to retrieve the loci, and if successful, adds them to the
262 * sequence and to the retrievedLoci.
265 * @param retrievedLoci
// Looks for Ensembl cross-references on the sequence, first direct and then
// via mapped sequences, and tries to fetch gene loci through each of them.
267 static void findGeneLoci(SequenceI seq,
268 Map<DBRefEntry, GeneLociI> retrievedLoci)
271 * don't replace any existing chromosomal coordinates
// guard: null sequence, protein sequence, sequence that already has gene
// loci, or sequence without any dbrefs
273 if (seq == null || seq.isProtein() || seq.getGeneLoci() != null
274 || seq.getDBRefs() == null)
// division names obtained from EnsemblInfo, converted to an array below
// NOTE(review): presumably the source filter passed to selectRefs - the
// argument lines are not visible in this excerpt
279 Set<String> ensemblDivisions = new EnsemblInfo().getDivisions();
282 * first look for direct dbrefs from sequence to Ensembl
284 String[] divisionsArray = ensemblDivisions
285 .toArray(new String[ensemblDivisions.size()]);
286 DBRefEntry[] seqRefs = seq.getDBRefs();
287 DBRefEntry[] directEnsemblRefs = DBRefUtils.selectRefs(seqRefs,
289 if (directEnsemblRefs != null)
291 for (DBRefEntry ensemblRef : directEnsemblRefs)
// NOTE(review): what happens on success is not visible in this excerpt
293 if (fetchGeneLoci(seq, ensemblRef, retrievedLoci))
301 * else look for indirect dbrefs from sequence to Ensembl
303 for (DBRefEntry dbref : seq.getDBRefs())
// only dbrefs that carry a mapping to another sequence are followed; the
// dbrefs of that mapped-to sequence are inspected instead
305 if (dbref.getMap() != null && dbref.getMap().getTo() != null)
307 DBRefEntry[] dbrefs = dbref.getMap().getTo().getDBRefs();
308 DBRefEntry[] indirectEnsemblRefs = DBRefUtils.selectRefs(dbrefs,
310 if (indirectEnsemblRefs != null)
312 for (DBRefEntry ensemblRef : indirectEnsemblRefs)
314 if (fetchGeneLoci(seq, ensemblRef, retrievedLoci))
325 * Retrieves chromosomal coordinates for the Ensembl (or EnsemblGenomes)
326 * identifier in dbref. If successful, and the sequence length matches gene
327 * loci length, then add it to the sequence, and to the retrievedLoci map.
328 * Answers true if successful, else false.
332 * @param retrievedLoci
// Asks EnsemblMap for a CDS mapping, then a CDNA mapping, for the dbref's
// accession, and stores the loci on the sequence when the lengths agree.
335 static boolean fetchGeneLoci(SequenceI seq, DBRefEntry dbref,
336 Map<DBRefEntry, GeneLociI> retrievedLoci)
// the dbref's source is passed as the 'division' argument of the mapping calls
338 String accession = dbref.getAccessionId();
339 String division = dbref.getSource();
342 * hack: ignore cross-references to Ensembl protein ids
343 * (or use map/translation perhaps?)
344 * todo: is there an equivalent in EnsemblGenomes?
346 if (accession.startsWith("ENSP"))
350 EnsemblMap mapper = new EnsemblMap();
353 * try CDS mapping first
355 GeneLociI geneLoci = mapper.getCdsMapping(division, accession, 1,
357 if (geneLoci != null)
// accept the mapping only if the total length of its 'from' ranges equals
// the sequence length
359 MapList map = geneLoci.getMap();
360 int mappedFromLength = MappingUtils.getLength(map.getFromRanges());
361 if (mappedFromLength == seq.getLength())
363 seq.setGeneLoci(geneLoci.getSpeciesId(), geneLoci.getAssemblyId(),
364 geneLoci.getChromosomeId(), geneLoci.getMap());
// record the loci under this dbref in the shared map
365 retrievedLoci.put(dbref, geneLoci);
371 * else try CDNA mapping
373 geneLoci = mapper.getCdnaMapping(division, accession, 1,
375 if (geneLoci != null)
// same length check and bookkeeping as for the CDS mapping
377 MapList map = geneLoci.getMap();
378 int mappedFromLength = MappingUtils.getLength(map.getFromRanges());
379 if (mappedFromLength == seq.getLength())
381 seq.setGeneLoci(geneLoci.getSpeciesId(), geneLoci.getAssemblyId(),
382 geneLoci.getChromosomeId(), geneLoci.getMap());
383 retrievedLoci.put(dbref, geneLoci);
396 * @param xrefsAlignment
// Builds the alignment shown alongside the cross-references in a split frame
// and aligns one alignment to the other where the checks below allow it.
// NOTE(review): the branch conditions and early returns are not visible in
// this excerpt.
399 protected AlignmentI copyAlignmentForSplitFrame(AlignmentI alignment,
400 AlignmentI dataset, boolean dna, AlignmentI xrefs,
401 AlignmentI xrefsAlignment)
403 AlignmentI copyAlignment;
// set once the copy has been aligned to the original alignment; it gates the
// later alignment of xrefsAlignment to the copy
404 boolean copyAlignmentIsAligned = false;
// this path derives a CDS alignment from the selection and the
// cross-reference sequences; an empty result shows a dialog and logs a message
407 copyAlignment = AlignmentUtils.makeCdsAlignment(sel, dataset,
408 xrefsAlignment.getSequencesArray());
409 if (copyAlignment.getHeight() == 0)
411 JvOptionPane.showMessageDialog(alignFrame,
412 MessageManager.getString("label.cant_map_cds"),
413 MessageManager.getString("label.operation_failed"),
414 JvOptionPane.OK_OPTION);
415 System.err.println("Failed to make CDS alignment");
420 * pending getting Embl transcripts to 'align',
421 * we are only doing this for Ensembl
423 // TODO proper criteria for 'can align as cdna'
424 if (DBRefSource.ENSEMBL.equalsIgnoreCase(source)
425 || AlignmentUtils.looksLikeEnsembl(alignment))
427 copyAlignment.alignAs(alignment);
428 copyAlignmentIsAligned = true;
// other path: a copy alignment built from the selection and the
// cross-referenced sequences
433 copyAlignment = AlignmentUtils.makeCopyAlignment(sel,
434 xrefs.getSequencesArray(), dataset);
437 .setGapCharacter(alignFrame.viewport.getGapCharacter());
439 StructureSelectionManager ssm = StructureSelectionManager
440 .getStructureSelectionManager(Desktop.instance);
443 * register any new mappings for sequence mouseover etc
444 * (will not duplicate any previously registered mappings)
446 ssm.registerMappings(dataset.getCodonFrames());
// an empty copy alignment is reported with the message below
448 if (copyAlignment.getHeight() <= 0)
451 "No Sequences generated for xRef type " + source);
456 * align protein to dna
458 if (dna && copyAlignmentIsAligned)
460 xrefsAlignment.alignAs(copyAlignment);
465 * align cdna to protein - currently only if
466 * fetching and aligning Ensembl transcripts!
468 // TODO: generalise for other sources of locus/transcript/cds data
469 if (dna && DBRefSource.ENSEMBL.equalsIgnoreCase(source))
471 copyAlignment.alignAs(xrefsAlignment);
475 return copyAlignment;
479 * Makes an alignment containing the given sequences, and adds them to the
480 * given dataset, which is also set as the dataset for the new alignment
482 * TODO: refactor to DatasetI method
// Derives a new sequence from each sequence in 'seqs', makes sure its dataset
// sequence is in 'dataset', and builds an alignment over the derived sequences.
488 protected AlignmentI makeCrossReferencesAlignment(AlignmentI dataset,
491 SequenceI[] sprods = new SequenceI[seqs.getHeight()];
492 for (int s = 0; s < sprods.length; s++)
494 sprods[s] = (seqs.getSequenceAt(s)).deriveSequence();
// add the dataset sequence only if the dataset does not already contain it
495 if (dataset.getSequences() == null || !dataset.getSequences()
496 .contains(sprods[s].getDatasetSequence()))
498 dataset.addSequence(sprods[s].getDatasetSequence());
500 sprods[s].updatePDBIds();
// the new alignment uses the given dataset as its dataset
502 Alignment al = new Alignment(sprods);
503 al.setDataset(dataset);
// Constructor: stores the alignment frame, the DNA flag and the source name.
// NOTE(review): the remaining parameter line and the assignment of the
// selected sequences are not visible in this excerpt.
515 CrossRefAction(AlignFrame af, SequenceI[] seqs, boolean fromDna,
518 this.alignFrame = af;
520 this._odna = fromDna;
521 this.source = dbSource;
// Static factory: returns a new CrossRefAction for the given selection, DNA
// flag, source name and alignment frame (the constructor takes the frame first).
524 public static CrossRefAction getHandlerFor(final SequenceI[] sel,
525 final boolean fromDna, final String source,
526 final AlignFrame alignFrame)
528 return new CrossRefAction(alignFrame, sel, fromDna, source);