X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fgui%2FCrossRefAction.java;h=51ac2eebb7cf7747dd938fcb225af90bc393c833;hb=cbeb7ad59d51b468c54ca3db2a2a7693060a2509;hp=f0b0891615d8fc71ae0b3aaa90c833d3c2496036;hpb=72d146eb50571b97a0f1190cbfa01d5d90428764;p=jalview.git diff --git a/src/jalview/gui/CrossRefAction.java b/src/jalview/gui/CrossRefAction.java index f0b0891..51ac2ee 100644 --- a/src/jalview/gui/CrossRefAction.java +++ b/src/jalview/gui/CrossRefAction.java @@ -27,17 +27,25 @@ import jalview.api.FeatureSettingsModelI; import jalview.bin.Cache; import jalview.datamodel.Alignment; import jalview.datamodel.AlignmentI; +import jalview.datamodel.DBRefEntry; import jalview.datamodel.DBRefSource; +import jalview.datamodel.GeneLociI; import jalview.datamodel.SequenceI; +import jalview.ext.ensembl.EnsemblInfo; +import jalview.ext.ensembl.EnsemblMap; import jalview.io.gff.SequenceOntologyI; import jalview.structure.StructureSelectionManager; +import jalview.util.DBRefUtils; +import jalview.util.MapList; +import jalview.util.MappingUtils; import jalview.util.MessageManager; import jalview.ws.SequenceFetcher; import java.util.ArrayList; +import java.util.HashMap; import java.util.List; - -import javax.swing.JOptionPane; +import java.util.Map; +import java.util.Set; /** * Factory constructor and runnable for discovering and displaying @@ -52,13 +60,13 @@ public class CrossRefAction implements Runnable private SequenceI[] sel; - private boolean _odna; + private final boolean _odna; private String source; - List xrefViews = new ArrayList(); + List xrefViews = new ArrayList<>(); - public List getXrefViews() + List getXrefViews() { return xrefViews; } @@ -67,10 +75,9 @@ public class CrossRefAction implements Runnable public void run() { final long sttime = System.currentTimeMillis(); - alignFrame.setProgressBar( - MessageManager.formatMessage( - "status.searching_for_sequences_from", - new Object[] { source }), sttime); + alignFrame.setProgressBar(MessageManager.formatMessage( + "status.searching_for_sequences_from", new Object[] + { source }), sttime); try { AlignmentI alignment = alignFrame.getViewport().getAlignment(); @@ -85,12 +92,19 @@ public class CrossRefAction implements Runnable + " now searching for " + (dna ? "DNA" : "Protein") + " Context."); } - AlignmentI xrefs = new CrossRef(sel, dataset).findXrefSequences( - source, dna); + AlignmentI xrefs = new CrossRef(sel, dataset) + .findXrefSequences(source, dna); if (xrefs == null) { return; } + + /* + * try to look up chromosomal coordinates for nucleotide + * sequences (if not already retrieved) + */ + findGeneLoci(xrefs.getSequences()); + /* * get display scheme (if any) to apply to features */ @@ -114,75 +128,14 @@ public class CrossRefAction implements Runnable if (Cache.getDefault(Preferences.ENABLE_SPLIT_FRAME, true)) { - boolean copyAlignmentIsAligned = false; - if (dna) - { - copyAlignment = AlignmentUtils.makeCdsAlignment(sel, dataset, - xrefsAlignment.getSequencesArray()); - if (copyAlignment.getHeight() == 0) - { - JOptionPane.showMessageDialog(alignFrame, - MessageManager.getString("label.cant_map_cds"), - MessageManager.getString("label.operation_failed"), - JOptionPane.OK_OPTION); - System.err.println("Failed to make CDS alignment"); - } - - /* - * pending getting Embl transcripts to 'align', - * we are only doing this for Ensembl - */ - // TODO proper criteria for 'can align as cdna' - if (DBRefSource.ENSEMBL.equalsIgnoreCase(source) - || AlignmentUtils.looksLikeEnsembl(alignment)) - { - copyAlignment.alignAs(alignment); - copyAlignmentIsAligned = true; - } - } - else + copyAlignment = copyAlignmentForSplitFrame(alignment, dataset, dna, + xrefs, xrefsAlignment); + if (copyAlignment == null) { - copyAlignment = AlignmentUtils.makeCopyAlignment(sel, - xrefs.getSequencesArray(), dataset); - } - copyAlignment - .setGapCharacter(alignFrame.viewport.getGapCharacter()); - - StructureSelectionManager ssm = StructureSelectionManager - .getStructureSelectionManager(Desktop.instance); - - /* - * register any new mappings for sequence mouseover etc - * (will not duplicate any previously registered mappings) - */ - ssm.registerMappings(dataset.getCodonFrames()); - - if (copyAlignment.getHeight() <= 0) - { - System.err.println("No Sequences generated for xRef type " - + source); - return; - } - /* - * align protein to dna - */ - if (dna && copyAlignmentIsAligned) - { - xrefsAlignment.alignAs(copyAlignment); - } - else - { - /* - * align cdna to protein - currently only if - * fetching and aligning Ensembl transcripts! - */ - // TODO: generalise for other sources of locus/transcript/cds data - if (dna && DBRefSource.ENSEMBL.equalsIgnoreCase(source)) - { - copyAlignment.alignAs(xrefsAlignment); - } + return; // failed } } + /* * build AlignFrame(s) according to available alignment data */ @@ -192,8 +145,9 @@ public class CrossRefAction implements Runnable { newFrame.hideFeatureColumns(SequenceOntologyI.EXON, false); } - String newtitle = String.format("%s %s %s", MessageManager - .getString(dna ? "label.proteins" : "label.nucleotides"), + String newtitle = String.format("%s %s %s", + dna ? MessageManager.getString("label.proteins") + : MessageManager.getString("label.nucleotides"), MessageManager.getString("label.for"), alignFrame.getTitle()); newFrame.setTitle(newtitle); @@ -207,6 +161,7 @@ public class CrossRefAction implements Runnable xrefViews.add(newFrame.alignPanel); return; // via finally clause } + AlignFrame copyThis = new AlignFrame(copyAlignment, AlignFrame.DEFAULT_WIDTH, AlignFrame.DEFAULT_HEIGHT); copyThis.setTitle(alignFrame.getTitle()); @@ -221,10 +176,14 @@ public class CrossRefAction implements Runnable /* * copy feature rendering settings to split frame */ - newFrame.alignPanel.getSeqPanel().seqCanvas.getFeatureRenderer() - .transferSettings(myFeatureStyling); - copyThis.alignPanel.getSeqPanel().seqCanvas.getFeatureRenderer() - .transferSettings(myFeatureStyling); + FeatureRenderer fr1 = newFrame.alignPanel.getSeqPanel().seqCanvas + .getFeatureRenderer(); + fr1.transferSettings(myFeatureStyling); + fr1.findAllFeatures(true); + FeatureRenderer fr2 = copyThis.alignPanel.getSeqPanel().seqCanvas + .getFeatureRenderer(); + fr2.transferSettings(myFeatureStyling); + fr2.findAllFeatures(true); /* * apply 'database source' feature configuration @@ -242,7 +201,7 @@ public class CrossRefAction implements Runnable String linkedTitle = MessageManager .getString("label.linked_view_title"); Desktop.addInternalFrame(sf, linkedTitle, -1, -1); - sf.adjustDivider(); + sf.adjustInitialLayout(); // finally add the top, then bottom frame to the view list xrefViews.add(dna ? copyThis.alignPanel : newFrame.alignPanel); @@ -257,12 +216,266 @@ public class CrossRefAction implements Runnable } finally { alignFrame.setProgressBar(MessageManager.formatMessage( - "status.finished_searching_for_sequences_from", - new Object[] { source }), sttime); + "status.finished_searching_for_sequences_from", new Object[] + { source }), sttime); } } /** + * Tries to add chromosomal coordinates to any nucleotide sequence which does + * not already have them. Coordinates are retrieved from Ensembl given an + * Ensembl identifier, either on the sequence itself or on a peptide sequence + * it has a reference to. + * + *
+   * Example (human):
+   * - fetch EMBLCDS cross-references for Uniprot entry P30419
+   * - the EMBL sequences do not have xrefs to Ensembl
+   * - the Uniprot entry has xrefs to 
+   *    ENSP00000258960, ENSP00000468424, ENST00000258960, ENST00000592782
+   * - either of the transcript ids can be used to retrieve gene loci e.g.
+   *    http://rest.ensembl.org/map/cds/ENST00000592782/1..100000
+   * Example (invertebrate):
+   * - fetch EMBLCDS cross-references for Uniprot entry Q43517 (FER1_SOLLC)
+   * - the Uniprot entry has an xref to ENSEMBLPLANTS Solyc10g044520.1.1
+   * - can retrieve gene loci with
+   *    http://rest.ensemblgenomes.org/map/cds/Solyc10g044520.1.1/1..100000
+   * 
+ * + * @param sequences + */ + public static void findGeneLoci(List sequences) + { + Map retrievedLoci = new HashMap<>(); + for (SequenceI seq : sequences) + { + findGeneLoci(seq, retrievedLoci); + } + } + + /** + * Tres to find chromosomal coordinates for the sequence, by searching its + * direct and indirect cross-references for Ensembl. If the loci have already + * been retrieved, just reads them out of the map of retrievedLoci; this is + * the case of an alternative transcript for the same protein. Otherwise calls + * a REST service to retrieve the loci, and if successful, adds them to the + * sequence and to the retrievedLoci. + * + * @param seq + * @param retrievedLoci + */ + static void findGeneLoci(SequenceI seq, + Map retrievedLoci) + { + /* + * don't replace any existing chromosomal coordinates + */ + if (seq == null || seq.isProtein() || seq.getGeneLoci() != null + || seq.getDBRefs() == null) + { + return; + } + + Set ensemblDivisions = new EnsemblInfo().getDivisions(); + + /* + * first look for direct dbrefs from sequence to Ensembl + */ + String[] divisionsArray = ensemblDivisions + .toArray(new String[ensemblDivisions.size()]); + DBRefEntry[] seqRefs = seq.getDBRefs(); + DBRefEntry[] directEnsemblRefs = DBRefUtils.selectRefs(seqRefs, + divisionsArray); + if (directEnsemblRefs != null) + { + for (DBRefEntry ensemblRef : directEnsemblRefs) + { + if (fetchGeneLoci(seq, ensemblRef, retrievedLoci)) + { + return; + } + } + } + + /* + * else look for indirect dbrefs from sequence to Ensembl + */ + for (DBRefEntry dbref : seq.getDBRefs()) + { + if (dbref.getMap() != null && dbref.getMap().getTo() != null) + { + DBRefEntry[] dbrefs = dbref.getMap().getTo().getDBRefs(); + DBRefEntry[] indirectEnsemblRefs = DBRefUtils.selectRefs(dbrefs, + divisionsArray); + if (indirectEnsemblRefs != null) + { + for (DBRefEntry ensemblRef : indirectEnsemblRefs) + { + if (fetchGeneLoci(seq, ensemblRef, retrievedLoci)) + { + return; + } + } + } + } + } + } + + /** + * Retrieves chromosomal coordinates for the Ensembl (or EnsemblGenomes) + * identifier in dbref. If successful, and the sequence length matches gene + * loci length, then add it to the sequence, and to the retrievedLoci map. + * Answers true if successful, else false. + * + * @param seq + * @param dbref + * @param retrievedLoci + * @return + */ + static boolean fetchGeneLoci(SequenceI seq, DBRefEntry dbref, + Map retrievedLoci) + { + String accession = dbref.getAccessionId(); + String division = dbref.getSource(); + + /* + * hack: ignore cross-references to Ensembl protein ids + * (or use map/translation perhaps?) + * todo: is there an equivalent in EnsemblGenomes? + */ + if (accession.startsWith("ENSP")) + { + return false; + } + EnsemblMap mapper = new EnsemblMap(); + + /* + * try CDS mapping first + */ + GeneLociI geneLoci = mapper.getCdsMapping(division, accession, 1, + seq.getLength()); + if (geneLoci != null) + { + MapList map = geneLoci.getMapping(); + int mappedFromLength = MappingUtils.getLength(map.getFromRanges()); + if (mappedFromLength == seq.getLength()) + { + seq.setGeneLoci(geneLoci.getSpeciesId(), geneLoci.getAssemblyId(), + geneLoci.getChromosomeId(), map); + retrievedLoci.put(dbref, geneLoci); + return true; + } + } + + /* + * else try CDNA mapping + */ + geneLoci = mapper.getCdnaMapping(division, accession, 1, + seq.getLength()); + if (geneLoci != null) + { + MapList map = geneLoci.getMapping(); + int mappedFromLength = MappingUtils.getLength(map.getFromRanges()); + if (mappedFromLength == seq.getLength()) + { + seq.setGeneLoci(geneLoci.getSpeciesId(), geneLoci.getAssemblyId(), + geneLoci.getChromosomeId(), map); + retrievedLoci.put(dbref, geneLoci); + return true; + } + } + + return false; + } + + /** + * @param alignment + * @param dataset + * @param dna + * @param xrefs + * @param xrefsAlignment + * @return + */ + protected AlignmentI copyAlignmentForSplitFrame(AlignmentI alignment, + AlignmentI dataset, boolean dna, AlignmentI xrefs, + AlignmentI xrefsAlignment) + { + AlignmentI copyAlignment; + boolean copyAlignmentIsAligned = false; + if (dna) + { + copyAlignment = AlignmentUtils.makeCdsAlignment(sel, dataset, + xrefsAlignment.getSequencesArray()); + if (copyAlignment.getHeight() == 0) + { + JvOptionPane.showMessageDialog(alignFrame, + MessageManager.getString("label.cant_map_cds"), + MessageManager.getString("label.operation_failed"), + JvOptionPane.OK_OPTION); + System.err.println("Failed to make CDS alignment"); + return null; + } + + /* + * pending getting Embl transcripts to 'align', + * we are only doing this for Ensembl + */ + // TODO proper criteria for 'can align as cdna' + if (DBRefSource.ENSEMBL.equalsIgnoreCase(source) + || AlignmentUtils.looksLikeEnsembl(alignment)) + { + copyAlignment.alignAs(alignment); + copyAlignmentIsAligned = true; + } + } + else + { + copyAlignment = AlignmentUtils.makeCopyAlignment(sel, + xrefs.getSequencesArray(), dataset); + } + copyAlignment + .setGapCharacter(alignFrame.viewport.getGapCharacter()); + + StructureSelectionManager ssm = StructureSelectionManager + .getStructureSelectionManager(Desktop.instance); + + /* + * register any new mappings for sequence mouseover etc + * (will not duplicate any previously registered mappings) + */ + ssm.registerMappings(dataset.getCodonFrames()); + + if (copyAlignment.getHeight() <= 0) + { + System.err.println( + "No Sequences generated for xRef type " + source); + return null; + } + + /* + * align protein to dna + */ + if (dna && copyAlignmentIsAligned) + { + xrefsAlignment.alignAs(copyAlignment); + } + else + { + /* + * align cdna to protein - currently only if + * fetching and aligning Ensembl transcripts! + */ + // TODO: generalise for other sources of locus/transcript/cds data + if (dna && DBRefSource.ENSEMBL.equalsIgnoreCase(source)) + { + copyAlignment.alignAs(xrefsAlignment); + } + } + + return copyAlignment; + } + + /** * Makes an alignment containing the given sequences, and adds them to the * given dataset, which is also set as the dataset for the new alignment * @@ -279,9 +492,8 @@ public class CrossRefAction implements Runnable for (int s = 0; s < sprods.length; s++) { sprods[s] = (seqs.getSequenceAt(s)).deriveSequence(); - if (dataset.getSequences() == null - || !dataset.getSequences().contains( - sprods[s].getDatasetSequence())) + if (dataset.getSequences() == null || !dataset.getSequences() + .contains(sprods[s].getDatasetSequence())) { dataset.addSequence(sprods[s].getDatasetSequence()); } @@ -292,20 +504,28 @@ public class CrossRefAction implements Runnable return al; } - public CrossRefAction(AlignFrame alignFrame, SequenceI[] sel, - boolean _odna, String source) + /** + * Constructor + * + * @param af + * @param seqs + * @param fromDna + * @param dbSource + */ + CrossRefAction(AlignFrame af, SequenceI[] seqs, boolean fromDna, + String dbSource) { - this.alignFrame = alignFrame; - this.sel = sel; - this._odna = _odna; - this.source = source; + this.alignFrame = af; + this.sel = seqs; + this._odna = fromDna; + this.source = dbSource; } - public static CrossRefAction showProductsFor(final SequenceI[] sel, - final boolean _odna, final String source, + public static CrossRefAction getHandlerFor(final SequenceI[] sel, + final boolean fromDna, final String source, final AlignFrame alignFrame) { - return new CrossRefAction(alignFrame, sel, _odna, source); + return new CrossRefAction(alignFrame, sel, fromDna, source); } }