src/jalview/gui/CrossRefAction.java

   1 /*
   2  * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
   3  * Copyright (C) $$Year-Rel$$ The Jalview Authors
   4  *
   5  * This file is part of Jalview.
   6  *
   7  * Jalview is free software: you can redistribute it and/or
   8  * modify it under the terms of the GNU General Public License
   9  * as published by the Free Software Foundation, either version 3
  10  * of the License, or (at your option) any later version.
  11  *
  12  * Jalview is distributed in the hope that it will be useful, but
  13  * WITHOUT ANY WARRANTY; without even the implied warranty
  14  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
  15  * PURPOSE.  See the GNU General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU General Public License
  18  * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
  19  * The Jalview Authors are detailed in the 'AUTHORS' file.
  20  */
  21 package jalview.gui;
  22
  23 import java.util.ArrayList;
  24 import java.util.HashMap;
  25 import java.util.List;
  26 import java.util.Map;
  27 import java.util.Set;
  28
  29 import jalview.analysis.AlignmentUtils;
  30 import jalview.analysis.CrossRef;
  31 import jalview.api.AlignmentViewPanel;
  32 import jalview.api.FeatureSettingsModelI;
  33 import jalview.bin.Cache;
  34 import jalview.bin.Console;
  35 import jalview.datamodel.Alignment;
  36 import jalview.datamodel.AlignmentI;
  37 import jalview.datamodel.DBRefEntry;
  38 import jalview.datamodel.DBRefSource;
  39 import jalview.datamodel.GeneLociI;
  40 import jalview.datamodel.SequenceI;
  41 import jalview.ext.ensembl.EnsemblInfo;
  42 import jalview.ext.ensembl.EnsemblMap;
  43 import jalview.io.gff.SequenceOntologyI;
  44 import jalview.structure.StructureSelectionManager;
  45 import jalview.util.DBRefUtils;
  46 import jalview.util.IdUtils;
  47 import jalview.util.IdUtils.IdType;
  48 import jalview.util.MapList;
  49 import jalview.util.MappingUtils;
  50 import jalview.util.MessageManager;
  51 import jalview.viewmodel.seqfeatures.FeatureRendererModel;
  52 import jalview.ws.SequenceFetcher;
  53
  54 /**
  55  * Factory constructor and runnable for discovering and displaying
  56  * cross-references for a set of aligned sequences
  57  *
  58  * @author jprocter
  59  *
  60  */
  61 public class CrossRefAction implements Runnable
  62 {
  63   private AlignFrame alignFrame;
  64
  65   private SequenceI[] sel;
  66
  67   private final boolean _odna;
  68
  69   private String source;
  70
  71   List<AlignmentViewPanel> xrefViews = new ArrayList<>();
  72
  73   List<AlignmentViewPanel> getXrefViews()
  74   {
  75     return xrefViews;
  76   }
  77
  78   @Override
  79   public void run()
  80   {
  81     final long id = IdUtils.newId(IdType.PROGRESS);
  82     alignFrame.setProgressBar(MessageManager.formatMessage(
  83             "status.searching_for_sequences_from", new Object[]
  84             { source }), id);
  85     try
  86     {
  87       AlignmentI alignment = alignFrame.getViewport().getAlignment();
  88       AlignmentI dataset = alignment.getDataset() == null ? alignment
  89               : alignment.getDataset();
  90       boolean dna = alignment.isNucleotide();
  91       if (_odna != dna)
  92       {
  93         System.err
  94                 .println("Conflict: showProducts for alignment originally "
  95                         + "thought to be " + (_odna ? "DNA" : "Protein")
  96                         + " now searching for " + (dna ? "DNA" : "Protein")
  97                         + " Context.");
  98       }
  99       AlignmentI xrefs = new CrossRef(sel, dataset)
 100               .findXrefSequences(source, dna);
 101       if (xrefs == null)
 102       {
 103         return;
 104       }
 105
 106       /*
 107        * try to look up chromosomal coordinates for nucleotide
 108        * sequences (if not already retrieved)
 109        */
 110       findGeneLoci(xrefs.getSequences());
 111
 112       /*
 113        * get display scheme (if any) to apply to features
 114        */
 115       FeatureSettingsModelI featureColourScheme = new SequenceFetcher()
 116               .getFeatureColourScheme(source);
 117
 118       if (dna && AlignmentUtils.looksLikeEnsembl(alignment))
 119       {
 120         // override default featureColourScheme so products have Ensembl variant
 121         // colours
 122         featureColourScheme = new SequenceFetcher()
 123                 .getFeatureColourScheme(DBRefSource.ENSEMBL);
 124       }
 125
 126       AlignmentI xrefsAlignment = makeCrossReferencesAlignment(dataset,
 127               xrefs);
 128       if (!dna)
 129       {
 130         xrefsAlignment = AlignmentUtils.makeCdsAlignment(
 131                 xrefsAlignment.getSequencesArray(), dataset, sel);
 132         xrefsAlignment.alignAs(alignment);
 133       }
 134
 135       /*
 136        * If we are opening a splitframe, make a copy of this alignment (sharing the same dataset
 137        * sequences). If we are DNA, drop introns and update mappings
 138        */
 139       AlignmentI copyAlignment = null;
 140
 141       if (Cache.getDefault(Preferences.ENABLE_SPLIT_FRAME, true))
 142       {
 143         copyAlignment = copyAlignmentForSplitFrame(alignment, dataset, dna,
 144                 xrefs, xrefsAlignment);
 145         if (copyAlignment == null)
 146         {
 147           return; // failed
 148         }
 149       }
 150
 151       /*
 152        * build AlignFrame(s) according to available alignment data
 153        */
 154       AlignFrame newFrame = new AlignFrame(xrefsAlignment,
 155               AlignFrame.DEFAULT_WIDTH, AlignFrame.DEFAULT_HEIGHT);
 156       if (Cache.getDefault("HIDE_INTRONS", true))
 157       {
 158         newFrame.hideFeatureColumns(SequenceOntologyI.EXON, false);
 159       }
 160       String newtitle = String.format("%s %s %s",
 161               dna ? MessageManager.getString("label.proteins")
 162                       : MessageManager.getString("label.nucleotides"),
 163               MessageManager.getString("label.for"), alignFrame.getTitle());
 164       newFrame.setTitle(newtitle);
 165
 166       if (copyAlignment == null)
 167       {
 168         /*
 169          * split frame display is turned off in preferences file
 170          */
 171         Desktop.addInternalFrame(newFrame, newtitle,
 172                 AlignFrame.DEFAULT_WIDTH, AlignFrame.DEFAULT_HEIGHT);
 173         xrefViews.add(newFrame.alignPanel);
 174         return; // via finally clause
 175       }
 176
 177       AlignFrame copyThis = new AlignFrame(copyAlignment,
 178               AlignFrame.DEFAULT_WIDTH, AlignFrame.DEFAULT_HEIGHT);
 179       copyThis.setTitle(alignFrame.getTitle());
 180
 181       boolean showSequenceFeatures = alignFrame.getViewport()
 182               .isShowSequenceFeatures();
 183       newFrame.setShowSeqFeatures(showSequenceFeatures);
 184       copyThis.setShowSeqFeatures(showSequenceFeatures);
 185       FeatureRendererModel myFeatureStyling = alignFrame.alignPanel
 186               .getSeqPanel().seqCanvas.getFeatureRenderer();
 187
 188       /*
 189        * copy feature rendering settings to split frame
 190        */
 191       FeatureRendererModel fr1 = newFrame.alignPanel.getSeqPanel().seqCanvas
 192               .getFeatureRenderer();
 193       fr1.transferSettings(myFeatureStyling);
 194       fr1.findAllFeatures(true);
 195       FeatureRendererModel fr2 = copyThis.alignPanel.getSeqPanel().seqCanvas
 196               .getFeatureRenderer();
 197       fr2.transferSettings(myFeatureStyling);
 198       fr2.findAllFeatures(true);
 199
 200       /*
 201        * apply 'database source' feature configuration
 202        * if any - first to the new splitframe view about to be displayed
 203        */
 204
 205       newFrame.getViewport().applyFeaturesStyle(featureColourScheme);
 206       copyThis.getViewport().applyFeaturesStyle(featureColourScheme);
 207
 208       /*
 209        * and for JAL-3330 also to original alignFrame view(s)
 210        * this currently trashes any original settings.
 211        */
 212       for (AlignmentViewPanel origpanel : alignFrame.getAlignPanels())
 213       {
 214         origpanel.getAlignViewport()
 215                 .mergeFeaturesStyle(featureColourScheme);
 216       }
 217
 218       SplitFrame sf = new SplitFrame(dna ? copyThis : newFrame,
 219               dna ? newFrame : copyThis);
 220
 221       newFrame.setVisible(true);
 222       copyThis.setVisible(true);
 223       String linkedTitle = MessageManager
 224               .getString("label.linked_view_title");
 225       Desktop.addInternalFrame(sf, linkedTitle, -1, -1);
 226       sf.adjustInitialLayout();
 227
 228       // finally add the top, then bottom frame to the view list
 229       xrefViews.add(dna ? copyThis.alignPanel : newFrame.alignPanel);
 230       xrefViews.add(!dna ? copyThis.alignPanel : newFrame.alignPanel);
 231
 232     } catch (OutOfMemoryError e)
 233     {
 234       new OOMWarning("whilst fetching crossreferences", e);
 235     } catch (Throwable e)
 236     {
 237       Console.error("Error when finding crossreferences", e);
 238     } finally
 239     {
 240       alignFrame.setProgressBar(MessageManager.formatMessage(
 241               "status.finished_searching_for_sequences_from", new Object[]
 242               { source }), id);
 243     }
 244   }
 245
 246   /**
 247    * Tries to add chromosomal coordinates to any nucleotide sequence which does
 248    * not already have them. Coordinates are retrieved from Ensembl given an
 249    * Ensembl identifier, either on the sequence itself or on a peptide sequence
 250    * it has a reference to.
 251    *
 252    * <pre>
 253    * Example (human):
 254    * - fetch EMBLCDS cross-references for Uniprot entry P30419
 255    * - the EMBL sequences do not have xrefs to Ensembl
 256    * - the Uniprot entry has xrefs to
 257    *    ENSP00000258960, ENSP00000468424, ENST00000258960, ENST00000592782
 258    * - either of the transcript ids can be used to retrieve gene loci e.g.
 259    *    http://rest.ensembl.org/map/cds/ENST00000592782/1..100000
 260    * Example (invertebrate):
 261    * - fetch EMBLCDS cross-references for Uniprot entry Q43517 (FER1_SOLLC)
 262    * - the Uniprot entry has an xref to ENSEMBLPLANTS Solyc10g044520.1.1
 263    * - can retrieve gene loci with
 264    *    http://rest.ensemblgenomes.org/map/cds/Solyc10g044520.1.1/1..100000
 265    * </pre>
 266    *
 267    * @param sequences
 268    */
 269   public static void findGeneLoci(List<SequenceI> sequences)
 270   {
 271     Map<DBRefEntry, GeneLociI> retrievedLoci = new HashMap<>();
 272     for (SequenceI seq : sequences)
 273     {
 274       findGeneLoci(seq, retrievedLoci);
 275     }
 276   }
 277
 278   /**
 279    * Tres to find chromosomal coordinates for the sequence, by searching its
 280    * direct and indirect cross-references for Ensembl. If the loci have already
 281    * been retrieved, just reads them out of the map of retrievedLoci; this is
 282    * the case of an alternative transcript for the same protein. Otherwise calls
 283    * a REST service to retrieve the loci, and if successful, adds them to the
 284    * sequence and to the retrievedLoci.
 285    *
 286    * @param seq
 287    * @param retrievedLoci
 288    */
 289   static void findGeneLoci(SequenceI seq,
 290           Map<DBRefEntry, GeneLociI> retrievedLoci)
 291   {
 292     /*
 293      * don't replace any existing chromosomal coordinates
 294      */
 295     if (seq == null || seq.isProtein() || seq.getGeneLoci() != null
 296             || seq.getDBRefs() == null)
 297     {
 298       return;
 299     }
 300
 301     Set<String> ensemblDivisions = new EnsemblInfo().getDivisions();
 302
 303     /*
 304      * first look for direct dbrefs from sequence to Ensembl
 305      */
 306     String[] divisionsArray = ensemblDivisions
 307             .toArray(new String[ensemblDivisions.size()]);
 308     List<DBRefEntry> seqRefs = seq.getDBRefs();
 309     List<DBRefEntry> directEnsemblRefs = DBRefUtils.selectRefs(seqRefs,
 310             divisionsArray);
 311     if (directEnsemblRefs != null)
 312     {
 313       for (DBRefEntry ensemblRef : directEnsemblRefs)
 314       {
 315         if (fetchGeneLoci(seq, ensemblRef, retrievedLoci))
 316         {
 317           return;
 318         }
 319       }
 320     }
 321
 322     /*
 323      * else look for indirect dbrefs from sequence to Ensembl
 324      */
 325     for (DBRefEntry dbref : seq.getDBRefs())
 326     {
 327       if (dbref.getMap() != null && dbref.getMap().getTo() != null)
 328       {
 329         List<DBRefEntry> dbrefs = dbref.getMap().getTo().getDBRefs();
 330         List<DBRefEntry> indirectEnsemblRefs = DBRefUtils.selectRefs(dbrefs,
 331                 divisionsArray);
 332         if (indirectEnsemblRefs != null)
 333         {
 334           for (DBRefEntry ensemblRef : indirectEnsemblRefs)
 335           {
 336             if (fetchGeneLoci(seq, ensemblRef, retrievedLoci))
 337             {
 338               return;
 339             }
 340           }
 341         }
 342       }
 343     }
 344   }
 345
 346   /**
 347    * Retrieves chromosomal coordinates for the Ensembl (or EnsemblGenomes)
 348    * identifier in dbref. If successful, and the sequence length matches gene
 349    * loci length, then add it to the sequence, and to the retrievedLoci map.
 350    * Answers true if successful, else false.
 351    *
 352    * @param seq
 353    * @param dbref
 354    * @param retrievedLoci
 355    * @return
 356    */
 357   static boolean fetchGeneLoci(SequenceI seq, DBRefEntry dbref,
 358           Map<DBRefEntry, GeneLociI> retrievedLoci)
 359   {
 360     String accession = dbref.getAccessionId();
 361     String division = dbref.getSource();
 362
 363     /*
 364      * hack: ignore cross-references to Ensembl protein ids
 365      * (or use map/translation perhaps?)
 366      * todo: is there an equivalent in EnsemblGenomes?
 367      */
 368     if (accession.startsWith("ENSP"))
 369     {
 370       return false;
 371     }
 372     EnsemblMap mapper = new EnsemblMap();
 373
 374     /*
 375      * try CDS mapping first
 376      */
 377     GeneLociI geneLoci = mapper.getCdsMapping(division, accession, 1,
 378             seq.getLength());
 379     if (geneLoci != null)
 380     {
 381       MapList map = geneLoci.getMapping();
 382       int mappedFromLength = MappingUtils.getLength(map.getFromRanges());
 383       if (mappedFromLength == seq.getLength())
 384       {
 385         seq.setGeneLoci(geneLoci.getSpeciesId(), geneLoci.getAssemblyId(),
 386                 geneLoci.getChromosomeId(), map);
 387         retrievedLoci.put(dbref, geneLoci);
 388         return true;
 389       }
 390     }
 391
 392     /*
 393      * else try CDNA mapping
 394      */
 395     geneLoci = mapper.getCdnaMapping(division, accession, 1,
 396             seq.getLength());
 397     if (geneLoci != null)
 398     {
 399       MapList map = geneLoci.getMapping();
 400       int mappedFromLength = MappingUtils.getLength(map.getFromRanges());
 401       if (mappedFromLength == seq.getLength())
 402       {
 403         seq.setGeneLoci(geneLoci.getSpeciesId(), geneLoci.getAssemblyId(),
 404                 geneLoci.getChromosomeId(), map);
 405         retrievedLoci.put(dbref, geneLoci);
 406         return true;
 407       }
 408     }
 409
 410     return false;
 411   }
 412
 413   /**
 414    * @param alignment
 415    * @param dataset
 416    * @param dna
 417    * @param xrefs
 418    * @param xrefsAlignment
 419    * @return
 420    */
 421   protected AlignmentI copyAlignmentForSplitFrame(AlignmentI alignment,
 422           AlignmentI dataset, boolean dna, AlignmentI xrefs,
 423           AlignmentI xrefsAlignment)
 424   {
 425     AlignmentI copyAlignment;
 426     boolean copyAlignmentIsAligned = false;
 427     if (dna)
 428     {
 429       copyAlignment = AlignmentUtils.makeCdsAlignment(sel, dataset,
 430               xrefsAlignment.getSequencesArray());
 431       if (copyAlignment.getHeight() == 0)
 432       {
 433         JvOptionPane.showMessageDialog(alignFrame,
 434                 MessageManager.getString("label.cant_map_cds"),
 435                 MessageManager.getString("label.operation_failed"),
 436                 JvOptionPane.OK_OPTION);
 437         jalview.bin.Console.errPrintln("Failed to make CDS alignment");
 438         return null;
 439       }
 440
 441       /*
 442        * pending getting Embl transcripts to 'align',
 443        * we are only doing this for Ensembl
 444        */
 445       // TODO proper criteria for 'can align as cdna'
 446       if (DBRefSource.ENSEMBL.equalsIgnoreCase(source)
 447               || AlignmentUtils.looksLikeEnsembl(alignment))
 448       {
 449         copyAlignment.alignAs(alignment);
 450         copyAlignmentIsAligned = true;
 451       }
 452     }
 453     else
 454     {
 455       copyAlignment = AlignmentUtils.makeCopyAlignment(sel,
 456               xrefs.getSequencesArray(), dataset);
 457     }
 458     copyAlignment.setGapCharacter(alignFrame.viewport.getGapCharacter());
 459
 460     StructureSelectionManager ssm = StructureSelectionManager
 461             .getStructureSelectionManager(Desktop.instance);
 462
 463     /*
 464      * register any new mappings for sequence mouseover etc
 465      * (will not duplicate any previously registered mappings)
 466      */
 467     ssm.registerMappings(dataset.getCodonFrames());
 468
 469     if (copyAlignment.getHeight() <= 0)
 470     {
 471       jalview.bin.Console
 472               .errPrintln("No Sequences generated for xRef type " + source);
 473       return null;
 474     }
 475
 476     /*
 477      * align protein to dna
 478      */
 479     if (dna && copyAlignmentIsAligned)
 480     {
 481       xrefsAlignment.alignAs(copyAlignment);
 482     }
 483     else
 484     {
 485       /*
 486        * align cdna to protein - currently only if
 487        * fetching and aligning Ensembl transcripts!
 488        */
 489       // TODO: generalise for other sources of locus/transcript/cds data
 490       if (dna && DBRefSource.ENSEMBL.equalsIgnoreCase(source))
 491       {
 492         copyAlignment.alignAs(xrefsAlignment);
 493       }
 494     }
 495
 496     return copyAlignment;
 497   }
 498
 499   /**
 500    * Makes an alignment containing the given sequences, and adds them to the
 501    * given dataset, which is also set as the dataset for the new alignment
 502    *
 503    * TODO: refactor to DatasetI method
 504    *
 505    * @param dataset
 506    * @param seqs
 507    * @return
 508    */
 509   protected AlignmentI makeCrossReferencesAlignment(AlignmentI dataset,
 510           AlignmentI seqs)
 511   {
 512     SequenceI[] sprods = new SequenceI[seqs.getHeight()];
 513     for (int s = 0; s < sprods.length; s++)
 514     {
 515       sprods[s] = (seqs.getSequenceAt(s)).deriveSequence();
 516       if (dataset.getSequences() == null || !dataset.getSequences()
 517               .contains(sprods[s].getDatasetSequence()))
 518       {
 519         dataset.addSequence(sprods[s].getDatasetSequence());
 520       }
 521       sprods[s].updatePDBIds();
 522     }
 523     Alignment al = new Alignment(sprods);
 524     al.setDataset(dataset);
 525     return al;
 526   }
 527
 528   /**
 529    * Constructor
 530    *
 531    * @param af
 532    * @param seqs
 533    * @param fromDna
 534    * @param dbSource
 535    */
 536   CrossRefAction(AlignFrame af, SequenceI[] seqs, boolean fromDna,
 537           String dbSource)
 538   {
 539     this.alignFrame = af;
 540     this.sel = seqs;
 541     this._odna = fromDna;
 542     this.source = dbSource;
 543   }
 544
 545   public static CrossRefAction getHandlerFor(final SequenceI[] sel,
 546           final boolean fromDna, final String source,
 547           final AlignFrame alignFrame)
 548   {
 549     return new CrossRefAction(alignFrame, sel, fromDna, source);
 550   }
 551
 552 }