src/jalview/gui/CrossRefAction.java

   1 /*
   2  * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
   3  * Copyright (C) $$Year-Rel$$ The Jalview Authors
   4  *
   5  * This file is part of Jalview.
   6  *
   7  * Jalview is free software: you can redistribute it and/or
   8  * modify it under the terms of the GNU General Public License
   9  * as published by the Free Software Foundation, either version 3
  10  * of the License, or (at your option) any later version.
  11  *
  12  * Jalview is distributed in the hope that it will be useful, but
  13  * WITHOUT ANY WARRANTY; without even the implied warranty
  14  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
  15  * PURPOSE.  See the GNU General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU General Public License
  18  * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
  19  * The Jalview Authors are detailed in the 'AUTHORS' file.
  20  */
  21 package jalview.gui;
  22
  23 import jalview.analysis.AlignmentUtils;
  24 import jalview.analysis.CrossRef;
  25 import jalview.api.AlignmentViewPanel;
  26 import jalview.api.FeatureSettingsModelI;
  27 import jalview.bin.Cache;
  28 import jalview.datamodel.Alignment;
  29 import jalview.datamodel.AlignmentI;
  30 import jalview.datamodel.DBRefEntry;
  31 import jalview.datamodel.DBRefSource;
  32 import jalview.datamodel.GeneLociI;
  33 import jalview.datamodel.SequenceI;
  34 import jalview.ext.ensembl.EnsemblInfo;
  35 import jalview.ext.ensembl.EnsemblMap;
  36 import jalview.io.gff.SequenceOntologyI;
  37 import jalview.structure.StructureSelectionManager;
  38 import jalview.util.DBRefUtils;
  39 import jalview.util.MapList;
  40 import jalview.util.MappingUtils;
  41 import jalview.util.MessageManager;
  42 import jalview.ws.SequenceFetcher;
  43
  44 import java.util.ArrayList;
  45 import java.util.HashMap;
  46 import java.util.List;
  47 import java.util.Map;
  48 import java.util.Set;
  49
  50 /**
  51  * Factory constructor and runnable for discovering and displaying
  52  * cross-references for a set of aligned sequences
  53  *
  54  * @author jprocter
  55  *
  56  */
  57 public class CrossRefAction implements Runnable
  58 {
  59   private AlignFrame alignFrame;
  60
  61   private SequenceI[] sel;
  62
  63   private final boolean _odna;
  64
  65   private String source;
  66
  67   List<AlignmentViewPanel> xrefViews = new ArrayList<>();
  68
  69   List<AlignmentViewPanel> getXrefViews()
  70   {
  71     return xrefViews;
  72   }
  73
  74   @Override
  75   public void run()
  76   {
  77     final long sttime = System.currentTimeMillis();
  78     alignFrame.setProgressBar(MessageManager.formatMessage(
  79             "status.searching_for_sequences_from", new Object[]
  80             { source }), sttime);
  81     try
  82     {
  83       AlignmentI alignment = alignFrame.getViewport().getAlignment();
  84       AlignmentI dataset = alignment.getDataset() == null ? alignment
  85               : alignment.getDataset();
  86       boolean dna = alignment.isNucleotide();
  87       if (_odna != dna)
  88       {
  89         System.err
  90                 .println("Conflict: showProducts for alignment originally "
  91                         + "thought to be " + (_odna ? "DNA" : "Protein")
  92                         + " now searching for " + (dna ? "DNA" : "Protein")
  93                         + " Context.");
  94       }
  95       AlignmentI xrefs = new CrossRef(sel, dataset)
  96               .findXrefSequences(source, dna);
  97       if (xrefs == null)
  98       {
  99         return;
 100       }
 101
 102       /*
 103        * try to look up chromosomal coordinates for nucleotide
 104        * sequences (if not already retrieved)
 105        */
 106       findGeneLoci(xrefs.getSequences());
 107
 108       /*
 109        * get display scheme (if any) to apply to features
 110        */
 111       FeatureSettingsModelI featureColourScheme = new SequenceFetcher()
 112               .getFeatureColourScheme(source);
 113
 114       if (dna && AlignmentUtils.looksLikeEnsembl(alignment))
 115       {
 116         // override default featureColourScheme so products have Ensembl variant colours
 117         featureColourScheme = new SequenceFetcher()
 118                 .getFeatureColourScheme(DBRefSource.ENSEMBL);
 119       }
 120
 121       AlignmentI xrefsAlignment = makeCrossReferencesAlignment(dataset,
 122               xrefs);
 123       if (!dna)
 124       {
 125         xrefsAlignment = AlignmentUtils.makeCdsAlignment(
 126                 xrefsAlignment.getSequencesArray(), dataset, sel);
 127         xrefsAlignment.alignAs(alignment);
 128       }
 129
 130       /*
 131        * If we are opening a splitframe, make a copy of this alignment (sharing the same dataset
 132        * sequences). If we are DNA, drop introns and update mappings
 133        */
 134       AlignmentI copyAlignment = null;
 135
 136       if (Cache.getDefault(Preferences.ENABLE_SPLIT_FRAME, true))
 137       {
 138         copyAlignment = copyAlignmentForSplitFrame(alignment, dataset, dna,
 139                 xrefs, xrefsAlignment);
 140         if (copyAlignment == null)
 141         {
 142           return; // failed
 143         }
 144       }
 145
 146       /*
 147        * build AlignFrame(s) according to available alignment data
 148        */
 149       AlignFrame newFrame = new AlignFrame(xrefsAlignment,
 150               AlignFrame.DEFAULT_WIDTH, AlignFrame.DEFAULT_HEIGHT);
 151       if (Cache.getDefault("HIDE_INTRONS", true))
 152       {
 153         newFrame.hideFeatureColumns(SequenceOntologyI.EXON, false);
 154       }
 155       String newtitle = String.format("%s %s %s",
 156               dna ? MessageManager.getString("label.proteins")
 157                       : MessageManager.getString("label.nucleotides"),
 158               MessageManager.getString("label.for"), alignFrame.getTitle());
 159       newFrame.setTitle(newtitle);
 160
 161       if (copyAlignment == null)
 162       {
 163         /*
 164          * split frame display is turned off in preferences file
 165          */
 166         Desktop.addInternalFrame(newFrame, newtitle,
 167                 AlignFrame.DEFAULT_WIDTH, AlignFrame.DEFAULT_HEIGHT);
 168         xrefViews.add(newFrame.alignPanel);
 169         return; // via finally clause
 170       }
 171
 172       AlignFrame copyThis = new AlignFrame(copyAlignment,
 173               AlignFrame.DEFAULT_WIDTH, AlignFrame.DEFAULT_HEIGHT);
 174       copyThis.setTitle(alignFrame.getTitle());
 175
 176       boolean showSequenceFeatures = alignFrame.getViewport()
 177               .isShowSequenceFeatures();
 178       newFrame.setShowSeqFeatures(showSequenceFeatures);
 179       copyThis.setShowSeqFeatures(showSequenceFeatures);
 180       FeatureRenderer myFeatureStyling = alignFrame.alignPanel
 181               .getSeqPanel().seqCanvas.getFeatureRenderer();
 182
 183       /*
 184        * copy feature rendering settings to split frame
 185        */
 186       FeatureRenderer fr1 = newFrame.alignPanel.getSeqPanel().seqCanvas
 187               .getFeatureRenderer();
 188       fr1.transferSettings(myFeatureStyling);
 189       fr1.findAllFeatures(true);
 190       FeatureRenderer fr2 = copyThis.alignPanel.getSeqPanel().seqCanvas
 191               .getFeatureRenderer();
 192       fr2.transferSettings(myFeatureStyling);
 193       fr2.findAllFeatures(true);
 194
 195       /*
 196        * apply 'database source' feature configuration
 197        * if any - first to the new splitframe view about to be displayed
 198        */
 199
 200       newFrame.getViewport().applyFeaturesStyle(featureColourScheme);
 201       copyThis.getViewport().applyFeaturesStyle(featureColourScheme);
 202
 203       /*
 204        * and for JAL-3330 also to original alignFrame view(s)
 205        * this currently trashes any original settings.
 206        */
 207       for (AlignmentViewPanel origpanel: alignFrame.getAlignPanels()) {
 208         origpanel.getAlignViewport()
 209                 .mergeFeaturesStyle(featureColourScheme);
 210       }
 211
 212       SplitFrame sf = new SplitFrame(dna ? copyThis : newFrame,
 213               dna ? newFrame : copyThis);
 214
 215       newFrame.setVisible(true);
 216       copyThis.setVisible(true);
 217       String linkedTitle = MessageManager
 218               .getString("label.linked_view_title");
 219       Desktop.addInternalFrame(sf, linkedTitle, -1, -1);
 220       sf.adjustInitialLayout();
 221
 222       // finally add the top, then bottom frame to the view list
 223       xrefViews.add(dna ? copyThis.alignPanel : newFrame.alignPanel);
 224       xrefViews.add(!dna ? copyThis.alignPanel : newFrame.alignPanel);
 225
 226     } catch (OutOfMemoryError e)
 227     {
 228       new OOMWarning("whilst fetching crossreferences", e);
 229     } catch (Throwable e)
 230     {
 231       Cache.log.error("Error when finding crossreferences", e);
 232     } finally
 233     {
 234       alignFrame.setProgressBar(MessageManager.formatMessage(
 235               "status.finished_searching_for_sequences_from", new Object[]
 236               { source }), sttime);
 237     }
 238   }
 239
 240   /**
 241    * Tries to add chromosomal coordinates to any nucleotide sequence which does
 242    * not already have them. Coordinates are retrieved from Ensembl given an
 243    * Ensembl identifier, either on the sequence itself or on a peptide sequence
 244    * it has a reference to.
 245    *
 246    * <pre>
 247    * Example (human):
 248    * - fetch EMBLCDS cross-references for Uniprot entry P30419
 249    * - the EMBL sequences do not have xrefs to Ensembl
 250    * - the Uniprot entry has xrefs to
 251    *    ENSP00000258960, ENSP00000468424, ENST00000258960, ENST00000592782
 252    * - either of the transcript ids can be used to retrieve gene loci e.g.
 253    *    http://rest.ensembl.org/map/cds/ENST00000592782/1..100000
 254    * Example (invertebrate):
 255    * - fetch EMBLCDS cross-references for Uniprot entry Q43517 (FER1_SOLLC)
 256    * - the Uniprot entry has an xref to ENSEMBLPLANTS Solyc10g044520.1.1
 257    * - can retrieve gene loci with
 258    *    http://rest.ensemblgenomes.org/map/cds/Solyc10g044520.1.1/1..100000
 259    * </pre>
 260    *
 261    * @param sequences
 262    */
 263   public static void findGeneLoci(List<SequenceI> sequences)
 264   {
 265     Map<DBRefEntry, GeneLociI> retrievedLoci = new HashMap<>();
 266     for (SequenceI seq : sequences)
 267     {
 268       findGeneLoci(seq, retrievedLoci);
 269     }
 270   }
 271
 272   /**
 273    * Tres to find chromosomal coordinates for the sequence, by searching its
 274    * direct and indirect cross-references for Ensembl. If the loci have already
 275    * been retrieved, just reads them out of the map of retrievedLoci; this is
 276    * the case of an alternative transcript for the same protein. Otherwise calls
 277    * a REST service to retrieve the loci, and if successful, adds them to the
 278    * sequence and to the retrievedLoci.
 279    *
 280    * @param seq
 281    * @param retrievedLoci
 282    */
 283   static void findGeneLoci(SequenceI seq,
 284           Map<DBRefEntry, GeneLociI> retrievedLoci)
 285   {
 286     /*
 287      * don't replace any existing chromosomal coordinates
 288      */
 289     if (seq == null || seq.isProtein() || seq.getGeneLoci() != null
 290             || seq.getDBRefs() == null)
 291     {
 292       return;
 293     }
 294
 295     Set<String> ensemblDivisions = new EnsemblInfo().getDivisions();
 296
 297     /*
 298      * first look for direct dbrefs from sequence to Ensembl
 299      */
 300     String[] divisionsArray = ensemblDivisions
 301             .toArray(new String[ensemblDivisions.size()]);
 302     DBRefEntry[] seqRefs = seq.getDBRefs();
 303     DBRefEntry[] directEnsemblRefs = DBRefUtils.selectRefs(seqRefs,
 304             divisionsArray);
 305     if (directEnsemblRefs != null)
 306     {
 307       for (DBRefEntry ensemblRef : directEnsemblRefs)
 308       {
 309         if (fetchGeneLoci(seq, ensemblRef, retrievedLoci))
 310         {
 311           return;
 312         }
 313       }
 314     }
 315
 316     /*
 317      * else look for indirect dbrefs from sequence to Ensembl
 318      */
 319     for (DBRefEntry dbref : seq.getDBRefs())
 320     {
 321       if (dbref.getMap() != null && dbref.getMap().getTo() != null)
 322       {
 323         DBRefEntry[] dbrefs = dbref.getMap().getTo().getDBRefs();
 324         DBRefEntry[] indirectEnsemblRefs = DBRefUtils.selectRefs(dbrefs,
 325                 divisionsArray);
 326         if (indirectEnsemblRefs != null)
 327         {
 328           for (DBRefEntry ensemblRef : indirectEnsemblRefs)
 329           {
 330             if (fetchGeneLoci(seq, ensemblRef, retrievedLoci))
 331             {
 332               return;
 333             }
 334           }
 335         }
 336       }
 337     }
 338   }
 339
 340   /**
 341    * Retrieves chromosomal coordinates for the Ensembl (or EnsemblGenomes)
 342    * identifier in dbref. If successful, and the sequence length matches gene
 343    * loci length, then add it to the sequence, and to the retrievedLoci map.
 344    * Answers true if successful, else false.
 345    *
 346    * @param seq
 347    * @param dbref
 348    * @param retrievedLoci
 349    * @return
 350    */
 351   static boolean fetchGeneLoci(SequenceI seq, DBRefEntry dbref,
 352           Map<DBRefEntry, GeneLociI> retrievedLoci)
 353   {
 354     String accession = dbref.getAccessionId();
 355     String division = dbref.getSource();
 356
 357     /*
 358      * hack: ignore cross-references to Ensembl protein ids
 359      * (or use map/translation perhaps?)
 360      * todo: is there an equivalent in EnsemblGenomes?
 361      */
 362     if (accession.startsWith("ENSP"))
 363     {
 364       return false;
 365     }
 366     EnsemblMap mapper = new EnsemblMap();
 367
 368     /*
 369      * try CDS mapping first
 370      */
 371     GeneLociI geneLoci = mapper.getCdsMapping(division, accession, 1,
 372             seq.getLength());
 373     if (geneLoci != null)
 374     {
 375       MapList map = geneLoci.getMapping();
 376       int mappedFromLength = MappingUtils.getLength(map.getFromRanges());
 377       if (mappedFromLength == seq.getLength())
 378       {
 379         seq.setGeneLoci(geneLoci.getSpeciesId(), geneLoci.getAssemblyId(),
 380                 geneLoci.getChromosomeId(), map);
 381         retrievedLoci.put(dbref, geneLoci);
 382         return true;
 383       }
 384     }
 385
 386     /*
 387      * else try CDNA mapping
 388      */
 389     geneLoci = mapper.getCdnaMapping(division, accession, 1,
 390             seq.getLength());
 391     if (geneLoci != null)
 392     {
 393       MapList map = geneLoci.getMapping();
 394       int mappedFromLength = MappingUtils.getLength(map.getFromRanges());
 395       if (mappedFromLength == seq.getLength())
 396       {
 397         seq.setGeneLoci(geneLoci.getSpeciesId(), geneLoci.getAssemblyId(),
 398                 geneLoci.getChromosomeId(), map);
 399         retrievedLoci.put(dbref, geneLoci);
 400         return true;
 401       }
 402     }
 403
 404     return false;
 405   }
 406
 407   /**
 408    * @param alignment
 409    * @param dataset
 410    * @param dna
 411    * @param xrefs
 412    * @param xrefsAlignment
 413    * @return
 414    */
 415   protected AlignmentI copyAlignmentForSplitFrame(AlignmentI alignment,
 416           AlignmentI dataset, boolean dna, AlignmentI xrefs,
 417           AlignmentI xrefsAlignment)
 418   {
 419     AlignmentI copyAlignment;
 420     boolean copyAlignmentIsAligned = false;
 421     if (dna)
 422     {
 423       copyAlignment = AlignmentUtils.makeCdsAlignment(sel, dataset,
 424               xrefsAlignment.getSequencesArray());
 425       if (copyAlignment.getHeight() == 0)
 426       {
 427         JvOptionPane.showMessageDialog(alignFrame,
 428                 MessageManager.getString("label.cant_map_cds"),
 429                 MessageManager.getString("label.operation_failed"),
 430                 JvOptionPane.OK_OPTION);
 431         System.err.println("Failed to make CDS alignment");
 432         return null;
 433       }
 434
 435       /*
 436        * pending getting Embl transcripts to 'align',
 437        * we are only doing this for Ensembl
 438        */
 439       // TODO proper criteria for 'can align as cdna'
 440       if (DBRefSource.ENSEMBL.equalsIgnoreCase(source)
 441               || AlignmentUtils.looksLikeEnsembl(alignment))
 442       {
 443         copyAlignment.alignAs(alignment);
 444         copyAlignmentIsAligned = true;
 445       }
 446     }
 447     else
 448     {
 449       copyAlignment = AlignmentUtils.makeCopyAlignment(sel,
 450               xrefs.getSequencesArray(), dataset);
 451     }
 452     copyAlignment
 453             .setGapCharacter(alignFrame.viewport.getGapCharacter());
 454
 455     StructureSelectionManager ssm = StructureSelectionManager
 456             .getStructureSelectionManager(Desktop.instance);
 457
 458     /*
 459      * register any new mappings for sequence mouseover etc
 460      * (will not duplicate any previously registered mappings)
 461      */
 462     ssm.registerMappings(dataset.getCodonFrames());
 463
 464     if (copyAlignment.getHeight() <= 0)
 465     {
 466       System.err.println(
 467               "No Sequences generated for xRef type " + source);
 468       return null;
 469     }
 470
 471     /*
 472      * align protein to dna
 473      */
 474     if (dna && copyAlignmentIsAligned)
 475     {
 476       xrefsAlignment.alignAs(copyAlignment);
 477     }
 478     else
 479     {
 480       /*
 481        * align cdna to protein - currently only if
 482        * fetching and aligning Ensembl transcripts!
 483        */
 484       // TODO: generalise for other sources of locus/transcript/cds data
 485       if (dna && DBRefSource.ENSEMBL.equalsIgnoreCase(source))
 486       {
 487         copyAlignment.alignAs(xrefsAlignment);
 488       }
 489     }
 490
 491     return copyAlignment;
 492   }
 493
 494   /**
 495    * Makes an alignment containing the given sequences, and adds them to the
 496    * given dataset, which is also set as the dataset for the new alignment
 497    *
 498    * TODO: refactor to DatasetI method
 499    *
 500    * @param dataset
 501    * @param seqs
 502    * @return
 503    */
 504   protected AlignmentI makeCrossReferencesAlignment(AlignmentI dataset,
 505           AlignmentI seqs)
 506   {
 507     SequenceI[] sprods = new SequenceI[seqs.getHeight()];
 508     for (int s = 0; s < sprods.length; s++)
 509     {
 510       sprods[s] = (seqs.getSequenceAt(s)).deriveSequence();
 511       if (dataset.getSequences() == null || !dataset.getSequences()
 512               .contains(sprods[s].getDatasetSequence()))
 513       {
 514         dataset.addSequence(sprods[s].getDatasetSequence());
 515       }
 516       sprods[s].updatePDBIds();
 517     }
 518     Alignment al = new Alignment(sprods);
 519     al.setDataset(dataset);
 520     return al;
 521   }
 522
 523   /**
 524    * Constructor
 525    *
 526    * @param af
 527    * @param seqs
 528    * @param fromDna
 529    * @param dbSource
 530    */
 531   CrossRefAction(AlignFrame af, SequenceI[] seqs, boolean fromDna,
 532           String dbSource)
 533   {
 534     this.alignFrame = af;
 535     this.sel = seqs;
 536     this._odna = fromDna;
 537     this.source = dbSource;
 538   }
 539
 540   public static CrossRefAction getHandlerFor(final SequenceI[] sel,
 541           final boolean fromDna, final String source,
 542           final AlignFrame alignFrame)
 543   {
 544     return new CrossRefAction(alignFrame, sel, fromDna, source);
 545   }
 546
 547 }