src/jalview/gui/CrossRefAction.java

   1 /*
   2  * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
   3  * Copyright (C) $$Year-Rel$$ The Jalview Authors
   4  *
   5  * This file is part of Jalview.
   6  *
   7  * Jalview is free software: you can redistribute it and/or
   8  * modify it under the terms of the GNU General Public License
   9  * as published by the Free Software Foundation, either version 3
  10  * of the License, or (at your option) any later version.
  11  *
  12  * Jalview is distributed in the hope that it will be useful, but
  13  * WITHOUT ANY WARRANTY; without even the implied warranty
  14  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
  15  * PURPOSE.  See the GNU General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU General Public License
  18  * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
  19  * The Jalview Authors are detailed in the 'AUTHORS' file.
  20  */
  21 package jalview.gui;
  22
  23 import jalview.analysis.AlignmentUtils;
  24 import jalview.analysis.CrossRef;
  25 import jalview.api.AlignmentViewPanel;
  26 import jalview.api.FeatureSettingsModelI;
  27 import jalview.bin.Cache;
  28 import jalview.datamodel.Alignment;
  29 import jalview.datamodel.AlignmentI;
  30 import jalview.datamodel.DBRefEntry;
  31 import jalview.datamodel.DBRefSource;
  32 import jalview.datamodel.GeneLociI;
  33 import jalview.datamodel.SequenceI;
  34 import jalview.ext.ensembl.EnsemblInfo;
  35 import jalview.ext.ensembl.EnsemblMap;
  36 import jalview.io.gff.SequenceOntologyI;
  37 import jalview.structure.StructureSelectionManager;
  38 import jalview.util.DBRefUtils;
  39 import jalview.util.MapList;
  40 import jalview.util.MappingUtils;
  41 import jalview.util.MessageManager;
  42 import jalview.viewmodel.seqfeatures.FeatureRendererModel;
  43 import jalview.ws.SequenceFetcher;
  44
  45 import java.util.ArrayList;
  46 import java.util.HashMap;
  47 import java.util.List;
  48 import java.util.Map;
  49 import java.util.Set;
  50
  51 /**
  52  * Factory constructor and runnable for discovering and displaying
  53  * cross-references for a set of aligned sequences
  54  *
  55  * @author jprocter
  56  *
  57  */
  58 public class CrossRefAction implements Runnable
  59 {
  60   private AlignFrame alignFrame;
  61
  62   private SequenceI[] sel;
  63
  64   private final boolean _odna;
  65
  66   private String source;
  67
  68   List<AlignmentViewPanel> xrefViews = new ArrayList<>();
  69
  70   List<AlignmentViewPanel> getXrefViews()
  71   {
  72     return xrefViews;
  73   }
  74
  75   @Override
  76   public void run()
  77   {
  78     final long sttime = System.currentTimeMillis();
  79     alignFrame.setProgressBar(MessageManager.formatMessage(
  80             "status.searching_for_sequences_from", new Object[]
  81             { source }), sttime);
  82     try
  83     {
  84       AlignmentI alignment = alignFrame.getViewport().getAlignment();
  85       AlignmentI dataset = alignment.getDataset() == null ? alignment
  86               : alignment.getDataset();
  87       boolean dna = alignment.isNucleotide();
  88       if (_odna != dna)
  89       {
  90         System.err
  91                 .println("Conflict: showProducts for alignment originally "
  92                         + "thought to be " + (_odna ? "DNA" : "Protein")
  93                         + " now searching for " + (dna ? "DNA" : "Protein")
  94                         + " Context.");
  95       }
  96       AlignmentI xrefs = new CrossRef(sel, dataset)
  97               .findXrefSequences(source, dna);
  98       if (xrefs == null)
  99       {
 100         return;
 101       }
 102
 103       /*
 104        * try to look up chromosomal coordinates for nucleotide
 105        * sequences (if not already retrieved)
 106        */
 107       findGeneLoci(xrefs.getSequences());
 108
 109       /*
 110        * get display scheme (if any) to apply to features
 111        */
 112       FeatureSettingsModelI featureColourScheme = new SequenceFetcher()
 113               .getFeatureColourScheme(source);
 114
 115       if (dna && AlignmentUtils.looksLikeEnsembl(alignment))
 116       {
 117         // override default featureColourScheme so products have Ensembl variant colours
 118         featureColourScheme = new SequenceFetcher()
 119                 .getFeatureColourScheme(DBRefSource.ENSEMBL);
 120       }
 121
 122       AlignmentI xrefsAlignment = makeCrossReferencesAlignment(dataset,
 123               xrefs);
 124       if (!dna)
 125       {
 126         xrefsAlignment = AlignmentUtils.makeCdsAlignment(
 127                 xrefsAlignment.getSequencesArray(), dataset, sel);
 128         xrefsAlignment.alignAs(alignment);
 129       }
 130
 131       /*
 132        * If we are opening a splitframe, make a copy of this alignment (sharing the same dataset
 133        * sequences). If we are DNA, drop introns and update mappings
 134        */
 135       AlignmentI copyAlignment = null;
 136
 137       if (Cache.getDefault(Preferences.ENABLE_SPLIT_FRAME, true))
 138       {
 139         copyAlignment = copyAlignmentForSplitFrame(alignment, dataset, dna,
 140                 xrefs, xrefsAlignment);
 141         if (copyAlignment == null)
 142         {
 143           return; // failed
 144         }
 145       }
 146
 147       /*
 148        * build AlignFrame(s) according to available alignment data
 149        */
 150       AlignFrame newFrame = new AlignFrame(xrefsAlignment,
 151               AlignFrame.DEFAULT_WIDTH, AlignFrame.DEFAULT_HEIGHT);
 152       if (Cache.getDefault("HIDE_INTRONS", true))
 153       {
 154         newFrame.hideFeatureColumns(SequenceOntologyI.EXON, false);
 155       }
 156       String newtitle = String.format("%s %s %s",
 157               dna ? MessageManager.getString("label.proteins")
 158                       : MessageManager.getString("label.nucleotides"),
 159               MessageManager.getString("label.for"), alignFrame.getTitle());
 160       newFrame.setTitle(newtitle);
 161
 162       if (copyAlignment == null)
 163       {
 164         /*
 165          * split frame display is turned off in preferences file
 166          */
 167         Desktop.addInternalFrame(newFrame, newtitle,
 168                 AlignFrame.DEFAULT_WIDTH, AlignFrame.DEFAULT_HEIGHT);
 169         xrefViews.add(newFrame.alignPanel);
 170         return; // via finally clause
 171       }
 172
 173       AlignFrame copyThis = new AlignFrame(copyAlignment,
 174               AlignFrame.DEFAULT_WIDTH, AlignFrame.DEFAULT_HEIGHT);
 175       copyThis.setTitle(alignFrame.getTitle());
 176
 177       boolean showSequenceFeatures = alignFrame.getViewport()
 178               .isShowSequenceFeatures();
 179       newFrame.setShowSeqFeatures(showSequenceFeatures);
 180       copyThis.setShowSeqFeatures(showSequenceFeatures);
 181       FeatureRendererModel myFeatureStyling = alignFrame.alignPanel
 182               .getSeqPanel().seqCanvas.getFeatureRenderer();
 183
 184       /*
 185        * copy feature rendering settings to split frame
 186        */
 187       FeatureRendererModel fr1 = newFrame.alignPanel.getSeqPanel().seqCanvas
 188               .getFeatureRenderer();
 189       fr1.transferSettings(myFeatureStyling);
 190       fr1.findAllFeatures(true);
 191       FeatureRendererModel fr2 = copyThis.alignPanel.getSeqPanel().seqCanvas
 192               .getFeatureRenderer();
 193       fr2.transferSettings(myFeatureStyling);
 194       fr2.findAllFeatures(true);
 195
 196       /*
 197        * apply 'database source' feature configuration
 198        * if any - first to the new splitframe view about to be displayed
 199        */
 200
 201       newFrame.getViewport().applyFeaturesStyle(featureColourScheme);
 202       copyThis.getViewport().applyFeaturesStyle(featureColourScheme);
 203
 204       /*
 205        * and for JAL-3330 also to original alignFrame view(s)
 206        * this currently trashes any original settings.
 207        */
 208       for (AlignmentViewPanel origpanel: alignFrame.getAlignPanels()) {
 209         origpanel.getAlignViewport()
 210                 .mergeFeaturesStyle(featureColourScheme);
 211       }
 212
 213       SplitFrame sf = new SplitFrame(dna ? copyThis : newFrame,
 214               dna ? newFrame : copyThis);
 215
 216       newFrame.setVisible(true);
 217       copyThis.setVisible(true);
 218       String linkedTitle = MessageManager
 219               .getString("label.linked_view_title");
 220       Desktop.addInternalFrame(sf, linkedTitle, -1, -1);
 221       sf.adjustInitialLayout();
 222
 223       // finally add the top, then bottom frame to the view list
 224       xrefViews.add(dna ? copyThis.alignPanel : newFrame.alignPanel);
 225       xrefViews.add(!dna ? copyThis.alignPanel : newFrame.alignPanel);
 226
 227     } catch (OutOfMemoryError e)
 228     {
 229       new OOMWarning("whilst fetching crossreferences", e);
 230     } catch (Throwable e)
 231     {
 232       Cache.log.error("Error when finding crossreferences", e);
 233     } finally
 234     {
 235       alignFrame.setProgressBar(MessageManager.formatMessage(
 236               "status.finished_searching_for_sequences_from", new Object[]
 237               { source }), sttime);
 238     }
 239   }
 240
 241   /**
 242    * Tries to add chromosomal coordinates to any nucleotide sequence which does
 243    * not already have them. Coordinates are retrieved from Ensembl given an
 244    * Ensembl identifier, either on the sequence itself or on a peptide sequence
 245    * it has a reference to.
 246    *
 247    * <pre>
 248    * Example (human):
 249    * - fetch EMBLCDS cross-references for Uniprot entry P30419
 250    * - the EMBL sequences do not have xrefs to Ensembl
 251    * - the Uniprot entry has xrefs to
 252    *    ENSP00000258960, ENSP00000468424, ENST00000258960, ENST00000592782
 253    * - either of the transcript ids can be used to retrieve gene loci e.g.
 254    *    http://rest.ensembl.org/map/cds/ENST00000592782/1..100000
 255    * Example (invertebrate):
 256    * - fetch EMBLCDS cross-references for Uniprot entry Q43517 (FER1_SOLLC)
 257    * - the Uniprot entry has an xref to ENSEMBLPLANTS Solyc10g044520.1.1
 258    * - can retrieve gene loci with
 259    *    http://rest.ensemblgenomes.org/map/cds/Solyc10g044520.1.1/1..100000
 260    * </pre>
 261    *
 262    * @param sequences
 263    */
 264   public static void findGeneLoci(List<SequenceI> sequences)
 265   {
 266     Map<DBRefEntry, GeneLociI> retrievedLoci = new HashMap<>();
 267     for (SequenceI seq : sequences)
 268     {
 269       findGeneLoci(seq, retrievedLoci);
 270     }
 271   }
 272
 273   /**
 274    * Tres to find chromosomal coordinates for the sequence, by searching its
 275    * direct and indirect cross-references for Ensembl. If the loci have already
 276    * been retrieved, just reads them out of the map of retrievedLoci; this is
 277    * the case of an alternative transcript for the same protein. Otherwise calls
 278    * a REST service to retrieve the loci, and if successful, adds them to the
 279    * sequence and to the retrievedLoci.
 280    *
 281    * @param seq
 282    * @param retrievedLoci
 283    */
 284   static void findGeneLoci(SequenceI seq,
 285           Map<DBRefEntry, GeneLociI> retrievedLoci)
 286   {
 287     /*
 288      * don't replace any existing chromosomal coordinates
 289      */
 290     if (seq == null || seq.isProtein() || seq.getGeneLoci() != null
 291             || seq.getDBRefs() == null)
 292     {
 293       return;
 294     }
 295
 296     Set<String> ensemblDivisions = new EnsemblInfo().getDivisions();
 297
 298     /*
 299      * first look for direct dbrefs from sequence to Ensembl
 300      */
 301     String[] divisionsArray = ensemblDivisions
 302             .toArray(new String[ensemblDivisions.size()]);
 303     List<DBRefEntry> seqRefs = seq.getDBRefs();
 304     List<DBRefEntry> directEnsemblRefs = DBRefUtils.selectRefs(seqRefs,
 305             divisionsArray);
 306     if (directEnsemblRefs != null)
 307     {
 308       for (DBRefEntry ensemblRef : directEnsemblRefs)
 309       {
 310         if (fetchGeneLoci(seq, ensemblRef, retrievedLoci))
 311         {
 312           return;
 313         }
 314       }
 315     }
 316
 317     /*
 318      * else look for indirect dbrefs from sequence to Ensembl
 319      */
 320     for (DBRefEntry dbref : seq.getDBRefs())
 321     {
 322       if (dbref.getMap() != null && dbref.getMap().getTo() != null)
 323       {
 324         List<DBRefEntry> dbrefs = dbref.getMap().getTo().getDBRefs();
 325         List<DBRefEntry> indirectEnsemblRefs = DBRefUtils.selectRefs(dbrefs,
 326                 divisionsArray);
 327         if (indirectEnsemblRefs != null)
 328         {
 329           for (DBRefEntry ensemblRef : indirectEnsemblRefs)
 330           {
 331             if (fetchGeneLoci(seq, ensemblRef, retrievedLoci))
 332             {
 333               return;
 334             }
 335           }
 336         }
 337       }
 338     }
 339   }
 340
 341   /**
 342    * Retrieves chromosomal coordinates for the Ensembl (or EnsemblGenomes)
 343    * identifier in dbref. If successful, and the sequence length matches gene
 344    * loci length, then add it to the sequence, and to the retrievedLoci map.
 345    * Answers true if successful, else false.
 346    *
 347    * @param seq
 348    * @param dbref
 349    * @param retrievedLoci
 350    * @return
 351    */
 352   static boolean fetchGeneLoci(SequenceI seq, DBRefEntry dbref,
 353           Map<DBRefEntry, GeneLociI> retrievedLoci)
 354   {
 355     String accession = dbref.getAccessionId();
 356     String division = dbref.getSource();
 357
 358     /*
 359      * hack: ignore cross-references to Ensembl protein ids
 360      * (or use map/translation perhaps?)
 361      * todo: is there an equivalent in EnsemblGenomes?
 362      */
 363     if (accession.startsWith("ENSP"))
 364     {
 365       return false;
 366     }
 367     EnsemblMap mapper = new EnsemblMap();
 368
 369     /*
 370      * try CDS mapping first
 371      */
 372     GeneLociI geneLoci = mapper.getCdsMapping(division, accession, 1,
 373             seq.getLength());
 374     if (geneLoci != null)
 375     {
 376       MapList map = geneLoci.getMapping();
 377       int mappedFromLength = MappingUtils.getLength(map.getFromRanges());
 378       if (mappedFromLength == seq.getLength())
 379       {
 380         seq.setGeneLoci(geneLoci.getSpeciesId(), geneLoci.getAssemblyId(),
 381                 geneLoci.getChromosomeId(), map);
 382         retrievedLoci.put(dbref, geneLoci);
 383         return true;
 384       }
 385     }
 386
 387     /*
 388      * else try CDNA mapping
 389      */
 390     geneLoci = mapper.getCdnaMapping(division, accession, 1,
 391             seq.getLength());
 392     if (geneLoci != null)
 393     {
 394       MapList map = geneLoci.getMapping();
 395       int mappedFromLength = MappingUtils.getLength(map.getFromRanges());
 396       if (mappedFromLength == seq.getLength())
 397       {
 398         seq.setGeneLoci(geneLoci.getSpeciesId(), geneLoci.getAssemblyId(),
 399                 geneLoci.getChromosomeId(), map);
 400         retrievedLoci.put(dbref, geneLoci);
 401         return true;
 402       }
 403     }
 404
 405     return false;
 406   }
 407
 408   /**
 409    * @param alignment
 410    * @param dataset
 411    * @param dna
 412    * @param xrefs
 413    * @param xrefsAlignment
 414    * @return
 415    */
 416   protected AlignmentI copyAlignmentForSplitFrame(AlignmentI alignment,
 417           AlignmentI dataset, boolean dna, AlignmentI xrefs,
 418           AlignmentI xrefsAlignment)
 419   {
 420     AlignmentI copyAlignment;
 421     boolean copyAlignmentIsAligned = false;
 422     if (dna)
 423     {
 424       copyAlignment = AlignmentUtils.makeCdsAlignment(sel, dataset,
 425               xrefsAlignment.getSequencesArray());
 426       if (copyAlignment.getHeight() == 0)
 427       {
 428         JvOptionPane.showMessageDialog(alignFrame,
 429                 MessageManager.getString("label.cant_map_cds"),
 430                 MessageManager.getString("label.operation_failed"),
 431                 JvOptionPane.OK_OPTION);
 432         System.err.println("Failed to make CDS alignment");
 433         return null;
 434       }
 435
 436       /*
 437        * pending getting Embl transcripts to 'align',
 438        * we are only doing this for Ensembl
 439        */
 440       // TODO proper criteria for 'can align as cdna'
 441       if (DBRefSource.ENSEMBL.equalsIgnoreCase(source)
 442               || AlignmentUtils.looksLikeEnsembl(alignment))
 443       {
 444         copyAlignment.alignAs(alignment);
 445         copyAlignmentIsAligned = true;
 446       }
 447     }
 448     else
 449     {
 450       copyAlignment = AlignmentUtils.makeCopyAlignment(sel,
 451               xrefs.getSequencesArray(), dataset);
 452     }
 453     copyAlignment
 454             .setGapCharacter(alignFrame.viewport.getGapCharacter());
 455
 456     StructureSelectionManager ssm = StructureSelectionManager
 457             .getStructureSelectionManager(Desktop.instance);
 458
 459     /*
 460      * register any new mappings for sequence mouseover etc
 461      * (will not duplicate any previously registered mappings)
 462      */
 463     ssm.registerMappings(dataset.getCodonFrames());
 464
 465     if (copyAlignment.getHeight() <= 0)
 466     {
 467       System.err.println(
 468               "No Sequences generated for xRef type " + source);
 469       return null;
 470     }
 471
 472     /*
 473      * align protein to dna
 474      */
 475     if (dna && copyAlignmentIsAligned)
 476     {
 477       xrefsAlignment.alignAs(copyAlignment);
 478     }
 479     else
 480     {
 481       /*
 482        * align cdna to protein - currently only if
 483        * fetching and aligning Ensembl transcripts!
 484        */
 485       // TODO: generalise for other sources of locus/transcript/cds data
 486       if (dna && DBRefSource.ENSEMBL.equalsIgnoreCase(source))
 487       {
 488         copyAlignment.alignAs(xrefsAlignment);
 489       }
 490     }
 491
 492     return copyAlignment;
 493   }
 494
 495   /**
 496    * Makes an alignment containing the given sequences, and adds them to the
 497    * given dataset, which is also set as the dataset for the new alignment
 498    *
 499    * TODO: refactor to DatasetI method
 500    *
 501    * @param dataset
 502    * @param seqs
 503    * @return
 504    */
 505   protected AlignmentI makeCrossReferencesAlignment(AlignmentI dataset,
 506           AlignmentI seqs)
 507   {
 508     SequenceI[] sprods = new SequenceI[seqs.getHeight()];
 509     for (int s = 0; s < sprods.length; s++)
 510     {
 511       sprods[s] = (seqs.getSequenceAt(s)).deriveSequence();
 512       if (dataset.getSequences() == null || !dataset.getSequences()
 513               .contains(sprods[s].getDatasetSequence()))
 514       {
 515         dataset.addSequence(sprods[s].getDatasetSequence());
 516       }
 517       sprods[s].updatePDBIds();
 518     }
 519     Alignment al = new Alignment(sprods);
 520     al.setDataset(dataset);
 521     return al;
 522   }
 523
 524   /**
 525    * Constructor
 526    *
 527    * @param af
 528    * @param seqs
 529    * @param fromDna
 530    * @param dbSource
 531    */
 532   CrossRefAction(AlignFrame af, SequenceI[] seqs, boolean fromDna,
 533           String dbSource)
 534   {
 535     this.alignFrame = af;
 536     this.sel = seqs;
 537     this._odna = fromDna;
 538     this.source = dbSource;
 539   }
 540
 541   public static CrossRefAction getHandlerFor(final SequenceI[] sel,
 542           final boolean fromDna, final String source,
 543           final AlignFrame alignFrame)
 544   {
 545     return new CrossRefAction(alignFrame, sel, fromDna, source);
 546   }
 547
 548 }