src/jalview/gui/CrossRefAction.java

   1 /*
   2  * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
   3  * Copyright (C) $$Year-Rel$$ The Jalview Authors
   4  *
   5  * This file is part of Jalview.
   6  *
   7  * Jalview is free software: you can redistribute it and/or
   8  * modify it under the terms of the GNU General Public License
   9  * as published by the Free Software Foundation, either version 3
  10  * of the License, or (at your option) any later version.
  11  *
  12  * Jalview is distributed in the hope that it will be useful, but
  13  * WITHOUT ANY WARRANTY; without even the implied warranty
  14  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
  15  * PURPOSE.  See the GNU General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU General Public License
  18  * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
  19  * The Jalview Authors are detailed in the 'AUTHORS' file.
  20  */
  21 package jalview.gui;
  22
  23 import jalview.analysis.AlignmentUtils;
  24 import jalview.analysis.CrossRef;
  25 import jalview.api.AlignmentViewPanel;
  26 import jalview.api.FeatureSettingsModelI;
  27 import jalview.bin.Cache;
  28 import jalview.bin.Console;
  29 import jalview.datamodel.Alignment;
  30 import jalview.datamodel.AlignmentI;
  31 import jalview.datamodel.DBRefEntry;
  32 import jalview.datamodel.DBRefSource;
  33 import jalview.datamodel.GeneLociI;
  34 import jalview.datamodel.SequenceI;
  35 import jalview.ext.ensembl.EnsemblInfo;
  36 import jalview.ext.ensembl.EnsemblMap;
  37 import jalview.io.gff.SequenceOntologyI;
  38 import jalview.structure.StructureSelectionManager;
  39 import jalview.util.DBRefUtils;
  40 import jalview.util.MapList;
  41 import jalview.util.MappingUtils;
  42 import jalview.util.MessageManager;
  43 import jalview.viewmodel.seqfeatures.FeatureRendererModel;
  44 import jalview.ws.SequenceFetcher;
  45
  46 import java.util.ArrayList;
  47 import java.util.HashMap;
  48 import java.util.List;
  49 import java.util.Map;
  50 import java.util.Set;
  51
  52 /**
  53  * Factory constructor and runnable for discovering and displaying
  54  * cross-references for a set of aligned sequences
  55  *
  56  * @author jprocter
  57  *
  58  */
  59 public class CrossRefAction implements Runnable
  60 {
  61   private AlignFrame alignFrame;
  62
  63   private SequenceI[] sel;
  64
  65   private final boolean _odna;
  66
  67   private String source;
  68
  69   List<AlignmentViewPanel> xrefViews = new ArrayList<>();
  70
  71   List<AlignmentViewPanel> getXrefViews()
  72   {
  73     return xrefViews;
  74   }
  75
  76   @Override
  77   public void run()
  78   {
  79     final long sttime = System.currentTimeMillis();
  80     alignFrame.setProgressBar(MessageManager.formatMessage(
  81             "status.searching_for_sequences_from", new Object[]
  82             { source }), sttime);
  83     try
  84     {
  85       AlignmentI alignment = alignFrame.getViewport().getAlignment();
  86       AlignmentI dataset = alignment.getDataset() == null ? alignment
  87               : alignment.getDataset();
  88       boolean dna = alignment.isNucleotide();
  89       if (_odna != dna)
  90       {
  91         System.err
  92                 .println("Conflict: showProducts for alignment originally "
  93                         + "thought to be " + (_odna ? "DNA" : "Protein")
  94                         + " now searching for " + (dna ? "DNA" : "Protein")
  95                         + " Context.");
  96       }
  97       AlignmentI xrefs = new CrossRef(sel, dataset)
  98               .findXrefSequences(source, dna);
  99       if (xrefs == null)
 100       {
 101         return;
 102       }
 103
 104       /*
 105        * try to look up chromosomal coordinates for nucleotide
 106        * sequences (if not already retrieved)
 107        */
 108       findGeneLoci(xrefs.getSequences());
 109
 110       /*
 111        * get display scheme (if any) to apply to features
 112        */
 113       FeatureSettingsModelI featureColourScheme = new SequenceFetcher()
 114               .getFeatureColourScheme(source);
 115
 116       if (dna && AlignmentUtils.looksLikeEnsembl(alignment))
 117       {
 118         // override default featureColourScheme so products have Ensembl variant
 119         // colours
 120         featureColourScheme = new SequenceFetcher()
 121                 .getFeatureColourScheme(DBRefSource.ENSEMBL);
 122       }
 123
 124       AlignmentI xrefsAlignment = makeCrossReferencesAlignment(dataset,
 125               xrefs);
 126       if (!dna)
 127       {
 128         xrefsAlignment = AlignmentUtils.makeCdsAlignment(
 129                 xrefsAlignment.getSequencesArray(), dataset, sel);
 130         xrefsAlignment.alignAs(alignment);
 131       }
 132
 133       /*
 134        * If we are opening a splitframe, make a copy of this alignment (sharing the same dataset
 135        * sequences). If we are DNA, drop introns and update mappings
 136        */
 137       AlignmentI copyAlignment = null;
 138
 139       if (Cache.getDefault(Preferences.ENABLE_SPLIT_FRAME, true))
 140       {
 141         copyAlignment = copyAlignmentForSplitFrame(alignment, dataset, dna,
 142                 xrefs, xrefsAlignment);
 143         if (copyAlignment == null)
 144         {
 145           return; // failed
 146         }
 147       }
 148
 149       /*
 150        * build AlignFrame(s) according to available alignment data
 151        */
 152       AlignFrame newFrame = new AlignFrame(xrefsAlignment,
 153               AlignFrame.DEFAULT_WIDTH, AlignFrame.DEFAULT_HEIGHT);
 154       if (Cache.getDefault("HIDE_INTRONS", true))
 155       {
 156         newFrame.hideFeatureColumns(SequenceOntologyI.EXON, false);
 157       }
 158       String newtitle = String.format("%s %s %s",
 159               dna ? MessageManager.getString("label.proteins")
 160                       : MessageManager.getString("label.nucleotides"),
 161               MessageManager.getString("label.for"), alignFrame.getTitle());
 162       newFrame.setTitle(newtitle);
 163
 164       if (copyAlignment == null)
 165       {
 166         /*
 167          * split frame display is turned off in preferences file
 168          */
 169         Desktop.addInternalFrame(newFrame, newtitle,
 170                 AlignFrame.DEFAULT_WIDTH, AlignFrame.DEFAULT_HEIGHT);
 171         xrefViews.add(newFrame.alignPanel);
 172         return; // via finally clause
 173       }
 174
 175       AlignFrame copyThis = new AlignFrame(copyAlignment,
 176               AlignFrame.DEFAULT_WIDTH, AlignFrame.DEFAULT_HEIGHT);
 177       copyThis.setTitle(alignFrame.getTitle());
 178
 179       boolean showSequenceFeatures = alignFrame.getViewport()
 180               .isShowSequenceFeatures();
 181       newFrame.setShowSeqFeatures(showSequenceFeatures);
 182       copyThis.setShowSeqFeatures(showSequenceFeatures);
 183       FeatureRendererModel myFeatureStyling = alignFrame.alignPanel
 184               .getSeqPanel().seqCanvas.getFeatureRenderer();
 185
 186       /*
 187        * copy feature rendering settings to split frame
 188        */
 189       FeatureRendererModel fr1 = newFrame.alignPanel.getSeqPanel().seqCanvas
 190               .getFeatureRenderer();
 191       fr1.transferSettings(myFeatureStyling);
 192       fr1.findAllFeatures(true);
 193       FeatureRendererModel fr2 = copyThis.alignPanel.getSeqPanel().seqCanvas
 194               .getFeatureRenderer();
 195       fr2.transferSettings(myFeatureStyling);
 196       fr2.findAllFeatures(true);
 197
 198       /*
 199        * apply 'database source' feature configuration
 200        * if any - first to the new splitframe view about to be displayed
 201        */
 202
 203       newFrame.getViewport().applyFeaturesStyle(featureColourScheme);
 204       copyThis.getViewport().applyFeaturesStyle(featureColourScheme);
 205
 206       /*
 207        * and for JAL-3330 also to original alignFrame view(s)
 208        * this currently trashes any original settings.
 209        */
 210       for (AlignmentViewPanel origpanel : alignFrame.getAlignPanels())
 211       {
 212         origpanel.getAlignViewport()
 213                 .mergeFeaturesStyle(featureColourScheme);
 214       }
 215
 216       SplitFrame sf = new SplitFrame(dna ? copyThis : newFrame,
 217               dna ? newFrame : copyThis);
 218
 219       newFrame.setVisible(true);
 220       copyThis.setVisible(true);
 221       String linkedTitle = MessageManager
 222               .getString("label.linked_view_title");
 223       Desktop.addInternalFrame(sf, linkedTitle, -1, -1);
 224       sf.adjustInitialLayout();
 225
 226       // finally add the top, then bottom frame to the view list
 227       xrefViews.add(dna ? copyThis.alignPanel : newFrame.alignPanel);
 228       xrefViews.add(!dna ? copyThis.alignPanel : newFrame.alignPanel);
 229
 230     } catch (OutOfMemoryError e)
 231     {
 232       new OOMWarning("whilst fetching crossreferences", e);
 233     } catch (Throwable e)
 234     {
 235       Console.error("Error when finding crossreferences", e);
 236     } finally
 237     {
 238       alignFrame.setProgressBar(MessageManager.formatMessage(
 239               "status.finished_searching_for_sequences_from", new Object[]
 240               { source }), sttime);
 241     }
 242   }
 243
 244   /**
 245    * Tries to add chromosomal coordinates to any nucleotide sequence which does
 246    * not already have them. Coordinates are retrieved from Ensembl given an
 247    * Ensembl identifier, either on the sequence itself or on a peptide sequence
 248    * it has a reference to.
 249    *
 250    * <pre>
 251    * Example (human):
 252    * - fetch EMBLCDS cross-references for Uniprot entry P30419
 253    * - the EMBL sequences do not have xrefs to Ensembl
 254    * - the Uniprot entry has xrefs to
 255    *    ENSP00000258960, ENSP00000468424, ENST00000258960, ENST00000592782
 256    * - either of the transcript ids can be used to retrieve gene loci e.g.
 257    *    http://rest.ensembl.org/map/cds/ENST00000592782/1..100000
 258    * Example (invertebrate):
 259    * - fetch EMBLCDS cross-references for Uniprot entry Q43517 (FER1_SOLLC)
 260    * - the Uniprot entry has an xref to ENSEMBLPLANTS Solyc10g044520.1.1
 261    * - can retrieve gene loci with
 262    *    http://rest.ensemblgenomes.org/map/cds/Solyc10g044520.1.1/1..100000
 263    * </pre>
 264    *
 265    * @param sequences
 266    */
 267   public static void findGeneLoci(List<SequenceI> sequences)
 268   {
 269     Map<DBRefEntry, GeneLociI> retrievedLoci = new HashMap<>();
 270     for (SequenceI seq : sequences)
 271     {
 272       findGeneLoci(seq, retrievedLoci);
 273     }
 274   }
 275
 276   /**
 277    * Tres to find chromosomal coordinates for the sequence, by searching its
 278    * direct and indirect cross-references for Ensembl. If the loci have already
 279    * been retrieved, just reads them out of the map of retrievedLoci; this is
 280    * the case of an alternative transcript for the same protein. Otherwise calls
 281    * a REST service to retrieve the loci, and if successful, adds them to the
 282    * sequence and to the retrievedLoci.
 283    *
 284    * @param seq
 285    * @param retrievedLoci
 286    */
 287   static void findGeneLoci(SequenceI seq,
 288           Map<DBRefEntry, GeneLociI> retrievedLoci)
 289   {
 290     /*
 291      * don't replace any existing chromosomal coordinates
 292      */
 293     if (seq == null || seq.isProtein() || seq.getGeneLoci() != null
 294             || seq.getDBRefs() == null)
 295     {
 296       return;
 297     }
 298
 299     Set<String> ensemblDivisions = new EnsemblInfo().getDivisions();
 300
 301     /*
 302      * first look for direct dbrefs from sequence to Ensembl
 303      */
 304     String[] divisionsArray = ensemblDivisions
 305             .toArray(new String[ensemblDivisions.size()]);
 306     List<DBRefEntry> seqRefs = seq.getDBRefs();
 307     List<DBRefEntry> directEnsemblRefs = DBRefUtils.selectRefs(seqRefs,
 308             divisionsArray);
 309     if (directEnsemblRefs != null)
 310     {
 311       for (DBRefEntry ensemblRef : directEnsemblRefs)
 312       {
 313         if (fetchGeneLoci(seq, ensemblRef, retrievedLoci))
 314         {
 315           return;
 316         }
 317       }
 318     }
 319
 320     /*
 321      * else look for indirect dbrefs from sequence to Ensembl
 322      */
 323     for (DBRefEntry dbref : seq.getDBRefs())
 324     {
 325       if (dbref.getMap() != null && dbref.getMap().getTo() != null)
 326       {
 327         List<DBRefEntry> dbrefs = dbref.getMap().getTo().getDBRefs();
 328         List<DBRefEntry> indirectEnsemblRefs = DBRefUtils.selectRefs(dbrefs,
 329                 divisionsArray);
 330         if (indirectEnsemblRefs != null)
 331         {
 332           for (DBRefEntry ensemblRef : indirectEnsemblRefs)
 333           {
 334             if (fetchGeneLoci(seq, ensemblRef, retrievedLoci))
 335             {
 336               return;
 337             }
 338           }
 339         }
 340       }
 341     }
 342   }
 343
 344   /**
 345    * Retrieves chromosomal coordinates for the Ensembl (or EnsemblGenomes)
 346    * identifier in dbref. If successful, and the sequence length matches gene
 347    * loci length, then add it to the sequence, and to the retrievedLoci map.
 348    * Answers true if successful, else false.
 349    *
 350    * @param seq
 351    * @param dbref
 352    * @param retrievedLoci
 353    * @return
 354    */
 355   static boolean fetchGeneLoci(SequenceI seq, DBRefEntry dbref,
 356           Map<DBRefEntry, GeneLociI> retrievedLoci)
 357   {
 358     String accession = dbref.getAccessionId();
 359     String division = dbref.getSource();
 360
 361     /*
 362      * hack: ignore cross-references to Ensembl protein ids
 363      * (or use map/translation perhaps?)
 364      * todo: is there an equivalent in EnsemblGenomes?
 365      */
 366     if (accession.startsWith("ENSP"))
 367     {
 368       return false;
 369     }
 370     EnsemblMap mapper = new EnsemblMap();
 371
 372     /*
 373      * try CDS mapping first
 374      */
 375     GeneLociI geneLoci = mapper.getCdsMapping(division, accession, 1,
 376             seq.getLength());
 377     if (geneLoci != null)
 378     {
 379       MapList map = geneLoci.getMapping();
 380       int mappedFromLength = MappingUtils.getLength(map.getFromRanges());
 381       if (mappedFromLength == seq.getLength())
 382       {
 383         seq.setGeneLoci(geneLoci.getSpeciesId(), geneLoci.getAssemblyId(),
 384                 geneLoci.getChromosomeId(), map);
 385         retrievedLoci.put(dbref, geneLoci);
 386         return true;
 387       }
 388     }
 389
 390     /*
 391      * else try CDNA mapping
 392      */
 393     geneLoci = mapper.getCdnaMapping(division, accession, 1,
 394             seq.getLength());
 395     if (geneLoci != null)
 396     {
 397       MapList map = geneLoci.getMapping();
 398       int mappedFromLength = MappingUtils.getLength(map.getFromRanges());
 399       if (mappedFromLength == seq.getLength())
 400       {
 401         seq.setGeneLoci(geneLoci.getSpeciesId(), geneLoci.getAssemblyId(),
 402                 geneLoci.getChromosomeId(), map);
 403         retrievedLoci.put(dbref, geneLoci);
 404         return true;
 405       }
 406     }
 407
 408     return false;
 409   }
 410
 411   /**
 412    * @param alignment
 413    * @param dataset
 414    * @param dna
 415    * @param xrefs
 416    * @param xrefsAlignment
 417    * @return
 418    */
 419   protected AlignmentI copyAlignmentForSplitFrame(AlignmentI alignment,
 420           AlignmentI dataset, boolean dna, AlignmentI xrefs,
 421           AlignmentI xrefsAlignment)
 422   {
 423     AlignmentI copyAlignment;
 424     boolean copyAlignmentIsAligned = false;
 425     if (dna)
 426     {
 427       copyAlignment = AlignmentUtils.makeCdsAlignment(sel, dataset,
 428               xrefsAlignment.getSequencesArray());
 429       if (copyAlignment.getHeight() == 0)
 430       {
 431         JvOptionPane.showMessageDialog(alignFrame,
 432                 MessageManager.getString("label.cant_map_cds"),
 433                 MessageManager.getString("label.operation_failed"),
 434                 JvOptionPane.OK_OPTION);
 435         System.err.println("Failed to make CDS alignment");
 436         return null;
 437       }
 438
 439       /*
 440        * pending getting Embl transcripts to 'align',
 441        * we are only doing this for Ensembl
 442        */
 443       // TODO proper criteria for 'can align as cdna'
 444       if (DBRefSource.ENSEMBL.equalsIgnoreCase(source)
 445               || AlignmentUtils.looksLikeEnsembl(alignment))
 446       {
 447         copyAlignment.alignAs(alignment);
 448         copyAlignmentIsAligned = true;
 449       }
 450     }
 451     else
 452     {
 453       copyAlignment = AlignmentUtils.makeCopyAlignment(sel,
 454               xrefs.getSequencesArray(), dataset);
 455     }
 456     copyAlignment.setGapCharacter(alignFrame.viewport.getGapCharacter());
 457
 458     StructureSelectionManager ssm = StructureSelectionManager
 459             .getStructureSelectionManager(Desktop.getInstance());
 460
 461     /*
 462      * register any new mappings for sequence mouseover etc
 463      * (will not duplicate any previously registered mappings)
 464      */
 465     ssm.registerMappings(dataset.getCodonFrames());
 466
 467     if (copyAlignment.getHeight() <= 0)
 468     {
 469       System.err.println("No Sequences generated for xRef type " + source);
 470       return null;
 471     }
 472
 473     /*
 474      * align protein to dna
 475      */
 476     if (dna && copyAlignmentIsAligned)
 477     {
 478       xrefsAlignment.alignAs(copyAlignment);
 479     }
 480     else
 481     {
 482       /*
 483        * align cdna to protein - currently only if
 484        * fetching and aligning Ensembl transcripts!
 485        */
 486       // TODO: generalise for other sources of locus/transcript/cds data
 487       if (dna && DBRefSource.ENSEMBL.equalsIgnoreCase(source))
 488       {
 489         copyAlignment.alignAs(xrefsAlignment);
 490       }
 491     }
 492
 493     return copyAlignment;
 494   }
 495
 496   /**
 497    * Makes an alignment containing the given sequences, and adds them to the
 498    * given dataset, which is also set as the dataset for the new alignment
 499    *
 500    * TODO: refactor to DatasetI method
 501    *
 502    * @param dataset
 503    * @param seqs
 504    * @return
 505    */
 506   protected AlignmentI makeCrossReferencesAlignment(AlignmentI dataset,
 507           AlignmentI seqs)
 508   {
 509     SequenceI[] sprods = new SequenceI[seqs.getHeight()];
 510     for (int s = 0; s < sprods.length; s++)
 511     {
 512       sprods[s] = (seqs.getSequenceAt(s)).deriveSequence();
 513       if (dataset.getSequences() == null || !dataset.getSequences()
 514               .contains(sprods[s].getDatasetSequence()))
 515       {
 516         dataset.addSequence(sprods[s].getDatasetSequence());
 517       }
 518       sprods[s].updatePDBIds();
 519     }
 520     Alignment al = new Alignment(sprods);
 521     al.setDataset(dataset);
 522     return al;
 523   }
 524
 525   /**
 526    * Constructor
 527    *
 528    * @param af
 529    * @param seqs
 530    * @param fromDna
 531    * @param dbSource
 532    */
 533   CrossRefAction(AlignFrame af, SequenceI[] seqs, boolean fromDna,
 534           String dbSource)
 535   {
 536     this.alignFrame = af;
 537     this.sel = seqs;
 538     this._odna = fromDna;
 539     this.source = dbSource;
 540   }
 541
 542   public static CrossRefAction getHandlerFor(final SequenceI[] sel,
 543           final boolean fromDna, final String source,
 544           final AlignFrame alignFrame)
 545   {
 546     return new CrossRefAction(alignFrame, sel, fromDna, source);
 547   }
 548
 549 }