JAL-3949 Complete new abstracted logging framework in jalview.log. Updated log calls...
[jalview.git] / src / jalview / gui / CrossRefAction.java
index 32af226..8c64c32 100644 (file)
@@ -27,17 +27,26 @@ import jalview.api.FeatureSettingsModelI;
 import jalview.bin.Cache;
 import jalview.datamodel.Alignment;
 import jalview.datamodel.AlignmentI;
+import jalview.datamodel.DBRefEntry;
 import jalview.datamodel.DBRefSource;
+import jalview.datamodel.GeneLociI;
 import jalview.datamodel.SequenceI;
+import jalview.ext.ensembl.EnsemblInfo;
+import jalview.ext.ensembl.EnsemblMap;
 import jalview.io.gff.SequenceOntologyI;
 import jalview.structure.StructureSelectionManager;
+import jalview.util.DBRefUtils;
+import jalview.util.MapList;
+import jalview.util.MappingUtils;
 import jalview.util.MessageManager;
+import jalview.viewmodel.seqfeatures.FeatureRendererModel;
 import jalview.ws.SequenceFetcher;
 
 import java.util.ArrayList;
+import java.util.HashMap;
 import java.util.List;
-
-import javax.swing.JOptionPane;
+import java.util.Map;
+import java.util.Set;
 
 /**
  * Factory constructor and runnable for discovering and displaying
@@ -52,13 +61,13 @@ public class CrossRefAction implements Runnable
 
   private SequenceI[] sel;
 
-  private boolean _odna;
+  private final boolean _odna;
 
   private String source;
 
-  List<AlignmentViewPanel> xrefViews = new ArrayList<AlignmentViewPanel>();
+  List<AlignmentViewPanel> xrefViews = new ArrayList<>();
 
-  public List<jalview.api.AlignmentViewPanel> getXrefViews()
+  List<AlignmentViewPanel> getXrefViews()
   {
     return xrefViews;
   }
@@ -67,10 +76,9 @@ public class CrossRefAction implements Runnable
   public void run()
   {
     final long sttime = System.currentTimeMillis();
-    alignFrame.setProgressBar(
-            MessageManager.formatMessage(
-                    "status.searching_for_sequences_from",
-                    new Object[] { source }), sttime);
+    alignFrame.setProgressBar(MessageManager.formatMessage(
+            "status.searching_for_sequences_from", new Object[]
+            { source }), sttime);
     try
     {
       AlignmentI alignment = alignFrame.getViewport().getAlignment();
@@ -85,18 +93,32 @@ public class CrossRefAction implements Runnable
                         + " now searching for " + (dna ? "DNA" : "Protein")
                         + " Context.");
       }
-      AlignmentI xrefs = new CrossRef(sel, dataset).findXrefSequences(
-              source, dna);
+      AlignmentI xrefs = new CrossRef(sel, dataset)
+              .findXrefSequences(source, dna);
       if (xrefs == null)
       {
         return;
       }
+
+      /*
+       * try to look up chromosomal coordinates for nucleotide
+       * sequences (if not already retrieved)
+       */
+      findGeneLoci(xrefs.getSequences());
+
       /*
        * get display scheme (if any) to apply to features
        */
       FeatureSettingsModelI featureColourScheme = new SequenceFetcher()
               .getFeatureColourScheme(source);
 
+      if (dna && AlignmentUtils.looksLikeEnsembl(alignment))
+      {
+        // override default featureColourScheme so products have Ensembl variant colours
+        featureColourScheme = new SequenceFetcher()
+                .getFeatureColourScheme(DBRefSource.ENSEMBL);
+      }
+
       AlignmentI xrefsAlignment = makeCrossReferencesAlignment(dataset,
               xrefs);
       if (!dna)
@@ -114,75 +136,14 @@ public class CrossRefAction implements Runnable
 
       if (Cache.getDefault(Preferences.ENABLE_SPLIT_FRAME, true))
       {
-        boolean copyAlignmentIsAligned = false;
-        if (dna)
+        copyAlignment = copyAlignmentForSplitFrame(alignment, dataset, dna,
+                xrefs, xrefsAlignment);
+        if (copyAlignment == null)
         {
-          copyAlignment = AlignmentUtils.makeCdsAlignment(sel, dataset,
-                  xrefsAlignment.getSequencesArray());
-          if (copyAlignment.getHeight() == 0)
-          {
-            JOptionPane.showMessageDialog(alignFrame,
-                    MessageManager.getString("label.cant_map_cds"),
-                    MessageManager.getString("label.operation_failed"),
-                    JOptionPane.OK_OPTION);
-            System.err.println("Failed to make CDS alignment");
-          }
-
-          /*
-           * pending getting Embl transcripts to 'align', 
-           * we are only doing this for Ensembl
-           */
-          // TODO proper criteria for 'can align as cdna'
-          if (DBRefSource.ENSEMBL.equalsIgnoreCase(source)
-                  || AlignmentUtils.looksLikeEnsembl(alignment))
-          {
-            copyAlignment.alignAs(alignment);
-            copyAlignmentIsAligned = true;
-          }
-        }
-        else
-        {
-          copyAlignment = AlignmentUtils.makeCopyAlignment(sel,
-                  xrefs.getSequencesArray(), dataset);
-        }
-        copyAlignment
-                .setGapCharacter(alignFrame.viewport.getGapCharacter());
-
-        StructureSelectionManager ssm = StructureSelectionManager
-                .getStructureSelectionManager(Desktop.instance);
-
-        /*
-         * register any new mappings for sequence mouseover etc
-         * (will not duplicate any previously registered mappings)
-         */
-        ssm.registerMappings(dataset.getCodonFrames());
-
-        if (copyAlignment.getHeight() <= 0)
-        {
-          System.err.println("No Sequences generated for xRef type "
-                  + source);
-          return;
-        }
-        /*
-         * align protein to dna
-         */
-        if (dna && copyAlignmentIsAligned)
-        {
-          xrefsAlignment.alignAs(copyAlignment);
-        }
-        else
-        {
-          /*
-           * align cdna to protein - currently only if 
-           * fetching and aligning Ensembl transcripts!
-           */
-          // TODO: generalise for other sources of locus/transcript/cds data
-          if (dna && DBRefSource.ENSEMBL.equalsIgnoreCase(source))
-          {
-            copyAlignment.alignAs(xrefsAlignment);
-          }
+          return; // failed
         }
       }
+
       /*
        * build AlignFrame(s) according to available alignment data
        */
@@ -208,6 +169,7 @@ public class CrossRefAction implements Runnable
         xrefViews.add(newFrame.alignPanel);
         return; // via finally clause
       }
+
       AlignFrame copyThis = new AlignFrame(copyAlignment,
               AlignFrame.DEFAULT_WIDTH, AlignFrame.DEFAULT_HEIGHT);
       copyThis.setTitle(alignFrame.getTitle());
@@ -216,34 +178,47 @@ public class CrossRefAction implements Runnable
               .isShowSequenceFeatures();
       newFrame.setShowSeqFeatures(showSequenceFeatures);
       copyThis.setShowSeqFeatures(showSequenceFeatures);
-      FeatureRenderer myFeatureStyling = alignFrame.alignPanel
+      FeatureRendererModel myFeatureStyling = alignFrame.alignPanel
               .getSeqPanel().seqCanvas.getFeatureRenderer();
 
       /*
        * copy feature rendering settings to split frame
        */
-      newFrame.alignPanel.getSeqPanel().seqCanvas.getFeatureRenderer()
-              .transferSettings(myFeatureStyling);
-      copyThis.alignPanel.getSeqPanel().seqCanvas.getFeatureRenderer()
-              .transferSettings(myFeatureStyling);
+      FeatureRendererModel fr1 = newFrame.alignPanel.getSeqPanel().seqCanvas
+              .getFeatureRenderer();
+      fr1.transferSettings(myFeatureStyling);
+      fr1.findAllFeatures(true);
+      FeatureRendererModel fr2 = copyThis.alignPanel.getSeqPanel().seqCanvas
+              .getFeatureRenderer();
+      fr2.transferSettings(myFeatureStyling);
+      fr2.findAllFeatures(true);
 
       /*
        * apply 'database source' feature configuration
-       * if any was found
+       * if any - first to the new splitframe view about to be displayed
        */
-      // TODO is this the feature colouring for the original
-      // alignment or the fetched xrefs? either could be Ensembl
+
       newFrame.getViewport().applyFeaturesStyle(featureColourScheme);
       copyThis.getViewport().applyFeaturesStyle(featureColourScheme);
 
+      /*
+       * and for JAL-3330 also to original alignFrame view(s)
+       * this currently trashes any original settings.
+       */
+      for (AlignmentViewPanel origpanel: alignFrame.getAlignPanels()) {
+        origpanel.getAlignViewport()
+                .mergeFeaturesStyle(featureColourScheme);
+      }
+
       SplitFrame sf = new SplitFrame(dna ? copyThis : newFrame,
               dna ? newFrame : copyThis);
+
       newFrame.setVisible(true);
       copyThis.setVisible(true);
       String linkedTitle = MessageManager
               .getString("label.linked_view_title");
       Desktop.addInternalFrame(sf, linkedTitle, -1, -1);
-      sf.adjustDivider();
+      sf.adjustInitialLayout();
 
       // finally add the top, then bottom frame to the view list
       xrefViews.add(dna ? copyThis.alignPanel : newFrame.alignPanel);
@@ -254,16 +229,270 @@ public class CrossRefAction implements Runnable
       new OOMWarning("whilst fetching crossreferences", e);
     } catch (Throwable e)
     {
-      Cache.log.error("Error when finding crossreferences", e);
+      Cache.error("Error when finding crossreferences", e);
     } finally
     {
       alignFrame.setProgressBar(MessageManager.formatMessage(
-              "status.finished_searching_for_sequences_from",
-              new Object[] { source }), sttime);
+              "status.finished_searching_for_sequences_from", new Object[]
+              { source }), sttime);
+    }
+  }
+
+  /**
+   * Tries to add chromosomal coordinates to any nucleotide sequence which does
+   * not already have them. Coordinates are retrieved from Ensembl given an
+   * Ensembl identifier, either on the sequence itself or on a peptide sequence
+   * it has a reference to.
+   * 
+   * <pre>
+   * Example (human):
+   * - fetch EMBLCDS cross-references for Uniprot entry P30419
+   * - the EMBL sequences do not have xrefs to Ensembl
+   * - the Uniprot entry has xrefs to 
+   *    ENSP00000258960, ENSP00000468424, ENST00000258960, ENST00000592782
+   * - either of the transcript ids can be used to retrieve gene loci e.g.
+   *    http://rest.ensembl.org/map/cds/ENST00000592782/1..100000
+   * Example (invertebrate):
+   * - fetch EMBLCDS cross-references for Uniprot entry Q43517 (FER1_SOLLC)
+   * - the Uniprot entry has an xref to ENSEMBLPLANTS Solyc10g044520.1.1
+   * - can retrieve gene loci with
+   *    http://rest.ensemblgenomes.org/map/cds/Solyc10g044520.1.1/1..100000
+   * </pre>
+   * 
+   * @param sequences
+   */
+  public static void findGeneLoci(List<SequenceI> sequences)
+  {
+    Map<DBRefEntry, GeneLociI> retrievedLoci = new HashMap<>();
+    for (SequenceI seq : sequences)
+    {
+      findGeneLoci(seq, retrievedLoci);
+    }
+  }
+
+  /**
+   * Tres to find chromosomal coordinates for the sequence, by searching its
+   * direct and indirect cross-references for Ensembl. If the loci have already
+   * been retrieved, just reads them out of the map of retrievedLoci; this is
+   * the case of an alternative transcript for the same protein. Otherwise calls
+   * a REST service to retrieve the loci, and if successful, adds them to the
+   * sequence and to the retrievedLoci.
+   * 
+   * @param seq
+   * @param retrievedLoci
+   */
+  static void findGeneLoci(SequenceI seq,
+          Map<DBRefEntry, GeneLociI> retrievedLoci)
+  {
+    /*
+     * don't replace any existing chromosomal coordinates
+     */
+    if (seq == null || seq.isProtein() || seq.getGeneLoci() != null
+            || seq.getDBRefs() == null)
+    {
+      return;
+    }
+    
+    Set<String> ensemblDivisions = new EnsemblInfo().getDivisions();
+    
+    /*
+     * first look for direct dbrefs from sequence to Ensembl
+     */
+    String[] divisionsArray = ensemblDivisions
+            .toArray(new String[ensemblDivisions.size()]);
+    List<DBRefEntry> seqRefs = seq.getDBRefs();
+    List<DBRefEntry> directEnsemblRefs = DBRefUtils.selectRefs(seqRefs,
+            divisionsArray);
+    if (directEnsemblRefs != null)
+    {
+      for (DBRefEntry ensemblRef : directEnsemblRefs)
+      {
+        if (fetchGeneLoci(seq, ensemblRef, retrievedLoci))
+        {
+          return;
+        }
+      }
+    }
+
+    /*
+     * else look for indirect dbrefs from sequence to Ensembl
+     */
+    for (DBRefEntry dbref : seq.getDBRefs())
+    {
+      if (dbref.getMap() != null && dbref.getMap().getTo() != null)
+      {
+        List<DBRefEntry> dbrefs = dbref.getMap().getTo().getDBRefs();
+        List<DBRefEntry> indirectEnsemblRefs = DBRefUtils.selectRefs(dbrefs,
+                divisionsArray);
+        if (indirectEnsemblRefs != null)
+        {
+          for (DBRefEntry ensemblRef : indirectEnsemblRefs)
+          {
+            if (fetchGeneLoci(seq, ensemblRef, retrievedLoci))
+            {
+              return;
+            }
+          }
+        }
+      }
     }
   }
 
   /**
+   * Retrieves chromosomal coordinates for the Ensembl (or EnsemblGenomes)
+   * identifier in dbref. If successful, and the sequence length matches gene
+   * loci length, then add it to the sequence, and to the retrievedLoci map.
+   * Answers true if successful, else false.
+   * 
+   * @param seq
+   * @param dbref
+   * @param retrievedLoci
+   * @return
+   */
+  static boolean fetchGeneLoci(SequenceI seq, DBRefEntry dbref,
+          Map<DBRefEntry, GeneLociI> retrievedLoci)
+  {
+    String accession = dbref.getAccessionId();
+    String division = dbref.getSource();
+
+    /*
+     * hack: ignore cross-references to Ensembl protein ids
+     * (or use map/translation perhaps?)
+     * todo: is there an equivalent in EnsemblGenomes?
+     */
+    if (accession.startsWith("ENSP"))
+    {
+      return false;
+    }
+    EnsemblMap mapper = new EnsemblMap();
+
+    /*
+     * try CDS mapping first
+     */
+    GeneLociI geneLoci = mapper.getCdsMapping(division, accession, 1,
+            seq.getLength());
+    if (geneLoci != null)
+    {
+      MapList map = geneLoci.getMapping();
+      int mappedFromLength = MappingUtils.getLength(map.getFromRanges());
+      if (mappedFromLength == seq.getLength())
+      {
+        seq.setGeneLoci(geneLoci.getSpeciesId(), geneLoci.getAssemblyId(),
+                geneLoci.getChromosomeId(), map);
+        retrievedLoci.put(dbref, geneLoci);
+        return true;
+      }
+    }
+
+    /*
+     * else try CDNA mapping
+     */
+    geneLoci = mapper.getCdnaMapping(division, accession, 1,
+            seq.getLength());
+    if (geneLoci != null)
+    {
+      MapList map = geneLoci.getMapping();
+      int mappedFromLength = MappingUtils.getLength(map.getFromRanges());
+      if (mappedFromLength == seq.getLength())
+      {
+        seq.setGeneLoci(geneLoci.getSpeciesId(), geneLoci.getAssemblyId(),
+                geneLoci.getChromosomeId(), map);
+        retrievedLoci.put(dbref, geneLoci);
+        return true;
+      }
+    }
+
+    return false;
+  }
+
+  /**
+   * @param alignment
+   * @param dataset
+   * @param dna
+   * @param xrefs
+   * @param xrefsAlignment
+   * @return
+   */
+  protected AlignmentI copyAlignmentForSplitFrame(AlignmentI alignment,
+          AlignmentI dataset, boolean dna, AlignmentI xrefs,
+          AlignmentI xrefsAlignment)
+  {
+    AlignmentI copyAlignment;
+    boolean copyAlignmentIsAligned = false;
+    if (dna)
+    {
+      copyAlignment = AlignmentUtils.makeCdsAlignment(sel, dataset,
+              xrefsAlignment.getSequencesArray());
+      if (copyAlignment.getHeight() == 0)
+      {
+        JvOptionPane.showMessageDialog(alignFrame,
+                MessageManager.getString("label.cant_map_cds"),
+                MessageManager.getString("label.operation_failed"),
+                JvOptionPane.OK_OPTION);
+        System.err.println("Failed to make CDS alignment");
+        return null;
+      }
+
+      /*
+       * pending getting Embl transcripts to 'align', 
+       * we are only doing this for Ensembl
+       */
+      // TODO proper criteria for 'can align as cdna'
+      if (DBRefSource.ENSEMBL.equalsIgnoreCase(source)
+              || AlignmentUtils.looksLikeEnsembl(alignment))
+      {
+        copyAlignment.alignAs(alignment);
+        copyAlignmentIsAligned = true;
+      }
+    }
+    else
+    {
+      copyAlignment = AlignmentUtils.makeCopyAlignment(sel,
+              xrefs.getSequencesArray(), dataset);
+    }
+    copyAlignment
+            .setGapCharacter(alignFrame.viewport.getGapCharacter());
+
+    StructureSelectionManager ssm = StructureSelectionManager
+            .getStructureSelectionManager(Desktop.instance);
+
+    /*
+     * register any new mappings for sequence mouseover etc
+     * (will not duplicate any previously registered mappings)
+     */
+    ssm.registerMappings(dataset.getCodonFrames());
+
+    if (copyAlignment.getHeight() <= 0)
+    {
+      System.err.println(
+              "No Sequences generated for xRef type " + source);
+      return null;
+    }
+
+    /*
+     * align protein to dna
+     */
+    if (dna && copyAlignmentIsAligned)
+    {
+      xrefsAlignment.alignAs(copyAlignment);
+    }
+    else
+    {
+      /*
+       * align cdna to protein - currently only if 
+       * fetching and aligning Ensembl transcripts!
+       */
+      // TODO: generalise for other sources of locus/transcript/cds data
+      if (dna && DBRefSource.ENSEMBL.equalsIgnoreCase(source))
+      {
+        copyAlignment.alignAs(xrefsAlignment);
+      }
+    }
+
+    return copyAlignment;
+  }
+
+  /**
    * Makes an alignment containing the given sequences, and adds them to the
    * given dataset, which is also set as the dataset for the new alignment
    * 
@@ -280,9 +509,8 @@ public class CrossRefAction implements Runnable
     for (int s = 0; s < sprods.length; s++)
     {
       sprods[s] = (seqs.getSequenceAt(s)).deriveSequence();
-      if (dataset.getSequences() == null
-              || !dataset.getSequences().contains(
-                      sprods[s].getDatasetSequence()))
+      if (dataset.getSequences() == null || !dataset.getSequences()
+              .contains(sprods[s].getDatasetSequence()))
       {
         dataset.addSequence(sprods[s].getDatasetSequence());
       }
@@ -293,20 +521,28 @@ public class CrossRefAction implements Runnable
     return al;
   }
 
-  public CrossRefAction(AlignFrame alignFrame, SequenceI[] sel,
-          boolean _odna, String source)
+  /**
+   * Constructor
+   * 
+   * @param af
+   * @param seqs
+   * @param fromDna
+   * @param dbSource
+   */
+  CrossRefAction(AlignFrame af, SequenceI[] seqs, boolean fromDna,
+          String dbSource)
   {
-    this.alignFrame = alignFrame;
-    this.sel = sel;
-    this._odna = _odna;
-    this.source = source;
+    this.alignFrame = af;
+    this.sel = seqs;
+    this._odna = fromDna;
+    this.source = dbSource;
   }
 
-  public static CrossRefAction showProductsFor(final SequenceI[] sel,
-          final boolean _odna, final String source,
+  public static CrossRefAction getHandlerFor(final SequenceI[] sel,
+          final boolean fromDna, final String source,
           final AlignFrame alignFrame)
   {
-    return new CrossRefAction(alignFrame, sel, _odna, source);
+    return new CrossRefAction(alignFrame, sel, fromDna, source);
   }
 
 }