JAL-4366 allow a 3di alignment to be loaded and shown as a split frame with existing...
author James Procter <j.procter@dundee.ac.uk>
Thu, 11 Jan 2024 14:44:32 +0000 (14:44 +0000)
committer James Procter <j.procter@dundee.ac.uk>
Thu, 11 Jan 2024 14:44:32 +0000 (14:44 +0000)
src/jalview/analysis/AlignmentUtils.java
src/jalview/gui/AlignViewport.java
test/jalview/analysis/AlignmentUtilsTests.java

index be5133f..af3669f 100644 (file)
@@ -1353,7 +1353,7 @@ public class AlignmentUtils
    */
   public static boolean isMappable(AlignmentI al1, AlignmentI al2)
   {
-    if (al1 == null || al2 == null)
+    if (al1 == null || al2 == null || al1 == al2)
     {
       return false;
     }
@@ -1363,7 +1363,12 @@ public class AlignmentUtils
      */
     if (al1.isNucleotide() == al2.isNucleotide())
     {
-      return false;
+      // or if protein - check if alternate coding
+      if (al1.isNucleotide())
+      {
+        return false;
+      }
+      return check3diPeptideMapping(al1,al2);
     }
     AlignmentI dna = al1.isNucleotide() ? al1 : al2;
     AlignmentI protein = dna == al1 ? al2 : al1;
@@ -1380,6 +1385,132 @@ public class AlignmentUtils
     }
     return false;
   }
+  /**
+   * Answers true if the alignments have the same height and the number of al1
+   * sequences with at least one partner in al2 (see canBuild3diMapping) equals
+   * that height. An al2 sequence may act as partner for several al1 sequences.
+   * @return true if every counted al1 sequence found a partner, else false
+   */
+  public static boolean check3diPeptideMapping(AlignmentI al1, AlignmentI al2)
+  {
+    final int height = al1.getHeight();
+    if (height != al2.getHeight())
+    {
+      return false;
+    }
+    int matched = 0;
+    for (SequenceI query : al1.getSequences())
+    {
+      for (SequenceI candidate : al2.getSequences())
+      {
+        if (canBuild3diMapping(query, candidate))
+        {
+          // one partner is enough - count this sequence and move on
+          matched++;
+          break;
+        }
+      }
+    }
+    return matched == height;
+  }
+
+  /**
+   * Answers true if both sequences have the same getDisplayId(true) value,
+   * ignoring case, and the same length once gap characters are extracted
+   * @param al1seq
+   * @param al2seq
+   * @return true if both conditions hold, else false
+   */
+  public static boolean canBuild3diMapping(SequenceI al1seq, SequenceI al2seq)
+  {
+    String id1 = al1seq.getDisplayId(true);
+    String id2 = al2seq.getDisplayId(true);
+    if (!id1.equalsIgnoreCase(id2))
+    {
+      return false;
+    }
+    int len1 = AlignSeq.extractGaps(jalview.util.Comparison.GapChars,
+            al1seq.getSequenceAsString()).length();
+    int len2 = AlignSeq.extractGaps(jalview.util.Comparison.GapChars,
+            al2seq.getSequenceAsString()).length();
+    return len1 == len2;
+  }
+  
+  /**
+   * Builds mappings between the sequences of a protein alignment and those of
+   * a 3di alignment; any mappings made are added to the protein alignment.
+   * 
+   * @param proteinAlignment
+   * @param tdiAlignment
+   * @return true if a mapping already existed or was made; false if none, or
+   *         if either argument is null
+   */
+  public static boolean map3diPeptideToProteinAligment(
+          AlignmentI proteinAlignment, AlignmentI tdiAlignment)
+  {
+    if (proteinAlignment != null && tdiAlignment != null)
+    {
+      // the sets receive the newly mapped sequences; they are not used here
+      return mapProteinToTdiAlignment(proteinAlignment, tdiAlignment,
+              new HashSet<SequenceI>(), new HashSet<SequenceI>());
+    }
+    return false;
+  }
+  
+
+  /**
+   * Make mappings between compatible sequences (as judged by
+   * canBuild3diMapping) of a protein and a 3di alignment. New mappings are
+   * added to the protein alignment, one AlignedCodonFrame per protein sequence.
+   * 
+   * @param proteinAlignment
+   * @param tdiAlignment
+   * @param mappedTdiSeq
+   *          a set of mapped 3di sequences (to add to)
+   * @param mappedProtein
+   *          a set of mapped Protein sequences (to add to)
+   * @return true if any pair was already mapped, or was mapped by this call
+   */
+  protected static boolean mapProteinToTdiAlignment(
+          final AlignmentI proteinAlignment, final AlignmentI tdiAlignment,
+          Set<SequenceI> mappedTdiSeq, Set<SequenceI> mappedProtein)
+  {
+    boolean anyMapping = false;
+    for (SequenceI protSeq : proteinAlignment.getSequences())
+    {
+      // mappings made for this protein sequence are gathered in one frame
+      AlignedCodonFrame frame = new AlignedCodonFrame();
+      boolean frameUsed = false;
+      for (SequenceI tdiSeq : tdiAlignment.getSequences())
+      {
+        if (mappingExists(proteinAlignment.getCodonFrames(),
+                protSeq.getDatasetSequence(), tdiSeq.getDatasetSequence()))
+        {
+          // already linked - nothing to add
+          anyMapping = true;
+          continue;
+        }
+        if (!canBuild3diMapping(protSeq, tdiSeq))
+        {
+          continue;
+        }
+        int[] protRange = new int[] { protSeq.getStart(), protSeq.getEnd() };
+        int[] tdiRange = new int[] { tdiSeq.getStart(), tdiSeq.getEnd() };
+        // 1:1 ratio between the two start-end ranges
+        frame.addMap(tdiSeq, protSeq, new MapList(protRange, tdiRange, 1, 1));
+        mappedTdiSeq.add(tdiSeq);
+        mappedProtein.add(protSeq);
+        anyMapping = true;
+        frameUsed = true;
+      }
+      if (frameUsed)
+      {
+        proteinAlignment.addCodonFrame(frame);
+      }
+    }
+    return anyMapping;
+  }
+
 
   /**
    * Returns true if the dna sequence is mapped, or could be mapped, to the
index a8bc815..d56764b 100644 (file)
@@ -830,18 +830,31 @@ public class AlignViewport extends AlignmentViewport
     AlignmentI thisAlignment = newWindowOrSplitPane
             ? new Alignment(getAlignment())
             : getAlignment();
-    AlignmentI protein = al.isNucleotide() ? thisAlignment : al;
-    final AlignmentI cdna = al.isNucleotide() ? al : thisAlignment;
-
-    /*
-     * Map sequences. At least one should get mapped as we have already passed
-     * the test for 'mappability'. Any mappings made will be added to the
-     * protein alignment. Note creating dataset sequences on the new alignment
-     * is a pre-requisite for building mappings.
-     */
+    
+    // always create dataset for imported alignment before doing anything else..
     al.setDataset(null);
-    AlignmentUtils.mapProteinAlignmentToCdna(protein, cdna);
+    
+    if (!al.isNucleotide() && !thisAlignment.isNucleotide())
+    {
+      // link AA to 3di or other kind of 'alternative' 1:1 mapping alignment
+      AlignmentUtils.map3diPeptideToProteinAligment(thisAlignment,al);
 
+    }
+    else
+    {
+      // link CODON triplets to Protein
+      AlignmentI protein = al.isNucleotide() ? thisAlignment : al;
+      final AlignmentI cdna = al.isNucleotide() ? al : thisAlignment;
+
+      /*
+       * Map sequences. At least one should get mapped as we have already passed
+       * the test for 'mappability'. Any mappings made will be added to the
+       * protein alignment. Note creating dataset sequences on the new alignment
+       * is a pre-requisite for building mappings.
+       */
+      AlignmentUtils.mapProteinAlignmentToCdna(protein, cdna);
+    }
+    
     /*
      * Create the AlignFrame for the added alignment. If it is protein, mappings
      * are registered with StructureSelectionManager as a side-effect.
@@ -876,7 +889,7 @@ public class AlignViewport extends AlignmentViewport
     if (newWindowOrSplitPane)
     {
       al.alignAs(thisAlignment);
-      protein = openSplitFrame(newAlignFrame, thisAlignment);
+      AlignmentI mapped = openSplitFrame(newAlignFrame, thisAlignment);
     }
   }
 
index f017662..044f2ef 100644 (file)
@@ -1438,8 +1438,12 @@ public class AlignmentUtilsTests
   {
     SequenceI dna1 = new Sequence("dna1", "cgCAGtgGT");
     SequenceI aa1 = new Sequence("aa1", "RSG");
+    SequenceI td1 = new Sequence("aa1", "QRV");
+    SequenceI td2 = new Sequence("aa2", "QRV");
     AlignmentI al1 = new Alignment(new SequenceI[] { dna1 });
     AlignmentI al2 = new Alignment(new SequenceI[] { aa1 });
+    AlignmentI al3 = new Alignment(new SequenceI[] { td1 });
+    AlignmentI al4 = new Alignment(new SequenceI[] { td2 });
 
     assertFalse(AlignmentUtils.isMappable(null, null));
     assertFalse(AlignmentUtils.isMappable(al1, null));
@@ -1447,8 +1451,19 @@ public class AlignmentUtilsTests
     assertFalse(AlignmentUtils.isMappable(al1, al1));
     assertFalse(AlignmentUtils.isMappable(al2, al2));
 
+   
     assertTrue(AlignmentUtils.isMappable(al1, al2));
     assertTrue(AlignmentUtils.isMappable(al2, al1));
+
+
+    // test 3di/peptide mappability 
+    assertFalse(AlignmentUtils.isMappable(al1, al3));
+    assertFalse(AlignmentUtils.isMappable(al2, al4));
+    assertFalse(AlignmentUtils.isMappable(al3, al4));
+
+    
+    assertTrue(AlignmentUtils.isMappable(al2, al3));
+    assertTrue(AlignmentUtils.isMappable(al3, al2));
   }
 
   /**