JAL-1705 refine feature copying to including 5' and 3' overlaps
authorgmungoc <g.m.carstairs@dundee.ac.uk>
Thu, 28 Jan 2016 16:43:51 +0000 (16:43 +0000)
committergmungoc <g.m.carstairs@dundee.ac.uk>
Thu, 28 Jan 2016 16:43:51 +0000 (16:43 +0000)
src/jalview/analysis/AlignmentUtils.java
src/jalview/ext/ensembl/EnsemblSeqProxy.java
test/jalview/analysis/AlignmentUtilsTests.java

index fe95dca..41538eb 100644 (file)
@@ -34,6 +34,7 @@ import jalview.datamodel.Sequence;
 import jalview.datamodel.SequenceFeature;
 import jalview.datamodel.SequenceGroup;
 import jalview.datamodel.SequenceI;
+import jalview.io.gff.SequenceOntology;
 import jalview.schemes.ResidueProperties;
 import jalview.util.DBRefUtils;
 import jalview.util.MapList;
@@ -1406,23 +1407,27 @@ public class AlignmentUtils
       /*
        * transfer any features on dna that overlap the CDS
        */
-      transferFeatures(dnaSeq, cds, dnaToCds, "CDS" /* SequenceOntology.CDS */);
+      transferFeatures(dnaSeq, cds, dnaToCds, null, "CDS" /* SequenceOntology.CDS */);
     }
     return cdsSequences;
   }
 
   /**
-   * Transfers any co-located features on 'fromSeq' to 'toSeq', adjusting the
+   * Transfers co-located features on 'fromSeq' to 'toSeq', adjusting the
    * feature start/end ranges, optionally omitting specified feature types.
+   * Returns the number of features copied.
    * 
    * @param fromSeq
    * @param toSeq
+   * @param select
+   *          if not null, only features of this type are copied (including
+   *          subtypes in the Sequence Ontology)
    * @param mapping
    *          the mapping from 'fromSeq' to 'toSeq'
    * @param omitting
    */
-  protected static void transferFeatures(SequenceI fromSeq,
-          SequenceI toSeq, MapList mapping, String... omitting)
+  public static int transferFeatures(SequenceI fromSeq, SequenceI toSeq,
+          MapList mapping, String select, String... omitting)
   {
     SequenceI copyTo = toSeq;
     while (copyTo.getDatasetSequence() != null)
@@ -1430,12 +1435,18 @@ public class AlignmentUtils
       copyTo = copyTo.getDatasetSequence();
     }
 
+    SequenceOntology so = SequenceOntology.getInstance();
+    int count = 0;
     SequenceFeature[] sfs = fromSeq.getSequenceFeatures();
     if (sfs != null)
     {
       for (SequenceFeature sf : sfs)
       {
         String type = sf.getType();
+        if (select != null && !so.isA(type, select))
+        {
+          continue;
+        }
         boolean omit = false;
         for (String toOmit : omitting)
         {
@@ -1453,16 +1464,48 @@ public class AlignmentUtils
          * locate the mapped range - null if either start or end is
          * not mapped (no partial overlaps are calculated)
          */
-        int[] mappedTo = mapping.locateInTo(sf.getBegin(), sf.getEnd());
+        int start = sf.getBegin();
+        int end = sf.getEnd();
+        int[] mappedTo = mapping.locateInTo(start, end);
+        /*
+         * if whole exon range doesn't map, try interpreting it
+         * as 5' or 3' exon overlapping the CDS range
+         */
+        if (mappedTo == null)
+        {
+          mappedTo = mapping.locateInTo(end, end);
+          if (mappedTo != null)
+          {
+            /*
+             * end of exon is in CDS range - 5' overlap
+             * to a range from the start of the peptide
+             */
+            mappedTo[0] = 1;
+          }
+        }
+        if (mappedTo == null)
+        {
+          mappedTo = mapping.locateInTo(start, start);
+          if (mappedTo != null)
+          {
+            /*
+             * start of exon is in CDS range - 3' overlap
+             * to a range up to the end of the peptide
+             */
+            mappedTo[1] = toSeq.getLength();
+          }
+        }
         if (mappedTo != null)
         {
           SequenceFeature copy = new SequenceFeature(sf);
           copy.setBegin(Math.min(mappedTo[0], mappedTo[1]));
           copy.setEnd(Math.max(mappedTo[0], mappedTo[1]));
           copyTo.addSequenceFeature(copy);
+          count++;
         }
       }
     }
+    return count;
   }
 
   /**
index e241874..cbeaae9 100644 (file)
@@ -1,5 +1,6 @@
 package jalview.ext.ensembl;
 
+import jalview.analysis.AlignmentUtils;
 import jalview.datamodel.Alignment;
 import jalview.datamodel.AlignmentI;
 import jalview.datamodel.DBRefEntry;
@@ -847,7 +848,8 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
       peptide = peptide.getDatasetSequence();
     }
   
-    mapExonFeaturesToProtein(dnaSeq, peptide, dnaToProtein);
+    AlignmentUtils.transferFeatures(dnaSeq, peptide, dnaToProtein,
+            SequenceOntology.EXON);
 
     LinkedHashMap<Integer, String[][]> variants = buildDnaVariantsMap(
             dnaSeq, dnaToProtein);
@@ -878,47 +880,6 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
   }
 
   /**
-   * Transfers exon features to the corresponding mapped regions of the protein
-   * sequence. This is useful because it allows visualisation of exon boundaries
-   * on the peptide (using 'colour by label' for the exon name). Returns the
-   * number of features written.
-   * 
-   * @param dnaSeq
-   * @param peptide
-   * @param dnaToProtein
-   */
-  static int mapExonFeaturesToProtein(SequenceI dnaSeq, SequenceI peptide,
-          MapList dnaToProtein)
-  {
-    SequenceFeature[] sfs = dnaSeq.getSequenceFeatures();
-    if (sfs == null)
-    {
-      return 0;
-    }
-
-    SequenceOntology so = SequenceOntology.getInstance();
-    int count = 0;
-
-    for (SequenceFeature sf : sfs)
-    {
-      if (so.isA(sf.getType(), SequenceOntology.EXON))
-      {
-        int start = sf.getBegin();
-        int end = sf.getEnd();
-        int[] mapsTo = dnaToProtein.locateInTo(start, end);
-        if (mapsTo != null)
-        {
-          SequenceFeature copy = new SequenceFeature(SequenceOntology.EXON,
-                  sf.getDescription(), mapsTo[0], mapsTo[1], 0f, null);
-          peptide.addSequenceFeature(copy);
-          count++;
-        }
-      }
-    }
-    return count;
-  }
-
-  /**
    * Builds a map whose key is position in the protein sequence, and value is an
    * array of all variants for the coding codon positions
    * 
index 09bd64e..a82a881 100644 (file)
@@ -1342,6 +1342,9 @@ public class AlignmentUtilsTests
             "AAA---CCCTTT---");
   }
 
+  /**
+   * Tests for transferring features between mapped sequences
+   */
   @Test(groups = { "Functional" })
   public void testTransferFeatures()
   {
@@ -1380,34 +1383,111 @@ public class AlignmentUtilsTests
             new int[] { 1, 6 }, 1, 1);
 
     /*
-     * behaviour of transferFeatures depends on MapList.locateInTo()
-     * if start and end positions are mapped, returns the mapped region
-     * if either is not mapped, does _not_ search for overlapped region 
+     * transferFeatures() will build 'partial overlap' for regions
+     * that partially overlap 5' or 3' (start or end) of target sequence
      */
-    AlignmentUtils.transferFeatures(dna, cds, map);
+    AlignmentUtils.transferFeatures(dna, cds, map, null);
     SequenceFeature[] sfs = cds.getSequenceFeatures();
-    assertEquals(4, sfs.length);
+    assertEquals(6, sfs.length);
 
     SequenceFeature sf = sfs[0];
+    assertEquals("type2", sf.getType());
+    assertEquals("desc2", sf.getDescription());
+    assertEquals(2f, sf.getScore());
+    assertEquals(1, sf.getBegin());
+    assertEquals(1, sf.getEnd());
+
+    sf = sfs[1];
     assertEquals("type3", sf.getType());
     assertEquals("desc3", sf.getDescription());
     assertEquals(3f, sf.getScore());
     assertEquals(1, sf.getBegin());
     assertEquals(3, sf.getEnd());
 
-    sf = sfs[1];
+    sf = sfs[2];
     assertEquals("type4", sf.getType());
     assertEquals(2, sf.getBegin());
     assertEquals(5, sf.getEnd());
 
-    sf = sfs[2];
+    sf = sfs[3];
     assertEquals("type5", sf.getType());
     assertEquals(1, sf.getBegin());
     assertEquals(6, sf.getEnd());
 
-    sf = sfs[3];
+    sf = sfs[4];
     assertEquals("type8", sf.getType());
     assertEquals(6, sf.getBegin());
     assertEquals(6, sf.getEnd());
+
+    sf = sfs[5];
+    assertEquals("type9", sf.getType());
+    assertEquals(6, sf.getBegin());
+    assertEquals(6, sf.getEnd());
+  }
+
+  /**
+   * Tests for transferring features between mapped sequences
+   */
+  @Test(groups = { "Functional" })
+  public void testTransferFeatures_withOmit()
+  {
+    SequenceI dna = new Sequence("dna/20-34", "acgTAGcaaGCCcgt");
+    SequenceI cds = new Sequence("cds/10-15", "TAGGCC");
+
+    MapList map = new MapList(new int[] { 4, 6, 10, 12 },
+            new int[] { 1, 6 }, 1, 1);
+  
+    // [5, 11] maps to [2, 5]
+    dna.addSequenceFeature(new SequenceFeature("type4", "desc4", 5, 11, 4f,
+            null));
+    // [4, 12] maps to [1, 6]
+    dna.addSequenceFeature(new SequenceFeature("type5", "desc5", 4, 12, 5f,
+            null));
+    // [12, 12] maps to [6, 6]
+    dna.addSequenceFeature(new SequenceFeature("type8", "desc8", 12, 12,
+            8f, null));
+  
+    // desc4 and desc8 are the 'omit these' varargs
+    AlignmentUtils.transferFeatures(dna, cds, map, null, "type4", "type8");
+    SequenceFeature[] sfs = cds.getSequenceFeatures();
+    assertEquals(1, sfs.length);
+  
+    SequenceFeature sf = sfs[0];
+    assertEquals("type5", sf.getType());
+    assertEquals(1, sf.getBegin());
+    assertEquals(6, sf.getEnd());
+  }
+
+  /**
+   * Tests for transferring features between mapped sequences
+   */
+  @Test(groups = { "Functional" })
+  public void testTransferFeatures_withSelect()
+  {
+    SequenceI dna = new Sequence("dna/20-34", "acgTAGcaaGCCcgt");
+    SequenceI cds = new Sequence("cds/10-15", "TAGGCC");
+  
+    MapList map = new MapList(new int[] { 4, 6, 10, 12 },
+            new int[] { 1, 6 }, 1, 1);
+  
+    // [5, 11] maps to [2, 5]
+    dna.addSequenceFeature(new SequenceFeature("type4", "desc4", 5, 11, 4f,
+            null));
+    // [4, 12] maps to [1, 6]
+    dna.addSequenceFeature(new SequenceFeature("type5", "desc5", 4, 12, 5f,
+            null));
+    // [12, 12] maps to [6, 6]
+    dna.addSequenceFeature(new SequenceFeature("type8", "desc8", 12, 12,
+            8f, null));
+  
+    // "type5" is the 'select this type' argument
+    AlignmentUtils.transferFeatures(dna, cds, map, "type5");
+    SequenceFeature[] sfs = cds.getSequenceFeatures();
+    assertEquals(1, sfs.length);
+  
+    SequenceFeature sf = sfs[0];
+    assertEquals("type5", sf.getType());
+    assertEquals(1, sf.getBegin());
+    assertEquals(6, sf.getEnd());
   }
 }