JAL-1705 align CDS and peptide products to transcripts
author gmungoc <g.m.carstairs@dundee.ac.uk>
Tue, 1 Mar 2016 12:09:47 +0000 (12:09 +0000)
committer gmungoc <g.m.carstairs@dundee.ac.uk>
Tue, 1 Mar 2016 12:09:47 +0000 (12:09 +0000)
12 files changed:
src/jalview/datamodel/Sequence.java
src/jalview/ext/ensembl/EnsemblGene.java
src/jalview/ext/ensembl/EnsemblSeqProxy.java
src/jalview/gui/AlignFrame.java
src/jalview/schemes/FeatureColourScheme.java [deleted file]
src/jalview/schemes/FeatureSettingsAdapter.java [new file with mode: 0644]
src/jalview/util/MapList.java
src/jalview/util/MappingUtils.java
test/jalview/analysis/AlignmentUtilsTests.java
test/jalview/datamodel/SequenceTest.java
test/jalview/util/MapListTest.java
test/jalview/util/MappingUtilsTest.java

index 7b05649..3ea510b 100755 (executable)
@@ -611,17 +611,15 @@ public class Sequence extends ASequence implements SequenceI
   }
 
   /**
-   * DOCUMENT ME!
-   * 
-   * @param i
-   *          DOCUMENT ME!
+   * Returns the character of the aligned sequence at the given position (base
+   * zero), or space if the position is not within the sequence's bounds
    * 
-   * @return DOCUMENT ME!
+   * @return
    */
   @Override
   public char getCharAt(int i)
   {
-    if (i < sequence.length)
+    if (i >= 0 && i < sequence.length)
     {
       return sequence[i];
     }
index df4e45a..6507ff5 100644 (file)
@@ -1,5 +1,6 @@
 package jalview.ext.ensembl;
 
+import jalview.api.FeatureColourI;
 import jalview.api.FeatureSettingsI;
 import jalview.datamodel.AlignmentI;
 import jalview.datamodel.Sequence;
@@ -7,10 +8,12 @@ import jalview.datamodel.SequenceFeature;
 import jalview.datamodel.SequenceI;
 import jalview.io.gff.SequenceOntologyFactory;
 import jalview.io.gff.SequenceOntologyI;
-import jalview.schemes.FeatureColourScheme;
+import jalview.schemes.FeatureColourAdapter;
+import jalview.schemes.FeatureSettingsAdapter;
 import jalview.util.MapList;
 import jalview.util.StringUtils;
 
+import java.awt.Color;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
@@ -117,6 +120,9 @@ public class EnsemblGene extends EnsemblSeqProxy
       return getSequenceRecords(theIds);
     }
 
+    /*
+     * fetch the gene sequence(s) with features and xrefs
+     */
     AlignmentI al = super.getSequenceRecords(query);
 
     /*
@@ -173,6 +179,36 @@ public class EnsemblGene extends EnsemblSeqProxy
     {
       makeTranscript(transcriptFeature, al, gene);
     }
+
+    clearGeneFeatures(gene);
+  }
+
+  /**
+   * Remove unwanted features (transcript, exon, CDS) from the gene sequence
+   * after we have used them to derive transcripts and transfer features
+   * 
+   * @param gene
+   */
+  protected void clearGeneFeatures(SequenceI gene)
+  {
+    SequenceFeature[] sfs = gene.getSequenceFeatures();
+    if (sfs != null)
+    {
+      SequenceOntologyI so = SequenceOntologyFactory.getInstance();
+      List<SequenceFeature> filtered = new ArrayList<SequenceFeature>();
+      for (SequenceFeature sf : sfs)
+      {
+        String type = sf.getType();
+        if (!isTranscript(type) && !so.isA(type, SequenceOntologyI.EXON)
+                && !so.isA(type, SequenceOntologyI.CDS))
+        {
+          filtered.add(sf);
+        }
+      }
+      gene.setSequenceFeatures(filtered
+              .toArray(new SequenceFeature[filtered
+              .size()]));
+    }
   }
 
   /**
@@ -362,13 +398,13 @@ public class EnsemblGene extends EnsemblSeqProxy
   @Override
   protected boolean retainFeature(SequenceFeature sf, String accessionId)
   {
-    if (SequenceOntologyFactory.getInstance().isA(sf.getType(),
-            SequenceOntologyI.GENE))
+    SequenceOntologyI so = SequenceOntologyFactory.getInstance();
+    String type = sf.getType();
+    if (so.isA(type, SequenceOntologyI.GENE))
     {
       return false;
     }
-
-    if (isTranscript(sf.getType()))
+    if (isTranscript(type))
     {
       String parent = (String) sf.getValue(PARENT);
       if (!(GENE_PREFIX + accessionId).equals(parent))
@@ -417,7 +453,70 @@ public class EnsemblGene extends EnsemblSeqProxy
   @Override
   public FeatureSettingsI getFeatureColourScheme()
   {
-    return FeatureColourScheme.EnsemblVariants;
+    return new FeatureSettingsAdapter()
+    {
+      SequenceOntologyI so = SequenceOntologyFactory.getInstance();
+      @Override
+      public boolean isFeatureDisplayed(String type)
+      {
+        return (so.isA(type, SequenceOntologyI.EXON) || so.isA(type,
+                SequenceOntologyI.SEQUENCE_VARIANT));
+      }
+
+      @Override
+      public FeatureColourI getFeatureColour(String type)
+      {
+        if (so.isA(type, SequenceOntologyI.EXON))
+        {
+          return new FeatureColourAdapter()
+          {
+            @Override
+            public boolean isColourByLabel()
+            {
+              return true;
+            }
+          };
+        }
+        if (so.isA(type, SequenceOntologyI.SEQUENCE_VARIANT))
+        {
+          return new FeatureColourAdapter()
+          {
+
+            @Override
+            public Color getColour()
+            {
+              return Color.RED;
+            }
+          };
+        }
+        return null;
+      }
+
+      /**
+       * order to render sequence_variant after exon after the rest
+       */
+      @Override
+      public int compare(String feature1, String feature2)
+      {
+        if (so.isA(feature1, SequenceOntologyI.SEQUENCE_VARIANT))
+        {
+          return +1;
+        }
+        if (so.isA(feature2, SequenceOntologyI.SEQUENCE_VARIANT))
+        {
+          return -1;
+        }
+        if (so.isA(feature1, SequenceOntologyI.EXON))
+        {
+          return +1;
+        }
+        if (so.isA(feature2, SequenceOntologyI.EXON))
+        {
+          return -1;
+        }
+        return 0;
+      }
+    };
   }
 
 }
index 77263ff..869a702 100644 (file)
@@ -343,6 +343,7 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
     int mappedDnaLength = getCdsRanges(dnaSeq, ranges);
 
     int proteinLength = proteinSeq.getLength();
+    int proteinEnd = proteinLength;
     int proteinStart = 1;
 
     /*
@@ -367,7 +368,7 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
     }
     if (codesForResidues == proteinLength)
     {
-      proteinRange.add(new int[] { proteinStart, proteinLength });
+      proteinRange.add(new int[] { proteinStart, proteinEnd });
       return new MapList(ranges, proteinRange, 3, 1);
     }
     return null;
index 7777324..8d54f08 100644 (file)
@@ -4780,11 +4780,13 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener,
               AlignmentI copyAlignment = null;
               final SequenceI[] sequenceSelection = AlignFrame.this.viewport
                       .getSequenceSelection();
+              final char gapChar = AlignFrame.this.viewport
+                      .getGapCharacter();
               List<AlignedCodonFrame> cf = xrefs.getCodonFrames();
               if (dna)
               {
                 copyAlignment = AlignmentUtils.makeCdsAlignment(
-                        sequenceSelection, cf);
+                        sequenceSelection, cf, gapChar);
                 al.getCodonFrames().clear();
                 al.getCodonFrames().addAll(cf);
               }
@@ -4793,6 +4795,7 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener,
                 copyAlignment = new Alignment(new Alignment(
                         sequenceSelection));
                 copyAlignment.getCodonFrames().addAll(cf);
+                copyAlignment.setGapCharacter(gapChar);
               }
               StructureSelectionManager ssm = StructureSelectionManager
                       .getStructureSelectionManager(Desktop.instance);
@@ -4810,15 +4813,14 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener,
               /*
                * align protein to dna
                */
-              // TODO needs debugging
-              // if (dna)
-              // {
-              // al.alignAs(copyAlignment);
-              // }
-              // else
-              // {
-              // copyAlignment.alignAs(al);
-              // }
+              if (dna)
+              {
+                al.alignAs(copyAlignment);
+              }
+              else
+              {
+                copyAlignment.alignAs(al);
+              }
 
               AlignFrame copyThis = new AlignFrame(copyAlignment,
                       AlignFrame.DEFAULT_WIDTH, AlignFrame.DEFAULT_HEIGHT);
diff --git a/src/jalview/schemes/FeatureColourScheme.java b/src/jalview/schemes/FeatureColourScheme.java
deleted file mode 100644 (file)
index 308495c..0000000
+++ /dev/null
@@ -1,109 +0,0 @@
-package jalview.schemes;
-
-import jalview.api.FeatureColourI;
-import jalview.api.FeatureSettingsI;
-
-import java.awt.Color;
-
-/**
- * Pre-set configurations for feature settings
- * 
- * @author gmcarstairs
- *
- */
-public enum FeatureColourScheme implements FeatureSettingsI
-{
-  /**
-   * Show sequence variants in red, on top of exons coloured by label
-   */
-  EnsemblVariants
-  {
-
-    @Override
-    public boolean isFeatureDisplayed(String type)
-    {
-      // TODO accept SO sub-types of these features
-      // if (SequenceOntologyFactory.getInstance().isA(SequenceOntologyI.EXON...
-      return (EXON.equals(type) || SEQUENCE_VARIANT.equals(type));
-    }
-
-    @Override
-    public boolean isGroupDisplayed(String group)
-    {
-      return true;
-    }
-
-    @Override
-    public FeatureColourI getFeatureColour(String type)
-    {
-      if (EXON.equals(type))
-      {
-        return new FeatureColourAdapter()
-        {
-          @Override
-          public boolean isColourByLabel()
-          {
-            return true;
-          }
-        };
-      }
-      if (SEQUENCE_VARIANT.equals(type))
-      {
-        return new FeatureColourAdapter()
-        {
-
-          @Override
-          public Color getColour()
-          {
-            return Color.RED;
-          }
-        };
-      }
-      return null;
-    }
-
-    @Override
-    public float getTransparency()
-    {
-      return 1f;
-    }
-
-    /**
-     * order to render sequence_variant after exon after the rest
-     */
-    @Override
-    public int compare(String feature1, String feature2)
-    {
-      if (SEQUENCE_VARIANT.equals(feature1))
-      {
-        return +1;
-      }
-      if (SEQUENCE_VARIANT.equals(feature2))
-      {
-        return -1;
-      }
-      if (EXON.equals(feature1))
-      {
-        return +1;
-      }
-      if (EXON.equals(feature2))
-      {
-        return -1;
-      }
-      return 0;
-    }
-
-    @Override
-    public boolean optimiseOrder()
-    {
-      return false;
-    };
-
-  };
-
-  // SequenceOntologyI.SEQUENCE_VARIANT
-  private static final String SEQUENCE_VARIANT = "sequence_variant";
-
-  // SequenceOntologyI.EXON
-  private static final String EXON = "exon";
-}
diff --git a/src/jalview/schemes/FeatureSettingsAdapter.java b/src/jalview/schemes/FeatureSettingsAdapter.java
new file mode 100644 (file)
index 0000000..52b01ce
--- /dev/null
@@ -0,0 +1,48 @@
+package jalview.schemes;
+
+import jalview.api.FeatureColourI;
+import jalview.api.FeatureSettingsI;
+
+/**
+ * An adapter class that may be extended to instantiate feature colour schemes
+ */
+public class FeatureSettingsAdapter implements FeatureSettingsI
+{
+
+  @Override
+  public boolean isFeatureDisplayed(String type)
+  {
+    return false;
+  }
+
+  @Override
+  public boolean isGroupDisplayed(String group)
+  {
+    return true;
+  }
+
+  @Override
+  public FeatureColourI getFeatureColour(String type)
+  {
+    return null;
+  }
+
+  @Override
+  public float getTransparency()
+  {
+    return 1f;
+  }
+
+  @Override
+  public int compare(String feature1, String feature2)
+  {
+    return 0;
+  }
+
+  @Override
+  public boolean optimiseOrder()
+  {
+    return false;
+  }
+
+}
index bf66b91..34a8926 100644 (file)
@@ -322,12 +322,14 @@ public class MapList
   }
 
   /**
-   * Consolidates a list of ranges so that any contiguous ranges are merged
+   * Consolidates a list of ranges so that any contiguous ranges are merged.
+   * This assumes the ranges are already in start order (does not sort them).
    * 
    * @param ranges
-   * @return
+   * @return the same list (if unchanged), else a new merged list, leaving the
+   *         input list unchanged
    */
-  public static List<int[]> coalesceRanges(List<int[]> ranges)
+  public static List<int[]> coalesceRanges(final List<int[]> ranges)
   {
     if (ranges == null || ranges.size() < 2) {
       return ranges;
@@ -337,31 +339,56 @@ public class MapList
     List<int[]> merged = new ArrayList<int[]>();
     int[] lastRange = ranges.get(0);
     int lastDirection = lastRange[1] >= lastRange[0] ? 1 : -1;
+    lastRange = new int[] { lastRange[0], lastRange[1] };
     merged.add(lastRange);
+    boolean first = true;
     
-    for (int[] range : ranges)
+    for (final int[] range : ranges)
     {
-      if (range == lastRange)
+      if (first)
       {
+        first = false;
         continue;
       }
+      if (range[0] == lastRange[0] && range[1] == lastRange[1])
+      {
+        // drop duplicate range
+        changed = true;
+        continue;
+      }
+
+      /*
+       * drop this range if it lies within the last range
+       */
+      if ((lastDirection == 1 && range[0] >= lastRange[0]
+              && range[0] <= lastRange[1] && range[1] >= lastRange[0] && range[1] <= lastRange[1])
+              || (lastDirection == -1 && range[0] <= lastRange[0]
+                      && range[0] >= lastRange[1]
+                      && range[1] <= lastRange[0] && range[1] >= lastRange[1]))
+      {
+        changed = true;
+        continue;
+      }
+
       int direction = range[1] >= range[0] ? 1 : -1;
 
       /*
        * if next range is in the same direction as last and contiguous,
        * just update the end position of the last range
        */
-      if ((range[1] == range[0] || direction == lastDirection)
-              && (range[0] == lastRange[1] || range[0] == lastRange[1]
-                      + lastDirection))
+      boolean sameDirection = range[1] == range[0] || direction == lastDirection;
+      boolean extending = range[0] == lastRange[1] + lastDirection;
+      boolean overlapping = (lastDirection == 1 && range[0] >= lastRange[0] && range[0] <= lastRange[1])
+              || (lastDirection == -1 && range[0] <= lastRange[0] && range[0] >= lastRange[1]);
+      if (sameDirection && (overlapping || extending))
       {
         lastRange[1] = range[1];
         changed = true;
       }
       else
       {
-        merged.add(range);
-        lastRange = range;
+        lastRange = new int[] { range[0], range[1] };
+        merged.add(lastRange);
         // careful: merging [5, 5] after [7, 6] should keep negative direction
         lastDirection = (range[1] == range[0]) ? lastDirection : direction;
       }
index 267e871..16db13a 100644 (file)
@@ -819,4 +819,57 @@ public final class MappingUtils
       it.remove();
     }
   }
+
+  /**
+   * Returns the total length of the supplied ranges
+   * 
+   * @param ranges
+   * @return
+   */
+  public static int getLength(List<int[]> ranges)
+  {
+    if (ranges == null)
+    {
+      return 0;
+    }
+    int length = 0;
+    for (int[] range : ranges)
+    {
+      length += Math.abs(range[1] - range[0]) + 1;
+    }
+    return length;
+  }
+
+  /**
+   * Answers true if any range includes the given value
+   * 
+   * @param ranges
+   * @param value
+   * @return
+   */
+  public static boolean contains(List<int[]> ranges, int value)
+  {
+    if (ranges == null)
+    {
+      return false;
+    }
+    for (int[] range : ranges)
+    {
+      if (range[1] >= range[0] && value >= range[0] && value <= range[1])
+      {
+        /*
+         * value within ascending range
+         */
+        return true;
+      }
+      if (range[1] < range[0] && value <= range[0] && value >= range[1])
+      {
+        /*
+         * value within descending range
+         */
+        return true;
+      }
+    }
+    return false;
+  }
 }
index a82a881..818267d 100644 (file)
@@ -1014,6 +1014,16 @@ public class AlignmentUtilsTests
     dna2.createDatasetSequence();
     pep1.createDatasetSequence();
     pep2.createDatasetSequence();
+    dna1.addSequenceFeature(new SequenceFeature("CDS", "cds1", 4, 6, 0f,
+            null));
+    dna1.addSequenceFeature(new SequenceFeature("CDS", "cds2", 10, 12, 0f,
+            null));
+    dna2.addSequenceFeature(new SequenceFeature("CDS", "cds3", 1, 3, 0f,
+            null));
+    dna2.addSequenceFeature(new SequenceFeature("CDS", "cds4", 7, 9, 0f,
+            null));
+    dna2.addSequenceFeature(new SequenceFeature("CDS", "cds5", 13, 15, 0f,
+            null));
 
     List<AlignedCodonFrame> mappings = new ArrayList<AlignedCodonFrame>();
     MapList map = new MapList(new int[] { 4, 6, 10, 12 },
@@ -1028,10 +1038,12 @@ public class AlignmentUtilsTests
     mappings.add(acf);
 
     AlignmentI cds = AlignmentUtils.makeCdsAlignment(new SequenceI[] {
-        dna1, dna2 }, mappings);
+        dna1, dna2 }, mappings, '-');
     assertEquals(2, cds.getSequences().size());
-    assertEquals("GGGTTT", cds.getSequenceAt(0).getSequenceAsString());
-    assertEquals("GGGTTTCCC", cds.getSequenceAt(1).getSequenceAsString());
+    assertEquals("---GGG---TTT---", cds.getSequenceAt(0)
+            .getSequenceAsString());
+    assertEquals("GGG---TTT---CCC", cds.getSequenceAt(1)
+            .getSequenceAsString());
 
     /*
      * Verify updated mappings
@@ -1048,14 +1060,14 @@ public class AlignmentUtilsTests
     SearchResults sr = MappingUtils.buildSearchResults(pep1, 1, mappings);
     assertEquals(1, sr.getResults().size());
     Match m = sr.getResults().get(0);
-    assertEquals(cds.getSequenceAt(0).getDatasetSequence(),
+    assertSame(cds.getSequenceAt(0).getDatasetSequence(),
             m.getSequence());
     assertEquals(1, m.getStart());
     assertEquals(3, m.getEnd());
     // map F to TTT
     sr = MappingUtils.buildSearchResults(pep1, 2, mappings);
     m = sr.getResults().get(0);
-    assertEquals(cds.getSequenceAt(0).getDatasetSequence(),
+    assertSame(cds.getSequenceAt(0).getDatasetSequence(),
             m.getSequence());
     assertEquals(4, m.getStart());
     assertEquals(6, m.getEnd());
@@ -1070,21 +1082,21 @@ public class AlignmentUtilsTests
     sr = MappingUtils.buildSearchResults(pep2, 1, mappings);
     assertEquals(1, sr.getResults().size());
     m = sr.getResults().get(0);
-    assertEquals(cds.getSequenceAt(1).getDatasetSequence(),
+    assertSame(cds.getSequenceAt(1).getDatasetSequence(),
             m.getSequence());
     assertEquals(1, m.getStart());
     assertEquals(3, m.getEnd());
     // map F to TTT
     sr = MappingUtils.buildSearchResults(pep2, 2, mappings);
     m = sr.getResults().get(0);
-    assertEquals(cds.getSequenceAt(1).getDatasetSequence(),
+    assertSame(cds.getSequenceAt(1).getDatasetSequence(),
             m.getSequence());
     assertEquals(4, m.getStart());
     assertEquals(6, m.getEnd());
     // map P to CCC
     sr = MappingUtils.buildSearchResults(pep2, 3, mappings);
     m = sr.getResults().get(0);
-    assertEquals(cds.getSequenceAt(1).getDatasetSequence(),
+    assertSame(cds.getSequenceAt(1).getDatasetSequence(),
             m.getSequence());
     assertEquals(7, m.getStart());
     assertEquals(9, m.getEnd());
@@ -1118,8 +1130,12 @@ public class AlignmentUtilsTests
     mappings.add(acf);
 
     AlignedCodonFrame newMapping = new AlignedCodonFrame();
+    List<int[]> ungappedColumns = new ArrayList<int[]>();
+    ungappedColumns.add(new int[] { 4, 6 });
+    ungappedColumns.add(new int[] { 10, 12 });
     List<SequenceI> cdsSeqs = AlignmentUtils.makeCdsSequences(dna1, acf,
-            newMapping);
+            ungappedColumns,
+            newMapping, '-');
     assertEquals(1, cdsSeqs.size());
     SequenceI cdsSeq = cdsSeqs.get(0);
 
@@ -1148,6 +1164,18 @@ public class AlignmentUtilsTests
     pep1.createDatasetSequence();
     pep2.createDatasetSequence();
     pep3.createDatasetSequence();
+    dna1.addSequenceFeature(new SequenceFeature("CDS", "cds1", 4, 6, 0f,
+            null));
+    dna1.addSequenceFeature(new SequenceFeature("CDS", "cds2", 10, 12, 0f,
+            null));
+    dna1.addSequenceFeature(new SequenceFeature("CDS", "cds3", 1, 3, 0f,
+            null));
+    dna1.addSequenceFeature(new SequenceFeature("CDS", "cds4", 7, 9, 0f,
+            null));
+    dna1.addSequenceFeature(new SequenceFeature("CDS", "cds5", 1, 3, 0f,
+            null));
+    dna1.addSequenceFeature(new SequenceFeature("CDS", "cds6", 10, 12, 0f,
+            null));
     pep1.getDatasetSequence().addDBRef(
             new DBRefEntry("EMBLCDS", "2", "A12345"));
     pep2.getDatasetSequence().addDBRef(
@@ -1156,9 +1184,7 @@ public class AlignmentUtilsTests
             new DBRefEntry("EMBLCDS", "4", "A12347"));
 
     /*
-     * Make the mappings from dna to protein. Using LinkedHashset is a
-     * convenience so results are in the input order. There is no assertion that
-     * the generated exon sequences are in any particular order.
+     * Make the mappings from dna to protein
      */
     List<AlignedCodonFrame> mappings = new ArrayList<AlignedCodonFrame>();
     // map ...GGG...TTT to GF
@@ -1185,7 +1211,7 @@ public class AlignmentUtilsTests
      * exon-to-protein and exon-to-dna mappings
      */
     AlignmentI exal = AlignmentUtils.makeCdsAlignment(
-            new SequenceI[] { dna1 }, mappings);
+            new SequenceI[] { dna1 }, mappings, '-');
 
     /*
      * Verify we have 3 cds sequences, mapped to pep1/2/3 respectively
@@ -1194,7 +1220,7 @@ public class AlignmentUtilsTests
     assertEquals(3, cds.size());
 
     SequenceI cdsSeq = cds.get(0);
-    assertEquals("GGGTTT", cdsSeq.getSequenceAsString());
+    assertEquals("---GGG---TTT", cdsSeq.getSequenceAsString());
     assertEquals("dna1|A12345", cdsSeq.getName());
     assertEquals(1, cdsSeq.getDBRefs().length);
     DBRefEntry cdsRef = cdsSeq.getDBRefs()[0];
@@ -1203,7 +1229,7 @@ public class AlignmentUtilsTests
     assertEquals("A12345", cdsRef.getAccessionId());
 
     cdsSeq = cds.get(1);
-    assertEquals("aaaccc", cdsSeq.getSequenceAsString());
+    assertEquals("aaa---ccc---", cdsSeq.getSequenceAsString());
     assertEquals("dna1|A12346", cdsSeq.getName());
     assertEquals(1, cdsSeq.getDBRefs().length);
     cdsRef = cdsSeq.getDBRefs()[0];
@@ -1212,7 +1238,7 @@ public class AlignmentUtilsTests
     assertEquals("A12346", cdsRef.getAccessionId());
 
     cdsSeq = cds.get(2);
-    assertEquals("aaaTTT", cdsSeq.getSequenceAsString());
+    assertEquals("aaa------TTT", cdsSeq.getSequenceAsString());
     assertEquals("dna1|A12347", cdsSeq.getName());
     assertEquals(1, cdsSeq.getDBRefs().length);
     cdsRef = cdsSeq.getDBRefs()[0];
@@ -1490,4 +1516,184 @@ public class AlignmentUtilsTests
     assertEquals(1, sf.getBegin());
     assertEquals(6, sf.getEnd());
   }
+
+  /**
+   * Test the method that extracts the cds-only part of a dna alignment, for the
+   * case where the cds should be aligned to match its nucleotide sequence.
+   */
+  @Test(groups = { "Functional" })
+  public void testMakeCdsAlignment_alternativeTranscripts()
+  {
+    SequenceI dna1 = new Sequence("dna1", "aaaGGGCC-----CTTTaaaGGG");
+    // alternative transcript of same dna skips CCC codon
+    SequenceI dna2 = new Sequence("dna2", "aaaGGGCC-----cttTaaaGGG");
+    // dna3 has no mapping (protein product) so should be ignored here
+    SequenceI dna3 = new Sequence("dna3", "aaaGGGCCCCCGGGcttTaaaGGG");
+    SequenceI pep1 = new Sequence("pep1", "GPFG");
+    SequenceI pep2 = new Sequence("pep2", "GPG");
+    dna1.createDatasetSequence();
+    dna2.createDatasetSequence();
+    dna3.createDatasetSequence();
+    pep1.createDatasetSequence();
+    pep2.createDatasetSequence();
+    dna1.addSequenceFeature(new SequenceFeature("CDS", "cds1", 4, 8, 0f,
+            null));
+    dna1.addSequenceFeature(new SequenceFeature("CDS", "cds2", 9, 12, 0f,
+            null));
+    dna1.addSequenceFeature(new SequenceFeature("CDS", "cds3", 16, 18, 0f,
+            null));
+    dna2.addSequenceFeature(new SequenceFeature("CDS", "cds", 4, 8, 0f,
+            null));
+    dna2.addSequenceFeature(new SequenceFeature("CDS", "cds", 12, 12, 0f,
+            null));
+    dna2.addSequenceFeature(new SequenceFeature("CDS", "cds", 16, 18, 0f,
+            null));
+  
+    List<AlignedCodonFrame> mappings = new ArrayList<AlignedCodonFrame>();
+    MapList map = new MapList(new int[] { 4, 12, 16, 18 },
+            new int[] { 1, 4 }, 3, 1);
+    AlignedCodonFrame acf = new AlignedCodonFrame();
+    acf.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(), map);
+    mappings.add(acf);
+    map = new MapList(new int[] { 4, 8, 12, 12, 16, 18 },
+            new int[] { 1, 3 },
+            3, 1);
+    acf = new AlignedCodonFrame();
+    acf.addMap(dna2.getDatasetSequence(), pep2.getDatasetSequence(), map);
+    mappings.add(acf);
+  
+    AlignmentI cds = AlignmentUtils.makeCdsAlignment(new SequenceI[] {
+        dna1, dna2, dna3 }, mappings, '-');
+    assertEquals(2, cds.getSequences().size());
+    assertEquals("GGGCCCTTTGGG", cds.getSequenceAt(0).getSequenceAsString());
+    assertEquals("GGGCC---TGGG", cds.getSequenceAt(1).getSequenceAsString());
+  
+    /*
+     * Verify updated mappings
+     */
+    assertEquals(2, mappings.size());
+  
+    /*
+     * Mapping from pep1 to GGGTTT in first new CDS sequence
+     */
+    List<AlignedCodonFrame> pep1Mapping = MappingUtils
+            .findMappingsForSequence(pep1, mappings);
+    assertEquals(1, pep1Mapping.size());
+    /*
+     * maps GPFG to 1-3,4-6,7-9,10-12
+     */
+    SearchResults sr = MappingUtils.buildSearchResults(pep1, 1, mappings);
+    assertEquals(1, sr.getResults().size());
+    Match m = sr.getResults().get(0);
+    assertEquals(cds.getSequenceAt(0).getDatasetSequence(),
+            m.getSequence());
+    assertEquals(1, m.getStart());
+    assertEquals(3, m.getEnd());
+    sr = MappingUtils.buildSearchResults(pep1, 2, mappings);
+    m = sr.getResults().get(0);
+    assertEquals(4, m.getStart());
+    assertEquals(6, m.getEnd());
+    sr = MappingUtils.buildSearchResults(pep1, 3, mappings);
+    m = sr.getResults().get(0);
+    assertEquals(7, m.getStart());
+    assertEquals(9, m.getEnd());
+    sr = MappingUtils.buildSearchResults(pep1, 4, mappings);
+    m = sr.getResults().get(0);
+    assertEquals(10, m.getStart());
+    assertEquals(12, m.getEnd());
+  
+    /*
+     * GPG in pep2 map to 1-3,4-6,7-9 in second CDS sequence
+     */
+    List<AlignedCodonFrame> pep2Mapping = MappingUtils
+            .findMappingsForSequence(pep2, mappings);
+    assertEquals(1, pep2Mapping.size());
+    sr = MappingUtils.buildSearchResults(pep2, 1, mappings);
+    assertEquals(1, sr.getResults().size());
+    m = sr.getResults().get(0);
+    assertEquals(cds.getSequenceAt(1).getDatasetSequence(),
+            m.getSequence());
+    assertEquals(1, m.getStart());
+    assertEquals(3, m.getEnd());
+    sr = MappingUtils.buildSearchResults(pep2, 2, mappings);
+    m = sr.getResults().get(0);
+    assertEquals(4, m.getStart());
+    assertEquals(6, m.getEnd());
+    sr = MappingUtils.buildSearchResults(pep2, 3, mappings);
+    m = sr.getResults().get(0);
+    assertEquals(7, m.getStart());
+    assertEquals(9, m.getEnd());
+  }
+
+  /**
+   * Tests for gapped column in sequences
+   */
+  @Test(groups = { "Functional" })
+  public void testIsGappedColumn()
+  {
+    SequenceI seq1 = new Sequence("Seq1", "a--c.tc-a-g");
+    SequenceI seq2 = new Sequence("Seq2", "aa---t--a-g");
+    SequenceI seq3 = new Sequence("Seq3", "ag-c t-g-");
+    List<SequenceI> seqs = Arrays
+            .asList(new SequenceI[] { seq1, seq2, seq3 });
+    // the column number is base 1
+    assertFalse(AlignmentUtils.isGappedColumn(seqs, 1));
+    assertFalse(AlignmentUtils.isGappedColumn(seqs, 2));
+    assertTrue(AlignmentUtils.isGappedColumn(seqs, 3));
+    assertFalse(AlignmentUtils.isGappedColumn(seqs, 4));
+    assertTrue(AlignmentUtils.isGappedColumn(seqs, 5));
+    assertFalse(AlignmentUtils.isGappedColumn(seqs, 6));
+    assertFalse(AlignmentUtils.isGappedColumn(seqs, 7));
+    assertFalse(AlignmentUtils.isGappedColumn(seqs, 8));
+    assertFalse(AlignmentUtils.isGappedColumn(seqs, 9));
+    assertTrue(AlignmentUtils.isGappedColumn(seqs, 10));
+    assertFalse(AlignmentUtils.isGappedColumn(seqs, 11));
+    // out of bounds:
+    assertTrue(AlignmentUtils.isGappedColumn(seqs, 0));
+    assertTrue(AlignmentUtils.isGappedColumn(seqs, 100));
+    assertTrue(AlignmentUtils.isGappedColumn(seqs, -100));
+    assertTrue(AlignmentUtils.isGappedColumn(null, 0));
+  }
+
+  @Test(groups = { "Functional" })
+  public void testFindCdsColumns()
+  {
+    // TODO target method belongs in a general-purpose alignment
+    // analysis method to find columns for feature
+
+    /*
+     * NB this method assumes CDS ranges are contiguous (no introns)
+     */
+    SequenceI gene = new Sequence("gene", "aaacccgggtttaaacccgggttt");
+    SequenceI seq1 = new Sequence("Seq1", "--ac-cgGG-GGaaACC--GGtt-");
+    SequenceI seq2 = new Sequence("Seq2", "AA--CCGG--g-AAA--cG-GTTt");
+    seq1.createDatasetSequence();
+    seq2.createDatasetSequence();
+    seq1.addSequenceFeature(new SequenceFeature("CDS", "cds", 5, 6, 0f,
+            null));
+    seq1.addSequenceFeature(new SequenceFeature("CDS", "cds", 7, 8, 0f,
+            null));
+    seq1.addSequenceFeature(new SequenceFeature("CDS", "cds", 11, 13, 0f,
+            null));
+    seq1.addSequenceFeature(new SequenceFeature("CDS", "cds", 14, 15, 0f,
+            null));
+    seq2.addSequenceFeature(new SequenceFeature("CDS", "cds", 1, 2, 0f,
+            null));
+    seq2.addSequenceFeature(new SequenceFeature("CDS", "cds", 3, 6, 0f,
+            null));
+    seq2.addSequenceFeature(new SequenceFeature("CDS", "cds", 8, 10, 0f,
+            null));
+    seq2.addSequenceFeature(new SequenceFeature("CDS", "cds", 12, 12, 0f,
+            null));
+    seq2.addSequenceFeature(new SequenceFeature("CDS", "cds", 13, 15, 0f,
+            null));
+
+    List<int[]> cdsColumns = AlignmentUtils.findCdsColumns(new SequenceI[] {
+        seq1, seq2 });
+    assertEquals(4, cdsColumns.size());
+    assertEquals("[1, 2]", Arrays.toString(cdsColumns.get(0)));
+    assertEquals("[5, 9]", Arrays.toString(cdsColumns.get(1)));
+    assertEquals("[11, 17]", Arrays.toString(cdsColumns.get(2)));
+    assertEquals("[19, 23]", Arrays.toString(cdsColumns.get(3)));
+  }
 }
index 0d40037..b8116f5 100644 (file)
@@ -523,4 +523,14 @@ public class SequenceTest
     assertFalse(pdbs.get(0) == seq1.getAllPDBEntries().get(0));
     assertTrue(pdbs.get(0).equals(seq1.getAllPDBEntries().get(0)));
   }
+
+  @Test(groups = "Functional")
+  public void testGetCharAt()
+  {
+    SequenceI sq = new Sequence("", "abcde");
+    assertEquals('a', sq.getCharAt(0));
+    assertEquals('e', sq.getCharAt(4));
+    assertEquals(' ', sq.getCharAt(5));
+    assertEquals(' ', sq.getCharAt(-1));
+  }
 }
index 2520de0..c442b6f 100644 (file)
@@ -697,6 +697,12 @@ public class MapListTest
     List<int[]> merged = MapList.coalesceRanges(ranges);
     assertEquals(1, merged.size());
     assertArrayEquals(new int[] { 1, 7 }, merged.get(0));
+    // verify input list is unchanged
+    assertEquals(4, ranges.size());
+    assertArrayEquals(new int[] { 1, 3 }, ranges.get(0));
+    assertArrayEquals(new int[] { 4, 5 }, ranges.get(1));
+    assertArrayEquals(new int[] { 5, 5 }, ranges.get(2));
+    assertArrayEquals(new int[] { 5, 7 }, ranges.get(3));
 
     // merging in reverse direction:
     ranges.clear();
@@ -723,4 +729,62 @@ public class MapListTest
     assertArrayEquals(new int[] { 1, 6 }, merged.get(0));
     assertArrayEquals(new int[] { 12, 7 }, merged.get(1));
   }
+
+  /**
+   * Test that coalesceRanges merges overlapping, nested and duplicate ranges
+   */
+  @Test(groups = { "Functional" })
+  public void testCoalesceRanges_withOverlap()
+  {
+    List<int[]> ranges = new ArrayList<int[]>();
+    ranges.add(new int[] { 1, 3 });
+    ranges.add(new int[] { 2, 5 });
+
+    /*
+     * [2, 5] overlaps [1, 3] so should extend it to [1, 5]
+     */
+    List<int[]> merged = MapList.coalesceRanges(ranges);
+    assertEquals(1, merged.size());
+    assertArrayEquals(new int[] { 1, 5 }, merged.get(0));
+
+    /*
+     * a subsumed interval ([2, 4] within [1, 6]) should be dropped
+     */
+    ranges.clear();
+    ranges.add(new int[] { 1, 6 });
+    ranges.add(new int[] { 2, 4 });
+    merged = MapList.coalesceRanges(ranges);
+    assertEquals(1, merged.size());
+    assertArrayEquals(new int[] { 1, 6 }, merged.get(0));
+
+    ranges.clear(); // next case: same start, second range ends further on
+    ranges.add(new int[] { 1, 5 });
+    ranges.add(new int[] { 1, 6 });
+    merged = MapList.coalesceRanges(ranges);
+    assertEquals(1, merged.size());
+    assertArrayEquals(new int[] { 1, 6 }, merged.get(0));
+
+    /*
+     * duplicate ranges should merge to a single copy
+     */
+    ranges.clear();
+    ranges.add(new int[] { 1, 3 });
+    ranges.add(new int[] { 1, 3 });
+    merged = MapList.coalesceRanges(ranges);
+    assertEquals(1, merged.size());
+    assertArrayEquals(new int[] { 1, 3 }, merged.get(0));
+
+    /*
+     * reverse direction: overlapping and adjoining ranges merge to [9, 0]
+     */
+    ranges.clear();
+    ranges.add(new int[] { 9, 5 });
+    ranges.add(new int[] { 9, 4 });
+    ranges.add(new int[] { 8, 3 });
+    ranges.add(new int[] { 3, 2 });
+    ranges.add(new int[] { 1, 0 });
+    merged = MapList.coalesceRanges(ranges);
+    assertEquals(1, merged.size());
+    assertArrayEquals(new int[] { 9, 0 }, merged.get(0));
+  }
 }
index 095ab1b..3c4d4f8 100644 (file)
@@ -21,6 +21,7 @@
 package jalview.util;
 
 import static org.testng.AssertJUnit.assertEquals;
+import static org.testng.AssertJUnit.assertFalse;
 import static org.testng.AssertJUnit.assertSame;
 import static org.testng.AssertJUnit.assertTrue;
 import static org.testng.internal.junit.ArrayAsserts.assertArrayEquals;
@@ -911,4 +912,58 @@ public class MappingUtilsTest
     assertEquals(1, ranges.size());
     assertArrayEquals(new int[] { 12, 7 }, ranges.get(0));
   }
+
+  @Test(groups = { "Functional" })
+  public void testGetLength() // expects the summed length of all ranges
+  {
+    assertEquals(0, MappingUtils.getLength(null)); // null list
+    List<int[]> ranges = new ArrayList<int[]>();
+    assertEquals(0, MappingUtils.getLength(ranges)); // empty list
+    ranges.add(new int[] { 1, 1 }); // single position: adds 1
+    assertEquals(1, MappingUtils.getLength(ranges));
+    ranges.add(new int[] { 2, 10 }); // forward range: adds 9
+    assertEquals(10, MappingUtils.getLength(ranges));
+    ranges.add(new int[] { 20, 10 }); // reverse range: adds 11
+    assertEquals(21, MappingUtils.getLength(ranges));
+  }
+
+  @Test(groups = { "Functional" })
+  public void testContains() // probes each range's end points and gaps
+  {
+    assertFalse(MappingUtils.contains(null, 1)); // null list
+    List<int[]> ranges = new ArrayList<int[]>();
+    assertFalse(MappingUtils.contains(ranges, 1)); // empty list
+
+    ranges.add(new int[] { 1, 4 }); // forward range
+    ranges.add(new int[] { 6, 6 }); // single position
+    ranges.add(new int[] { 8, 10 }); // forward range
+    ranges.add(new int[] { 30, 20 }); // reverse range
+    ranges.add(new int[] { -16, -44 }); // reverse range, negative positions
+
+    assertFalse(MappingUtils.contains(ranges, 0)); // just before [1, 4]
+    assertTrue(MappingUtils.contains(ranges, 1));
+    assertTrue(MappingUtils.contains(ranges, 2));
+    assertTrue(MappingUtils.contains(ranges, 3));
+    assertTrue(MappingUtils.contains(ranges, 4));
+    assertFalse(MappingUtils.contains(ranges, 5)); // gap before [6, 6]
+
+    assertTrue(MappingUtils.contains(ranges, 6));
+    assertFalse(MappingUtils.contains(ranges, 7)); // gap before [8, 10]
+
+    assertTrue(MappingUtils.contains(ranges, 8));
+    assertTrue(MappingUtils.contains(ranges, 9));
+    assertTrue(MappingUtils.contains(ranges, 10));
+
+    assertFalse(MappingUtils.contains(ranges, 31)); // just outside [30, 20]
+    assertTrue(MappingUtils.contains(ranges, 30)); // end point is included
+    assertTrue(MappingUtils.contains(ranges, 29));
+    assertTrue(MappingUtils.contains(ranges, 20)); // end point is included
+    assertFalse(MappingUtils.contains(ranges, 19)); // just outside [30, 20]
+
+    assertFalse(MappingUtils.contains(ranges, -15)); // just outside [-16, -44]
+    assertTrue(MappingUtils.contains(ranges, -16)); // end point is included
+    assertTrue(MappingUtils.contains(ranges, -44)); // end point is included
+    assertFalse(MappingUtils.contains(ranges, -45)); // just outside [-16, -44]
+  }
+
 }