JAL-1793 spike branch updated to latest

[jalview.git] / src / jalview / analysis / AlignmentUtils.java
diff --git a/src/jalview/analysis/AlignmentUtils.java b/src/jalview/analysis/AlignmentUtils.java

index 1b8f84f..bef667d 100644 (file)
--- a/src/jalview/analysis/AlignmentUtils.java
+++ b/src/jalview/analysis/AlignmentUtils.java
@@ -29,20 +29,22 @@ import jalview.datamodel.Alignment;
  import jalview.datamodel.AlignmentAnnotation;
  import jalview.datamodel.AlignmentI;
  import jalview.datamodel.DBRefEntry;
+import jalview.datamodel.GeneLociI;
  import jalview.datamodel.IncompleteCodonException;
  import jalview.datamodel.Mapping;
  import jalview.datamodel.Sequence;
  import jalview.datamodel.SequenceFeature;
  import jalview.datamodel.SequenceGroup;
  import jalview.datamodel.SequenceI;
-import jalview.io.gff.SequenceOntologyFactory;
+import jalview.datamodel.features.SequenceFeatures;
+import jalview.io.gff.Gff3Helper;
  import jalview.io.gff.SequenceOntologyI;
  import jalview.schemes.ResidueProperties;
  import jalview.util.Comparison;
  import jalview.util.DBRefUtils;
+import jalview.util.IntRangeComparator;
  import jalview.util.MapList;
  import jalview.util.MappingUtils;
-import jalview.util.RangeComparator;
  import jalview.util.StringUtils;
  
  import java.io.UnsupportedEncodingException;
@@ -51,7 +53,6 @@ import java.util.ArrayList;
  import java.util.Arrays;
  import java.util.Collection;
  import java.util.Collections;
-import java.util.Comparator;
  import java.util.HashMap;
  import java.util.HashSet;
  import java.util.Iterator;
@@ -106,6 +107,15 @@ public class AlignmentUtils
      {
        return variant == null ? null : variant.getFeatureGroup();
      }
+
+    /**
+     * toString for aid in the debugger only
+     */
+    @Override
+    public String toString()
+    {
+      return base + ":" + (variant == null ? "" : variant.getDescription());
+    }
    }
  
    /**
@@ -385,7 +395,7 @@ public class AlignmentUtils
     * Answers true if the mappings include one between the given (dataset)
     * sequences.
     */
-  public static boolean mappingExists(List<AlignedCodonFrame> mappings,
+  protected static boolean mappingExists(List<AlignedCodonFrame> mappings,
            SequenceI aaSeq, SequenceI cdnaSeq)
    {
      if (mappings != null)
@@ -654,15 +664,16 @@ public class AlignmentUtils
      int toOffset = alignTo.getStart() - 1;
      int sourceGapMappedLength = 0;
      boolean inExon = false;
-    final char[] thisSeq = alignTo.getSequence();
-    final char[] thatAligned = alignFrom.getSequence();
-    StringBuilder thisAligned = new StringBuilder(2 * thisSeq.length);
+    final int toLength = alignTo.getLength();
+    final int fromLength = alignFrom.getLength();
+    StringBuilder thisAligned = new StringBuilder(2 * toLength);
  
      /*
       * Traverse the 'model' aligned sequence
       */
-    for (char sourceChar : thatAligned)
+    for (int i = 0; i < fromLength; i++)
      {
+      char sourceChar = alignFrom.getCharAt(i);
        if (sourceChar == sourceGap)
        {
          sourceGapMappedLength += ratio;
@@ -702,9 +713,9 @@ public class AlignmentUtils
         */
        int intronLength = 0;
        while (basesWritten + toOffset < mappedCodonEnd
-              && thisSeqPos < thisSeq.length)
+              && thisSeqPos < toLength)
        {
-        final char c = thisSeq[thisSeqPos++];
+        final char c = alignTo.getCharAt(thisSeqPos++);
          if (c != myGapChar)
          {
            basesWritten++;
@@ -730,7 +741,7 @@ public class AlignmentUtils
              int gapsToAdd = calculateGapsToInsert(preserveMappedGaps,
                      preserveUnmappedGaps, sourceGapMappedLength, inExon,
                      trailingCopiedGap.length(), intronLength, startOfCodon);
-            for (int i = 0; i < gapsToAdd; i++)
+            for (int k = 0; k < gapsToAdd; k++)
              {
                thisAligned.append(myGapChar);
              }
@@ -758,9 +769,9 @@ public class AlignmentUtils
       * At end of model aligned sequence. Copy any remaining target sequence, optionally
       * including (intron) gaps.
       */
-    while (thisSeqPos < thisSeq.length)
+    while (thisSeqPos < toLength)
      {
-      final char c = thisSeq[thisSeqPos++];
+      final char c = alignTo.getCharAt(thisSeqPos++);
        if (c != myGapChar || preserveUnmappedGaps)
        {
          thisAligned.append(c);
@@ -952,7 +963,7 @@ public class AlignmentUtils
        SequenceI peptide = mapping.findAlignedSequence(cdsSeq, protein);
        if (peptide != null)
        {
-        int peptideLength = peptide.getLength();
+        final int peptideLength = peptide.getLength();
          Mapping map = mapping.getMappingBetween(cdsSeq, peptide);
          if (map != null)
          {
@@ -961,9 +972,9 @@ public class AlignmentUtils
            {
              mapList = mapList.getInverse();
            }
-          int cdsLength = cdsDss.getLength();
-          int mappedFromLength = MappingUtils
-                  .getLength(mapList.getFromRanges());
+          final int cdsLength = cdsDss.getLength();
+          int mappedFromLength = MappingUtils.getLength(mapList
+                  .getFromRanges());
            int mappedToLength = MappingUtils
                    .getLength(mapList.getToRanges());
            boolean addStopCodon = (cdsLength == mappedFromLength
@@ -988,14 +999,15 @@ public class AlignmentUtils
             * walk over the aligned peptide sequence and insert mapped 
             * codons for residues in the aligned cds sequence 
             */
-          char[] alignedPeptide = peptide.getSequence();
-          char[] nucleotides = cdsDss.getSequence();
            int copiedBases = 0;
            int cdsStart = cdsDss.getStart();
            int proteinPos = peptide.getStart() - 1;
            int cdsCol = 0;
-          for (char residue : alignedPeptide)
+
+          for (int col = 0; col < peptideLength; col++)
            {
+            char residue = peptide.getCharAt(col);
+
              if (Comparison.isGap(residue))
              {
                cdsCol += CODON_LENGTH;
@@ -1013,7 +1025,7 @@ public class AlignmentUtils
                {
                  for (int j = codon[0]; j <= codon[1]; j++)
                  {
-                  char mappedBase = nucleotides[j - cdsStart];
+                  char mappedBase = cdsDss.getCharAt(j - cdsStart);
                    alignedCds[cdsCol++] = mappedBase;
                    copiedBases++;
                  }
@@ -1025,7 +1037,7 @@ public class AlignmentUtils
             * append stop codon if not mapped from protein,
             * closing it up to the end of the mapped sequence
             */
-          if (copiedBases == nucleotides.length - CODON_LENGTH)
+          if (copiedBases == cdsLength - CODON_LENGTH)
            {
              for (int i = alignedCds.length - 1; i >= 0; i--)
              {
@@ -1035,10 +1047,9 @@ public class AlignmentUtils
                  break;
                }
              }
-            for (int i = nucleotides.length
-                    - CODON_LENGTH; i < nucleotides.length; i++)
+            for (int i = cdsLength - CODON_LENGTH; i < cdsLength; i++)
              {
-              alignedCds[cdsCol++] = nucleotides[i];
+              alignedCds[cdsCol++] = cdsDss.getCharAt(i);
              }
            }
            cdsSeq.setSequence(new String(alignedCds));
@@ -1208,21 +1219,26 @@ public class AlignmentUtils
            List<SequenceI> unmappedProtein)
    {
      /*
-     * Prefill aligned sequences with gaps before inserting aligned protein
-     * residues.
+     * prefill peptide sequences with gaps 
       */
      int alignedWidth = alignedCodons.size();
      char[] gaps = new char[alignedWidth];
      Arrays.fill(gaps, protein.getGapCharacter());
-    String allGaps = String.valueOf(gaps);
+    Map<SequenceI, char[]> peptides = new HashMap<>();
      for (SequenceI seq : protein.getSequences())
      {
        if (!unmappedProtein.contains(seq))
        {
-        seq.setSequence(allGaps);
+        peptides.put(seq, Arrays.copyOf(gaps, gaps.length));
        }
      }
  
+    /*
+     * Traverse the codons left to right (as defined by CodonComparator)
+     * and insert peptides in each column where the sequence is mapped.
+     * This gives a peptide 'alignment' where residues are aligned if their
+     * corresponding codons occupy the same columns in the cdna alignment.
+     */
      int column = 0;
      for (AlignedCodon codon : alignedCodons.keySet())
      {
@@ -1230,12 +1246,20 @@ public class AlignmentUtils
                .get(codon);
        for (Entry<SequenceI, AlignedCodon> entry : columnResidues.entrySet())
        {
-        // place translated codon at its column position in sequence
-        entry.getKey().getSequence()[column] = entry.getValue().product
-                .charAt(0);
+        char residue = entry.getValue().product.charAt(0);
+        peptides.get(entry.getKey())[column] = residue;
        }
        column++;
      }
+
+    /*
+     * and finally set the constructed sequences
+     */
+    for (Entry<SequenceI, char[]> entry : peptides.entrySet())
+    {
+      entry.getKey().setSequence(new String(entry.getValue()));
+    }
+
      return 0;
    }
  
@@ -1623,8 +1647,8 @@ public class AlignmentUtils
        productSeqs = new HashSet<SequenceI>();
        for (SequenceI seq : products)
        {
-        productSeqs.add(seq.getDatasetSequence() == null ? seq
-                : seq.getDatasetSequence());
+        productSeqs.add(seq.getDatasetSequence() == null ? seq : seq
+                .getDatasetSequence());
        }
      }
  
@@ -1717,9 +1741,8 @@ public class AlignmentUtils
            /*
             * add a mapping from CDS to the (unchanged) mapped to range
             */
-          List<int[]> cdsRange = Collections
-                  .singletonList(new int[]
-                  { 1, cdsSeq.getLength() });
+          List<int[]> cdsRange = Collections.singletonList(new int[] { 1,
+              cdsSeq.getLength() });
            MapList cdsToProteinMap = new MapList(cdsRange,
                    mapList.getToRanges(), mapList.getFromRatio(),
                    mapList.getToRatio());
@@ -1741,7 +1764,7 @@ public class AlignmentUtils
             * add another mapping from original 'from' range to CDS
             */
            AlignedCodonFrame dnaToCdsMapping = new AlignedCodonFrame();
-          MapList dnaToCdsMap = new MapList(mapList.getFromRanges(),
+          final MapList dnaToCdsMap = new MapList(mapList.getFromRanges(),
                    cdsRange, 1, 1);
            dnaToCdsMapping.addMap(dnaSeq.getDatasetSequence(), cdsSeqDss,
                    dnaToCdsMap);
@@ -1751,6 +1774,13 @@ public class AlignmentUtils
            }
  
            /*
+           * transfer dna chromosomal loci (if known) to the CDS
+           * sequence (via the mapping)
+           */
+          final MapList cdsToDnaMap = dnaToCdsMap.getInverse();
+          transferGeneLoci(dnaSeq, cdsToDnaMap, cdsSeq);
+
+          /*
             * add DBRef with mapping from protein to CDS
             * (this enables Get Cross-References from protein alignment)
             * This is tricky because we can't have two DBRefs with the
@@ -1769,26 +1799,30 @@ public class AlignmentUtils
  
            for (DBRefEntry primRef : dnaDss.getPrimaryDBRefs())
            {
-            // creates a complementary cross-reference to the source sequence's
-            // primary reference.
-
-            DBRefEntry cdsCrossRef = new DBRefEntry(primRef.getSource(),
-                    primRef.getSource() + ":" + primRef.getVersion(),
-                    primRef.getAccessionId());
-            cdsCrossRef
-                    .setMap(new Mapping(dnaDss, new MapList(dnaToCdsMap)));
+            /*
+             * create a cross-reference from CDS to the source sequence's
+             * primary reference and vice versa
+             */
+            String source = primRef.getSource();
+            String version = primRef.getVersion();
+            DBRefEntry cdsCrossRef = new DBRefEntry(source, source + ":"
+                    + version, primRef.getAccessionId());
+            cdsCrossRef.setMap(new Mapping(dnaDss, new MapList(cdsToDnaMap)));
              cdsSeqDss.addDBRef(cdsCrossRef);
  
+            dnaSeq.addDBRef(new DBRefEntry(source, version, cdsSeq
+                    .getName(), new Mapping(cdsSeqDss, dnaToCdsMap)));
+
              // problem here is that the cross-reference is synthesized -
              // cdsSeq.getName() may be like 'CDS|dnaaccession' or
              // 'CDS|emblcdsacc'
              // assuming cds version same as dna ?!?
  
-            DBRefEntry proteinToCdsRef = new DBRefEntry(primRef.getSource(),
-                    primRef.getVersion(), cdsSeq.getName());
+            DBRefEntry proteinToCdsRef = new DBRefEntry(source, version,
+                    cdsSeq.getName());
              //
-            proteinToCdsRef.setMap(
-                    new Mapping(cdsSeqDss, cdsToProteinMap.getInverse()));
+            proteinToCdsRef.setMap(new Mapping(cdsSeqDss, cdsToProteinMap
+                    .getInverse()));
              proteinProduct.addDBRef(proteinToCdsRef);
            }
  
@@ -1801,14 +1835,46 @@ public class AlignmentUtils
        }
      }
  
-    AlignmentI cds = new Alignment(
-            cdsSeqs.toArray(new SequenceI[cdsSeqs.size()]));
+    AlignmentI cds = new Alignment(cdsSeqs.toArray(new SequenceI[cdsSeqs
+            .size()]));
      cds.setDataset(dataset);
  
      return cds;
    }
  
    /**
+   * Tries to transfer gene loci (dbref to chromosome positions) from fromSeq to
+   * toSeq, mediated by the given mapping between the sequences
+   * 
+   * @param fromSeq
+   * @param targetToFrom
+   *          Map
+   * @param targetSeq
+   */
+  protected static void transferGeneLoci(SequenceI fromSeq,
+          MapList targetToFrom, SequenceI targetSeq)
+  {
+    if (targetSeq.getGeneLoci() != null)
+    {
+      // already have - don't override
+      return;
+    }
+    GeneLociI fromLoci = fromSeq.getGeneLoci();
+    if (fromLoci == null)
+    {
+      return;
+    }
+
+    MapList newMap = targetToFrom.traverse(fromLoci.getMap());
+
+    if (newMap != null)
+    {
+      targetSeq.setGeneLoci(fromLoci.getSpeciesId(),
+              fromLoci.getAssemblyId(), fromLoci.getChromosomeId(), newMap);
+    }
+  }
+
+  /**
     * A helper method that finds a CDS sequence in the alignment dataset that is
     * mapped to the given protein sequence, and either is, or has a mapping from,
     * the given dna sequence.
@@ -1976,19 +2042,19 @@ public class AlignmentUtils
    }
  
    /**
-   * add any DBRefEntrys to cdsSeq from contig that have a Mapping congruent to
+   * Adds any DBRefEntrys to cdsSeq from contig that have a Mapping congruent to
     * the given mapping.
     * 
     * @param cdsSeq
     * @param contig
+   * @param proteinProduct
     * @param mapping
-   * @return list of DBRefEntrys added.
+   * @return list of DBRefEntrys added
     */
-  public static List<DBRefEntry> propagateDBRefsToCDS(SequenceI cdsSeq,
+  protected static List<DBRefEntry> propagateDBRefsToCDS(SequenceI cdsSeq,
            SequenceI contig, SequenceI proteinProduct, Mapping mapping)
    {
-
-    // gather direct refs from contig congrent with mapping
+    // gather direct refs from contig congruent with mapping
      List<DBRefEntry> direct = new ArrayList<DBRefEntry>();
      HashSet<String> directSources = new HashSet<String>();
      if (contig.getDBRefs() != null)
@@ -2061,14 +2127,14 @@ public class AlignmentUtils
     * 
     * @param fromSeq
     * @param toSeq
+   * @param mapping
+   *          the mapping from 'fromSeq' to 'toSeq'
     * @param select
     *          if not null, only features of this type are copied (including
     *          subtypes in the Sequence Ontology)
-   * @param mapping
-   *          the mapping from 'fromSeq' to 'toSeq'
     * @param omitting
     */
-  public static int transferFeatures(SequenceI fromSeq, SequenceI toSeq,
+  protected static int transferFeatures(SequenceI fromSeq, SequenceI toSeq,
            MapList mapping, String select, String... omitting)
    {
      SequenceI copyTo = toSeq;
@@ -2077,82 +2143,84 @@ public class AlignmentUtils
        copyTo = copyTo.getDatasetSequence();
      }
  
-    SequenceOntologyI so = SequenceOntologyFactory.getInstance();
+    /*
+     * get features, optionally restricted by an ontology term
+     */
+    List<SequenceFeature> sfs = select == null ? fromSeq.getFeatures()
+            .getPositionalFeatures() : fromSeq.getFeatures()
+            .getFeaturesByOntology(select);
+
      int count = 0;
-    SequenceFeature[] sfs = fromSeq.getSequenceFeatures();
-    if (sfs != null)
+    for (SequenceFeature sf : sfs)
      {
-      for (SequenceFeature sf : sfs)
+      String type = sf.getType();
+      boolean omit = false;
+      for (String toOmit : omitting)
        {
-        String type = sf.getType();
-        if (select != null && !so.isA(type, select))
+        if (type.equals(toOmit))
          {
-          continue;
-        }
-        boolean omit = false;
-        for (String toOmit : omitting)
-        {
-          if (type.equals(toOmit))
-          {
-            omit = true;
-          }
-        }
-        if (omit)
-        {
-          continue;
+          omit = true;
          }
+      }
+      if (omit)
+      {
+        continue;
+      }
  
-        /*
-         * locate the mapped range - null if either start or end is
-         * not mapped (no partial overlaps are calculated)
-         */
-        int start = sf.getBegin();
-        int end = sf.getEnd();
-        int[] mappedTo = mapping.locateInTo(start, end);
-        /*
-         * if whole exon range doesn't map, try interpreting it
-         * as 5' or 3' exon overlapping the CDS range
-         */
-        if (mappedTo == null)
-        {
-          mappedTo = mapping.locateInTo(end, end);
-          if (mappedTo != null)
-          {
-            /*
-             * end of exon is in CDS range - 5' overlap
-             * to a range from the start of the peptide
-             */
-            mappedTo[0] = 1;
-          }
-        }
-        if (mappedTo == null)
+      /*
+       * locate the mapped range - null if either start or end is
+       * not mapped (no partial overlaps are calculated)
+       */
+      int start = sf.getBegin();
+      int end = sf.getEnd();
+      int[] mappedTo = mapping.locateInTo(start, end);
+      /*
+       * if whole exon range doesn't map, try interpreting it
+       * as 5' or 3' exon overlapping the CDS range
+       */
+      if (mappedTo == null)
+      {
+        mappedTo = mapping.locateInTo(end, end);
+        if (mappedTo != null)
          {
-          mappedTo = mapping.locateInTo(start, start);
-          if (mappedTo != null)
-          {
-            /*
-             * start of exon is in CDS range - 3' overlap
-             * to a range up to the end of the peptide
-             */
-            mappedTo[1] = toSeq.getLength();
-          }
+          /*
+           * end of exon is in CDS range - 5' overlap
+           * to a range from the start of the peptide
+           */
+          mappedTo[0] = 1;
          }
+      }
+      if (mappedTo == null)
+      {
+        mappedTo = mapping.locateInTo(start, start);
          if (mappedTo != null)
          {
-          SequenceFeature copy = new SequenceFeature(sf);
-          copy.setBegin(Math.min(mappedTo[0], mappedTo[1]));
-          copy.setEnd(Math.max(mappedTo[0], mappedTo[1]));
-          copyTo.addSequenceFeature(copy);
-          count++;
+          /*
+           * start of exon is in CDS range - 3' overlap
+           * to a range up to the end of the peptide
+           */
+          mappedTo[1] = toSeq.getLength();
          }
        }
+      if (mappedTo != null)
+      {
+        int newBegin = Math.min(mappedTo[0], mappedTo[1]);
+        int newEnd = Math.max(mappedTo[0], mappedTo[1]);
+        SequenceFeature copy = new SequenceFeature(sf, newBegin, newEnd,
+                sf.getFeatureGroup(), sf.getScore());
+        copyTo.addSequenceFeature(copy);
+        count++;
+      }
      }
      return count;
    }
  
    /**
     * Returns a mapping from dna to protein by inspecting sequence features of
-   * type "CDS" on the dna.
+   * type "CDS" on the dna. A mapping is constructed if the total CDS feature
+   * length is 3 times the peptide length (optionally after dropping a trailing
+   * stop codon). This method does not check whether the CDS nucleotide sequence
+   * translates to the peptide sequence.
     * 
     * @param dnaSeq
     * @param proteinSeq
@@ -2164,6 +2232,15 @@ public class AlignmentUtils
      List<int[]> ranges = findCdsPositions(dnaSeq);
      int mappedDnaLength = MappingUtils.getLength(ranges);
  
+    /*
+     * if not a whole number of codons, something is wrong,
+     * abort mapping
+     */
+    if (mappedDnaLength % CODON_LENGTH > 0)
+    {
+      return null;
+    }
+
      int proteinLength = proteinSeq.getLength();
      int proteinStart = proteinSeq.getStart();
      int proteinEnd = proteinSeq.getEnd();
@@ -2187,8 +2264,12 @@ public class AlignmentUtils
      if (codesForResidues == (proteinLength + 1))
      {
        // assuming extra codon is for STOP and not in peptide
+      // todo: check trailing codon is indeed a STOP codon
        codesForResidues--;
+      mappedDnaLength -= CODON_LENGTH;
+      MappingUtils.removeEndPositions(CODON_LENGTH, ranges);
      }
+
      if (codesForResidues == proteinLength)
      {
        proteinRange.add(new int[] { proteinStart, proteinEnd });
@@ -2199,7 +2280,7 @@ public class AlignmentUtils
  
    /**
     * Returns a list of CDS ranges found (as sequence positions base 1), i.e. of
-   * start/end positions of sequence features of type "CDS" (or a sub-type of
+   * [start, end] positions of sequence features of type "CDS" (or a sub-type of
     * CDS in the Sequence Ontology). The ranges are sorted into ascending start
     * position order, so this method is only valid for linear CDS in the same
     * sense as the protein product.
@@ -2207,62 +2288,46 @@ public class AlignmentUtils
     * @param dnaSeq
     * @return
     */
-  public static List<int[]> findCdsPositions(SequenceI dnaSeq)
+  protected static List<int[]> findCdsPositions(SequenceI dnaSeq)
    {
      List<int[]> result = new ArrayList<int[]>();
-    SequenceFeature[] sfs = dnaSeq.getSequenceFeatures();
-    if (sfs == null)
+
+    List<SequenceFeature> sfs = dnaSeq.getFeatures().getFeaturesByOntology(
+            SequenceOntologyI.CDS);
+    if (sfs.isEmpty())
      {
        return result;
      }
-
-    SequenceOntologyI so = SequenceOntologyFactory.getInstance();
-    int startPhase = 0;
+    SequenceFeatures.sortFeatures(sfs, true);
  
      for (SequenceFeature sf : sfs)
      {
+      int phase = 0;
+      try
+      {
+        phase = Integer.parseInt(sf.getPhase());
+      } catch (NumberFormatException e)
+      {
+        // ignore
+      }
        /*
-       * process a CDS feature (or a sub-type of CDS)
+       * phase > 0 on first codon means 5' incomplete - skip to the start
+       * of the next codon; example ENST00000496384
         */
-      if (so.isA(sf.getType(), SequenceOntologyI.CDS))
+      int begin = sf.getBegin();
+      int end = sf.getEnd();
+      if (result.isEmpty() && phase > 0)
        {
-        int phase = 0;
-        try
-        {
-          phase = Integer.parseInt(sf.getPhase());
-        } catch (NumberFormatException e)
-        {
-          // ignore
-        }
-        /*
-         * phase > 0 on first codon means 5' incomplete - skip to the start
-         * of the next codon; example ENST00000496384
-         */
-        int begin = sf.getBegin();
-        int end = sf.getEnd();
-        if (result.isEmpty())
+        begin += phase;
+        if (begin > end)
          {
-          begin += phase;
-          if (begin > end)
-          {
-            // shouldn't happen!
-            System.err.println(
-                    "Error: start phase extends beyond start CDS in "
-                            + dnaSeq.getName());
-          }
+          // shouldn't happen!
+          System.err
+                  .println("Error: start phase extends beyond start CDS in "
+                          + dnaSeq.getName());
          }
-        result.add(new int[] { begin, end });
        }
-    }
-
-    /*
-     * remove 'startPhase' positions (usually 0) from the first range 
-     * so we begin at the start of a complete codon
-     */
-    if (!result.isEmpty())
-    {
-      // TODO JAL-2022 correctly model start phase > 0
-      result.get(0)[0] += startPhase;
+      result.add(new int[] { begin, end });
      }
  
      /*
@@ -2272,7 +2337,7 @@ public class AlignmentUtils
       * ranges are assembled in order. Other cases should not use this method,
       * but instead construct an explicit mapping for CDS (e.g. EMBL parsing).
       */
-    Collections.sort(result, new RangeComparator(true));
+    Collections.sort(result, IntRangeComparator.ASCENDING);
      return result;
    }
  
@@ -2325,24 +2390,6 @@ public class AlignmentUtils
        count += computePeptideVariants(peptide, peptidePos, codonVariants);
      }
  
-    /*
-     * sort to get sequence features in start position order
-     * - would be better to store in Sequence as a TreeSet or NCList?
-     */
-    if (peptide.getSequenceFeatures() != null)
-    {
-      Arrays.sort(peptide.getSequenceFeatures(),
-              new Comparator<SequenceFeature>()
-              {
-                @Override
-                public int compare(SequenceFeature o1, SequenceFeature o2)
-                {
-                  int c = Integer.compare(o1.getBegin(), o2.getBegin());
-                  return c == 0 ? Integer.compare(o1.getEnd(), o2.getEnd())
-                          : c;
-                }
-              });
-    }
      return count;
    }
  
@@ -2376,7 +2423,7 @@ public class AlignmentUtils
      {
        if (var.variant != null)
        {
-        String alleles = (String) var.variant.getValue("alleles");
+        String alleles = (String) var.variant.getValue(Gff3Helper.ALLELES);
          if (alleles != null)
          {
            for (String base : alleles.split(","))
@@ -2398,7 +2445,7 @@ public class AlignmentUtils
      {
        if (var.variant != null)
        {
-        String alleles = (String) var.variant.getValue("alleles");
+        String alleles = (String) var.variant.getValue(Gff3Helper.ALLELES);
          if (alleles != null)
          {
            for (String base : alleles.split(","))
@@ -2420,7 +2467,7 @@ public class AlignmentUtils
      {
        if (var.variant != null)
        {
-        String alleles = (String) var.variant.getValue("alleles");
+        String alleles = (String) var.variant.getValue(Gff3Helper.ALLELES);
          if (alleles != null)
          {
            for (String base : alleles.split(","))
@@ -2470,10 +2517,9 @@ public class AlignmentUtils
        String trans3Char = StringUtils
                .toSentenceCase(ResidueProperties.aa2Triplet.get(trans));
        String desc = "p." + residue3Char + peptidePos + trans3Char;
-      // set score to 0f so 'graduated colour' option is offered! JAL-2060
        SequenceFeature sf = new SequenceFeature(
                SequenceOntologyI.SEQUENCE_VARIANT, desc, peptidePos,
-              peptidePos, 0f, var.getSource());
+              peptidePos, var.getSource());
        StringBuilder attributes = new StringBuilder(32);
        String id = (String) var.variant.getValue(ID);
        if (id != null)
@@ -2516,7 +2562,10 @@ public class AlignmentUtils
  
    /**
     * Builds a map whose key is position in the protein sequence, and value is a
-   * list of the base and all variants for each corresponding codon position
+   * list of the base and all variants for each corresponding codon position.
+   * <p>
+   * This depends on dna variants being held as a comma-separated list as
+   * property "alleles" on variant features.
     * 
     * @param dnaSeq
     * @param dnaToProtein
@@ -2531,10 +2580,10 @@ public class AlignmentUtils
       * LinkedHashMap ensures we keep the peptide features in sequence order
       */
      LinkedHashMap<Integer, List<DnaVariant>[]> variants = new LinkedHashMap<Integer, List<DnaVariant>[]>();
-    SequenceOntologyI so = SequenceOntologyFactory.getInstance();
  
-    SequenceFeature[] dnaFeatures = dnaSeq.getSequenceFeatures();
-    if (dnaFeatures == null)
+    List<SequenceFeature> dnaFeatures = dnaSeq.getFeatures()
+            .getFeaturesByOntology(SequenceOntologyI.SEQUENCE_VARIANT);
+    if (dnaFeatures.isEmpty())
      {
        return variants;
      }
@@ -2554,84 +2603,89 @@ public class AlignmentUtils
          // not handling multi-locus variant features
          continue;
        }
-      if (so.isA(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT))
+
+      /*
+       * ignore variant if not a SNP
+       */
+      String alls = (String) sf.getValue(Gff3Helper.ALLELES);
+      if (alls == null)
        {
-        int[] mapsTo = dnaToProtein.locateInTo(dnaCol, dnaCol);
-        if (mapsTo == null)
-        {
-          // feature doesn't lie within coding region
-          continue;
-        }
-        int peptidePosition = mapsTo[0];
-        List<DnaVariant>[] codonVariants = variants.get(peptidePosition);
-        if (codonVariants == null)
-        {
-          codonVariants = new ArrayList[CODON_LENGTH];
-          codonVariants[0] = new ArrayList<DnaVariant>();
-          codonVariants[1] = new ArrayList<DnaVariant>();
-          codonVariants[2] = new ArrayList<DnaVariant>();
-          variants.put(peptidePosition, codonVariants);
-        }
+        continue; // non-SNP VCF variant perhaps - can't process this
+      }
  
-        /*
-         * extract dna variants to a string array
-         */
-        String alls = (String) sf.getValue("alleles");
-        if (alls == null)
-        {
-          continue;
-        }
-        String[] alleles = alls.toUpperCase().split(",");
-        int i = 0;
-        for (String allele : alleles)
+      String[] alleles = alls.toUpperCase().split(",");
+      boolean isSnp = true;
+      for (String allele : alleles)
+      {
+        if (allele.trim().length() > 1)
          {
-          alleles[i++] = allele.trim(); // lose any space characters "A, G"
+          isSnp = false;
          }
+      }
+      if (!isSnp)
+      {
+        continue;
+      }
  
-        /*
-         * get this peptide's codon positions e.g. [3, 4, 5] or [4, 7, 10]
-         */
-        int[] codon = peptidePosition == lastPeptidePostion ? lastCodon
-                : MappingUtils.flattenRanges(dnaToProtein
-                        .locateInFrom(peptidePosition, peptidePosition));
-        lastPeptidePostion = peptidePosition;
-        lastCodon = codon;
+      int[] mapsTo = dnaToProtein.locateInTo(dnaCol, dnaCol);
+      if (mapsTo == null)
+      {
+        // feature doesn't lie within coding region
+        continue;
+      }
+      int peptidePosition = mapsTo[0];
+      List<DnaVariant>[] codonVariants = variants.get(peptidePosition);
+      if (codonVariants == null)
+      {
+        codonVariants = new ArrayList[CODON_LENGTH];
+        codonVariants[0] = new ArrayList<DnaVariant>();
+        codonVariants[1] = new ArrayList<DnaVariant>();
+        codonVariants[2] = new ArrayList<DnaVariant>();
+        variants.put(peptidePosition, codonVariants);
+      }
  
-        /*
-         * save nucleotide (and any variant) for each codon position
-         */
-        for (int codonPos = 0; codonPos < CODON_LENGTH; codonPos++)
+      /*
+       * get this peptide's codon positions e.g. [3, 4, 5] or [4, 7, 10]
+       */
+      int[] codon = peptidePosition == lastPeptidePostion ? lastCodon
+              : MappingUtils.flattenRanges(dnaToProtein.locateInFrom(
+                      peptidePosition, peptidePosition));
+      lastPeptidePostion = peptidePosition;
+      lastCodon = codon;
+
+      /*
+       * save nucleotide (and any variant) for each codon position
+       */
+      for (int codonPos = 0; codonPos < CODON_LENGTH; codonPos++)
+      {
+        String nucleotide = String.valueOf(
+                dnaSeq.getCharAt(codon[codonPos] - dnaStart)).toUpperCase();
+        List<DnaVariant> codonVariant = codonVariants[codonPos];
+        if (codon[codonPos] == dnaCol)
          {
-          String nucleotide = String
-                  .valueOf(dnaSeq.getCharAt(codon[codonPos] - dnaStart))
-                  .toUpperCase();
-          List<DnaVariant> codonVariant = codonVariants[codonPos];
-          if (codon[codonPos] == dnaCol)
+          if (!codonVariant.isEmpty()
+                  && codonVariant.get(0).variant == null)
            {
-            if (!codonVariant.isEmpty()
-                    && codonVariant.get(0).variant == null)
-            {
-              /*
-               * already recorded base value, add this variant
-               */
-              codonVariant.get(0).variant = sf;
-            }
-            else
-            {
-              /*
-               * add variant with base value
-               */
-              codonVariant.add(new DnaVariant(nucleotide, sf));
-            }
+            /*
+             * already recorded base value, add this variant
+             */
+            codonVariant.get(0).variant = sf;
            }
-          else if (codonVariant.isEmpty())
+          else
            {
              /*
-             * record (possibly non-varying) base value
+             * add variant with base value
               */
-            codonVariant.add(new DnaVariant(nucleotide));
+            codonVariant.add(new DnaVariant(nucleotide, sf));
            }
          }
+        else if (codonVariant.isEmpty())
+        {
+          /*
+           * record (possibly non-varying) base value
+           */
+          codonVariant.add(new DnaVariant(nucleotide));
+        }
        }
      }
      return variants;
@@ -2910,9 +2964,7 @@ public class AlignmentUtils
                seqMap.getMap().getInverse());
      }
  
-    char[] fromChars = fromSeq.getSequence();
      int toStart = seq.getStart();
-    char[] toChars = seq.getSequence();
  
      /*
       * traverse [start, end, start, end...] ranges in fromSeq
@@ -2943,10 +2995,10 @@ public class AlignmentUtils
           * of the next character of the mapped-to sequence; stop when all
           * the characters of the range have been counted
           */
-        while (mappedCharPos <= range[1] && fromCol <= fromChars.length
+        while (mappedCharPos <= range[1] && fromCol <= fromSeq.getLength()
                  && fromCol >= 0)
          {
-          if (!Comparison.isGap(fromChars[fromCol - 1]))
+          if (!Comparison.isGap(fromSeq.getCharAt(fromCol - 1)))
            {
              /*
               * mapped from sequence has a character in this column
@@ -2958,7 +3010,7 @@ public class AlignmentUtils
                seqsMap = new HashMap<SequenceI, Character>();
                map.put(fromCol, seqsMap);
              }
-            seqsMap.put(seq, toChars[mappedCharPos - toStart]);
+            seqsMap.put(seq, seq.getCharAt(mappedCharPos - toStart));
              mappedCharPos++;
            }
            fromCol += (forward ? 1 : -1);