X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fanalysis%2FAlignmentUtils.java;h=aacd92dfb50de2224b4d01ee2d94ceb1bc3d1bbb;hb=b5667f39acdf309cd92881b73edfda591e0acaf4;hp=bdc5fc16373ad8465ac7d3611ad0d1b04844bdd3;hpb=6a99631de459c9ba52894900d08be0a2d6500488;p=jalview.git

diff --git a/src/jalview/analysis/AlignmentUtils.java b/src/jalview/analysis/AlignmentUtils.java
index bdc5fc1..aacd92d 100644
--- a/src/jalview/analysis/AlignmentUtils.java
+++ b/src/jalview/analysis/AlignmentUtils.java
@@ -74,12 +74,15 @@ import java.util.TreeMap;
  */
 public class AlignmentUtils
 {
-
   private static final int CODON_LENGTH = 3;
 
   private static final String SEQUENCE_VARIANT = "sequence_variant:";
 
-  private static final String ID = "ID";
+  /*
+   * the 'id' attribute is provided for variant features fetched from
+   * Ensembl using its REST service with JSON format 
+   */
+  public static final String VARIANT_ID = "id";
 
   /**
    * A data model to hold the 'normal' base value at a position, and an optional
@@ -1461,28 +1464,31 @@ public class AlignmentUtils
       final List<AlignmentAnnotation> result = new ArrayList<>();
       for (AlignmentAnnotation dsann : datasetAnnotations)
       {
-        /*
-         * Find matching annotations on the alignment. If none is found, then
-         * add this annotation to the list of 'addable' annotations for this
-         * sequence.
-         */
-        final Iterable<AlignmentAnnotation> matchedAlignmentAnnotations = al
-                .findAnnotations(seq, dsann.getCalcId(), dsann.label);
-        if (!matchedAlignmentAnnotations.iterator().hasNext())
+        if (dsann.annotations != null) // ignore non-positional annotation
         {
-          result.add(dsann);
-          if (labelForCalcId != null)
+          /*
+           * Find matching annotations on the alignment. If none is found, then
+           * add this annotation to the list of 'addable' annotations for this
+           * sequence.
+           */
+          final Iterable<AlignmentAnnotation> matchedAlignmentAnnotations = al
+                  .findAnnotations(seq, dsann.getCalcId(), dsann.label);
+          if (!matchedAlignmentAnnotations.iterator().hasNext())
           {
-            labelForCalcId.put(dsann.getCalcId(), dsann.label);
+            result.add(dsann);
+            if (labelForCalcId != null)
+            {
+              labelForCalcId.put(dsann.getCalcId(), dsann.label);
+            }
           }
         }
-      }
-      /*
-       * Save any addable annotations for this sequence
-       */
-      if (!result.isEmpty())
-      {
-        candidates.put(seq, result);
+        /*
+         * Save any addable annotations for this sequence
+         */
+        if (!result.isEmpty())
+        {
+          candidates.put(seq, result);
+        }
       }
     }
   }
@@ -1888,7 +1894,7 @@ public class AlignmentUtils
    * @param seqMappings
    *          the set of mappings involving dnaSeq
    * @param aMapping
-   *          an initial candidate from seqMappings
+   *          a transcript-to-peptide mapping
    * @return
    */
   static SequenceI findCdsForProtein(List<AlignedCodonFrame> mappings,
@@ -1913,7 +1919,15 @@ public class AlignmentUtils
     if (mappedFromLength == dnaLength
             || mappedFromLength == dnaLength - CODON_LENGTH)
     {
-      return seqDss;
+      /*
+       * if sequence has CDS features, this is a transcript with no UTR
+       * - do not take this as the CDS sequence! (JAL-2789)
+       */
+      if (seqDss.getFeatures().getFeaturesByOntology(SequenceOntologyI.CDS)
+              .isEmpty())
+      {
+        return seqDss;
+      }
     }
 
     /*
@@ -1938,10 +1952,12 @@ public class AlignmentUtils
           {
             /*
             * found a 3:1 mapping to the protein product which covers
-            * the whole dna sequence i.e. is from CDS; finally check it
-            * is from the dna start sequence
+            * the whole dna sequence i.e. is from CDS; finally check the CDS
+            * is mapped from the given dna start sequence
             */
             SequenceI cdsSeq = map.getFromSeq();
+            // todo this test is weak if seqMappings contains multiple mappings;
+            // we get away with it if transcript:cds relationship is 1:1
             List<AlignedCodonFrame> dnaToCdsMaps = MappingUtils
                     .findMappingsForSequence(cdsSeq, seqMappings);
             if (!dnaToCdsMaps.isEmpty())
@@ -2056,9 +2072,11 @@ public class AlignmentUtils
   protected static List<DBRefEntry> propagateDBRefsToCDS(SequenceI cdsSeq,
           SequenceI contig, SequenceI proteinProduct, Mapping mapping)
   {
+
     // gather direct refs from contig congruent with mapping
     List<DBRefEntry> direct = new ArrayList<>();
     HashSet<String> directSources = new HashSet<>();
+
     if (contig.getDBRefs() != null)
     {
       for (DBRefEntry dbr : contig.getDBRefs())
@@ -2235,12 +2253,13 @@ public class AlignmentUtils
     int mappedDnaLength = MappingUtils.getLength(ranges);
 
     /*
-     * if not a whole number of codons, something is wrong,
-     * abort mapping
+     * if not a whole number of codons, truncate mapping
      */
-    if (mappedDnaLength % CODON_LENGTH > 0)
+    int codonRemainder = mappedDnaLength % CODON_LENGTH;
+    if (codonRemainder > 0)
     {
-      return null;
+      mappedDnaLength -= codonRemainder;
+      MappingUtils.removeEndPositions(codonRemainder, ranges);
     }
 
     int proteinLength = proteinSeq.getLength();
@@ -2430,11 +2449,14 @@ public class AlignmentUtils
         {
           for (String base : alleles.split(","))
           {
-            if (!base1.equals(base))
+            if (!base1.equalsIgnoreCase(base))
             {
-              String codon = base + base2 + base3;
+              String codon = base.toUpperCase() + base2.toLowerCase()
+                      + base3.toLowerCase();
+              String canonical = base1.toUpperCase() + base2.toLowerCase()
+                      + base3.toLowerCase();
               if (addPeptideVariant(peptide, peptidePos, residue, var,
-                      codon))
+                      codon, canonical))
               {
                 count++;
               }
@@ -2456,11 +2478,14 @@ public class AlignmentUtils
         {
           for (String base : alleles.split(","))
           {
-            if (!base2.equals(base))
+            if (!base2.equalsIgnoreCase(base))
             {
-              String codon = base1 + base + base3;
+              String codon = base1.toLowerCase() + base.toUpperCase()
+                      + base3.toLowerCase();
+              String canonical = base1.toLowerCase() + base2.toUpperCase()
+                      + base3.toLowerCase();
               if (addPeptideVariant(peptide, peptidePos, residue, var,
-                      codon))
+                      codon, canonical))
               {
                 count++;
               }
@@ -2482,11 +2507,14 @@ public class AlignmentUtils
         {
           for (String base : alleles.split(","))
           {
-            if (!base3.equals(base))
+            if (!base3.equalsIgnoreCase(base))
             {
-              String codon = base1 + base2 + base;
+              String codon = base1.toLowerCase() + base2.toLowerCase()
+                      + base.toUpperCase();
+              String canonical = base1.toLowerCase() + base2.toLowerCase()
+                      + base3.toUpperCase();
               if (addPeptideVariant(peptide, peptidePos, residue, var,
-                      codon))
+                      codon, canonical))
               {
                 count++;
               }
@@ -2500,20 +2528,22 @@ public class AlignmentUtils
   }
 
   /**
-   * Helper method that adds a peptide variant feature, provided the given codon
-   * translates to a value different to the current residue (is a non-synonymous
-   * variant). ID and clinical_significance attributes of the dna variant (if
-   * present) are copied to the new feature.
+   * Helper method that adds a peptide variant feature. ID and
+   * clinical_significance attributes of the dna variant (if present) are copied
+   * to the new feature.
    * 
    * @param peptide
    * @param peptidePos
    * @param residue
    * @param var
    * @param codon
+   *          the variant codon e.g. aCg
+   * @param canonical
+   *          the 'normal' codon e.g. aTg
    * @return true if a feature was added, else false
    */
   static boolean addPeptideVariant(SequenceI peptide, int peptidePos,
-          String residue, DnaVariant var, String codon)
+          String residue, DnaVariant var, String codon, String canonical)
   {
     /*
      * get peptide translation of codon e.g. GAT -> D
@@ -2528,7 +2558,7 @@ public class AlignmentUtils
     {
       return false;
     }
-    String desc = codon;
+    String desc = canonical + "/" + codon;
     String featureType = "";
     if (trans.equals(residue))
     {
@@ -2551,15 +2581,15 @@ public class AlignmentUtils
             peptidePos, var.getSource());
 
     StringBuilder attributes = new StringBuilder(32);
-    String id = (String) var.variant.getValue(ID);
+    String id = (String) var.variant.getValue(VARIANT_ID);
     if (id != null)
     {
       if (id.startsWith(SEQUENCE_VARIANT))
       {
         id = id.substring(SEQUENCE_VARIANT.length());
       }
-      sf.setValue(ID, id);
-      attributes.append(ID).append("=").append(id);
+      sf.setValue(VARIANT_ID, id);
+      attributes.append(VARIANT_ID).append("=").append(id);
       // TODO handle other species variants JAL-2064
       StringBuilder link = new StringBuilder(32);
       try