Merge branch 'develop' into alpha/JAL-3362_Jalview_212_alpha
[jalview.git] / src / jalview / analysis / AlignmentUtils.java
index c7ed332..c25e56b 100644 (file)
@@ -20,8 +20,6 @@
  */
 package jalview.analysis;
 
-import static jalview.io.gff.GffConstants.CLINICAL_SIGNIFICANCE;
-
 import jalview.commands.RemoveGapColCommand;
 import jalview.datamodel.AlignedCodon;
 import jalview.datamodel.AlignedCodonFrame;
@@ -38,7 +36,6 @@ import jalview.datamodel.SequenceFeature;
 import jalview.datamodel.SequenceGroup;
 import jalview.datamodel.SequenceI;
 import jalview.datamodel.features.SequenceFeatures;
-import jalview.io.gff.Gff3Helper;
 import jalview.io.gff.SequenceOntologyI;
 import jalview.schemes.ResidueProperties;
 import jalview.util.Comparison;
@@ -46,10 +43,7 @@ import jalview.util.DBRefUtils;
 import jalview.util.IntRangeComparator;
 import jalview.util.MapList;
 import jalview.util.MappingUtils;
-import jalview.util.StringUtils;
 
-import java.io.UnsupportedEncodingException;
-import java.net.URLEncoder;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collection;
@@ -1740,15 +1734,8 @@ public class AlignmentUtils
 
           cdsSeqs.add(cdsSeq);
 
-          if (!dataset.getSequences().contains(cdsSeqDss))
-          {
-            // check if this sequence is a newly created one
-            // so needs adding to the dataset
-            dataset.addSequence(cdsSeqDss);
-          }
-
           /*
-           * add a mapping from CDS to the (unchanged) mapped to range
+           * build the mapping from CDS to protein
            */
           List<int[]> cdsRange = Collections
                   .singletonList(new int[]
@@ -1757,16 +1744,26 @@ public class AlignmentUtils
           MapList cdsToProteinMap = new MapList(cdsRange,
                   mapList.getToRanges(), mapList.getFromRatio(),
                   mapList.getToRatio());
-          AlignedCodonFrame cdsToProteinMapping = new AlignedCodonFrame();
-          cdsToProteinMapping.addMap(cdsSeqDss, proteinProduct,
-                  cdsToProteinMap);
 
-          /*
-           * guard against duplicating the mapping if repeating this action
-           */
-          if (!mappings.contains(cdsToProteinMapping))
+          if (!dataset.getSequences().contains(cdsSeqDss))
           {
-            mappings.add(cdsToProteinMapping);
+            /*
+             * if this sequence is a newly created one, add it to the dataset
+             * and made a CDS to protein mapping (if sequence already exists,
+             * CDS-to-protein mapping _is_ the transcript-to-protein mapping)
+             */
+            dataset.addSequence(cdsSeqDss);
+            AlignedCodonFrame cdsToProteinMapping = new AlignedCodonFrame();
+            cdsToProteinMapping.addMap(cdsSeqDss, proteinProduct,
+                  cdsToProteinMap);
+
+            /*
+             * guard against duplicating the mapping if repeating this action
+             */
+            if (!mappings.contains(cdsToProteinMapping))
+            {
+              mappings.add(cdsToProteinMapping);
+            }
           }
 
           propagateDBRefsToCDS(cdsSeqDss, dnaSeq.getDatasetSequence(),
@@ -2392,97 +2389,6 @@ public class AlignmentUtils
   }
 
   /**
-   * Helper method that adds a peptide variant feature. ID and
-   * clinical_significance attributes of the dna variant (if present) are copied
-   * to the new feature.
-   * 
-   * @param peptide
-   * @param peptidePos
-   * @param residue
-   * @param var
-   * @param codon
-   *          the variant codon e.g. aCg
-   * @param canonical
-   *          the 'normal' codon e.g. aTg
-   * @return true if a feature was added, else false
-   */
-  static boolean addPeptideVariant(SequenceI peptide, int peptidePos,
-          String residue, DnaVariant var, String codon, String canonical)
-  {
-    /*
-     * get peptide translation of codon e.g. GAT -> D
-     * note that variants which are not single alleles,
-     * e.g. multibase variants or HGMD_MUTATION etc
-     * are currently ignored here
-     */
-    String trans = codon.contains("-") ? null
-            : (codon.length() > CODON_LENGTH ? null
-                    : ResidueProperties.codonTranslate(codon));
-    if (trans == null)
-    {
-      return false;
-    }
-    String desc = canonical + "/" + codon;
-    String featureType = "";
-    if (trans.equals(residue))
-    {
-      featureType = SequenceOntologyI.SYNONYMOUS_VARIANT;
-    }
-    else if (ResidueProperties.STOP.equals(trans))
-    {
-      featureType = SequenceOntologyI.STOP_GAINED;
-    }
-    else
-    {
-      String residue3Char = StringUtils
-              .toSentenceCase(ResidueProperties.aa2Triplet.get(residue));
-      String trans3Char = StringUtils
-              .toSentenceCase(ResidueProperties.aa2Triplet.get(trans));
-      desc = "p." + residue3Char + peptidePos + trans3Char;
-      featureType = SequenceOntologyI.NONSYNONYMOUS_VARIANT;
-    }
-    SequenceFeature sf = new SequenceFeature(featureType, desc, peptidePos,
-            peptidePos, var.getSource());
-
-    StringBuilder attributes = new StringBuilder(32);
-    String id = (String) var.variant.getValue(VARIANT_ID);
-    if (id != null)
-    {
-      if (id.startsWith(SEQUENCE_VARIANT))
-      {
-        id = id.substring(SEQUENCE_VARIANT.length());
-      }
-      sf.setValue(VARIANT_ID, id);
-      attributes.append(VARIANT_ID).append("=").append(id);
-      // TODO handle other species variants JAL-2064
-      StringBuilder link = new StringBuilder(32);
-      try
-      {
-        link.append(desc).append(" ").append(id).append(
-                "|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=")
-                .append(URLEncoder.encode(id, "UTF-8"));
-        sf.addLink(link.toString());
-      } catch (UnsupportedEncodingException e)
-      {
-        // as if
-      }
-    }
-    String clinSig = (String) var.variant.getValue(CLINICAL_SIGNIFICANCE);
-    if (clinSig != null)
-    {
-      sf.setValue(CLINICAL_SIGNIFICANCE, clinSig);
-      attributes.append(";").append(CLINICAL_SIGNIFICANCE).append("=")
-              .append(clinSig);
-    }
-    peptide.addSequenceFeature(sf);
-    if (attributes.length() > 0)
-    {
-      sf.setAttributes(attributes.toString());
-    }
-    return true;
-  }
-
-  /**
    * Makes an alignment with a copy of the given sequences, adding in any
    * non-redundant sequences which are mapped to by the cross-referenced
    * sequences.
@@ -2670,6 +2576,13 @@ public class AlignmentUtils
     {
       List<SequenceI> alignedSequences = alignedDatasets
               .get(seq.getDatasetSequence());
+      if (alignedSequences.isEmpty())
+      {
+        /*
+         * defensive check - shouldn't happen! (JAL-3536)
+         */
+        continue;
+      }
       SequenceI alignedSeq = alignedSequences.get(0);
 
       /*