Merge branch 'develop' into alpha/JAL-3362_Jalview_212_alpha

[jalview.git] / src / jalview / analysis / AlignmentUtils.java
diff --git a/src/jalview/analysis/AlignmentUtils.java b/src/jalview/analysis/AlignmentUtils.java

index 55efaa5..c25e56b 100644 (file)
--- a/src/jalview/analysis/AlignmentUtils.java
+++ b/src/jalview/analysis/AlignmentUtils.java
@@ -20,8 +20,6 @@
   */
  package jalview.analysis;
  
-import static jalview.io.gff.GffConstants.CLINICAL_SIGNIFICANCE;
-
  import jalview.commands.RemoveGapColCommand;
  import jalview.datamodel.AlignedCodon;
  import jalview.datamodel.AlignedCodonFrame;
@@ -38,7 +36,6 @@ import jalview.datamodel.SequenceFeature;
  import jalview.datamodel.SequenceGroup;
  import jalview.datamodel.SequenceI;
  import jalview.datamodel.features.SequenceFeatures;
-import jalview.io.gff.Gff3Helper;
  import jalview.io.gff.SequenceOntologyI;
  import jalview.schemes.ResidueProperties;
  import jalview.util.Comparison;
@@ -46,10 +43,7 @@ import jalview.util.DBRefUtils;
  import jalview.util.IntRangeComparator;
  import jalview.util.MapList;
  import jalview.util.MappingUtils;
-import jalview.util.StringUtils;
  
-import java.io.UnsupportedEncodingException;
-import java.net.URLEncoder;
  import java.util.ArrayList;
  import java.util.Arrays;
  import java.util.Collection;
@@ -1465,28 +1459,31 @@ public class AlignmentUtils
        final List<AlignmentAnnotation> result = new ArrayList<>();
        for (AlignmentAnnotation dsann : datasetAnnotations)
        {
-        /*
-         * Find matching annotations on the alignment. If none is found, then
-         * add this annotation to the list of 'addable' annotations for this
-         * sequence.
-         */
-        final Iterable<AlignmentAnnotation> matchedAlignmentAnnotations = al
-                .findAnnotations(seq, dsann.getCalcId(), dsann.label);
-        if (!matchedAlignmentAnnotations.iterator().hasNext())
+        if (dsann.annotations != null) // ignore non-positional annotation
          {
-          result.add(dsann);
-          if (labelForCalcId != null)
+          /*
+           * Find matching annotations on the alignment. If none is found, then
+           * add this annotation to the list of 'addable' annotations for this
+           * sequence.
+           */
+          final Iterable<AlignmentAnnotation> matchedAlignmentAnnotations = al
+                  .findAnnotations(seq, dsann.getCalcId(), dsann.label);
+          if (!matchedAlignmentAnnotations.iterator().hasNext())
            {
-            labelForCalcId.put(dsann.getCalcId(), dsann.label);
+            result.add(dsann);
+            if (labelForCalcId != null)
+            {
+              labelForCalcId.put(dsann.getCalcId(), dsann.label);
+            }
            }
          }
-      }
-      /*
-       * Save any addable annotations for this sequence
-       */
-      if (!result.isEmpty())
-      {
-        candidates.put(seq, result);
+        /*
+         * Save any addable annotations for this sequence
+         */
+        if (!result.isEmpty())
+        {
+          candidates.put(seq, result);
+        }
        }
      }
    }
@@ -1737,15 +1734,8 @@ public class AlignmentUtils
  
            cdsSeqs.add(cdsSeq);
  
-          if (!dataset.getSequences().contains(cdsSeqDss))
-          {
-            // check if this sequence is a newly created one
-            // so needs adding to the dataset
-            dataset.addSequence(cdsSeqDss);
-          }
-
            /*
-           * add a mapping from CDS to the (unchanged) mapped to range
+           * build the mapping from CDS to protein
             */
            List<int[]> cdsRange = Collections
                    .singletonList(new int[]
@@ -1754,16 +1744,26 @@ public class AlignmentUtils
            MapList cdsToProteinMap = new MapList(cdsRange,
                    mapList.getToRanges(), mapList.getFromRatio(),
                    mapList.getToRatio());
-          AlignedCodonFrame cdsToProteinMapping = new AlignedCodonFrame();
-          cdsToProteinMapping.addMap(cdsSeqDss, proteinProduct,
-                  cdsToProteinMap);
  
-          /*
-           * guard against duplicating the mapping if repeating this action
-           */
-          if (!mappings.contains(cdsToProteinMapping))
+          if (!dataset.getSequences().contains(cdsSeqDss))
            {
-            mappings.add(cdsToProteinMapping);
+            /*
+             * if this sequence is a newly created one, add it to the dataset
+             * and made a CDS to protein mapping (if sequence already exists,
+             * CDS-to-protein mapping _is_ the transcript-to-protein mapping)
+             */
+            dataset.addSequence(cdsSeqDss);
+            AlignedCodonFrame cdsToProteinMapping = new AlignedCodonFrame();
+            cdsToProteinMapping.addMap(cdsSeqDss, proteinProduct,
+                  cdsToProteinMap);
+
+            /*
+             * guard against duplicating the mapping if repeating this action
+             */
+            if (!mappings.contains(cdsToProteinMapping))
+            {
+              mappings.add(cdsToProteinMapping);
+            }
            }
  
            propagateDBRefsToCDS(cdsSeqDss, dnaSeq.getDatasetSequence(),
@@ -2389,97 +2389,6 @@ public class AlignmentUtils
    }
  
    /**
-   * Helper method that adds a peptide variant feature. ID and
-   * clinical_significance attributes of the dna variant (if present) are copied
-   * to the new feature.
-   * 
-   * @param peptide
-   * @param peptidePos
-   * @param residue
-   * @param var
-   * @param codon
-   *          the variant codon e.g. aCg
-   * @param canonical
-   *          the 'normal' codon e.g. aTg
-   * @return true if a feature was added, else false
-   */
-  static boolean addPeptideVariant(SequenceI peptide, int peptidePos,
-          String residue, DnaVariant var, String codon, String canonical)
-  {
-    /*
-     * get peptide translation of codon e.g. GAT -> D
-     * note that variants which are not single alleles,
-     * e.g. multibase variants or HGMD_MUTATION etc
-     * are currently ignored here
-     */
-    String trans = codon.contains("-") ? null
-            : (codon.length() > CODON_LENGTH ? null
-                    : ResidueProperties.codonTranslate(codon));
-    if (trans == null)
-    {
-      return false;
-    }
-    String desc = canonical + "/" + codon;
-    String featureType = "";
-    if (trans.equals(residue))
-    {
-      featureType = SequenceOntologyI.SYNONYMOUS_VARIANT;
-    }
-    else if (ResidueProperties.STOP.equals(trans))
-    {
-      featureType = SequenceOntologyI.STOP_GAINED;
-    }
-    else
-    {
-      String residue3Char = StringUtils
-              .toSentenceCase(ResidueProperties.aa2Triplet.get(residue));
-      String trans3Char = StringUtils
-              .toSentenceCase(ResidueProperties.aa2Triplet.get(trans));
-      desc = "p." + residue3Char + peptidePos + trans3Char;
-      featureType = SequenceOntologyI.NONSYNONYMOUS_VARIANT;
-    }
-    SequenceFeature sf = new SequenceFeature(featureType, desc, peptidePos,
-            peptidePos, var.getSource());
-
-    StringBuilder attributes = new StringBuilder(32);
-    String id = (String) var.variant.getValue(VARIANT_ID);
-    if (id != null)
-    {
-      if (id.startsWith(SEQUENCE_VARIANT))
-      {
-        id = id.substring(SEQUENCE_VARIANT.length());
-      }
-      sf.setValue(VARIANT_ID, id);
-      attributes.append(VARIANT_ID).append("=").append(id);
-      // TODO handle other species variants JAL-2064
-      StringBuilder link = new StringBuilder(32);
-      try
-      {
-        link.append(desc).append(" ").append(id).append(
-                "|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=")
-                .append(URLEncoder.encode(id, "UTF-8"));
-        sf.addLink(link.toString());
-      } catch (UnsupportedEncodingException e)
-      {
-        // as if
-      }
-    }
-    String clinSig = (String) var.variant.getValue(CLINICAL_SIGNIFICANCE);
-    if (clinSig != null)
-    {
-      sf.setValue(CLINICAL_SIGNIFICANCE, clinSig);
-      attributes.append(";").append(CLINICAL_SIGNIFICANCE).append("=")
-              .append(clinSig);
-    }
-    peptide.addSequenceFeature(sf);
-    if (attributes.length() > 0)
-    {
-      sf.setAttributes(attributes.toString());
-    }
-    return true;
-  }
-
-  /**
     * Makes an alignment with a copy of the given sequences, adding in any
     * non-redundant sequences which are mapped to by the cross-referenced
     * sequences.
@@ -2667,6 +2576,13 @@ public class AlignmentUtils
      {
        List<SequenceI> alignedSequences = alignedDatasets
                .get(seq.getDatasetSequence());
+      if (alignedSequences.isEmpty())
+      {
+        /*
+         * defensive check - shouldn't happen! (JAL-3536)
+         */
+        continue;
+      }
        SequenceI alignedSeq = alignedSequences.get(0);
  
        /*