Merge branch 'feature/JAL-3187linkedFeatures' into
[jalview.git] / src / jalview / datamodel / MappedFeatures.java
diff --git a/src/jalview/datamodel/MappedFeatures.java b/src/jalview/datamodel/MappedFeatures.java
new file mode 100644 (file)
index 0000000..07d3857
--- /dev/null
@@ -0,0 +1,154 @@
+package jalview.datamodel;
+
+import jalview.io.gff.Gff3Helper;
+import jalview.schemes.ResidueProperties;
+import jalview.util.MappingUtils;
+import jalview.util.StringUtils;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * A data bean to hold a list of mapped sequence features (e.g. CDS features
+ * mapped from protein), and the mapping between the sequences
+ * 
+ * @author gmcarstairs
+ */
+public class MappedFeatures
+{
+  /*
+   * the mapping from CDS to peptide
+   */
+  public final Mapping mapping;
+
+  /*
+   * the CDS sequence mapped to
+   */
+  public final SequenceI fromSeq;
+
+  /*
+   * the residue position in the peptide sequence
+   */
+  public final int fromPosition;
+
+  /*
+   * the peptide residue at the position 
+   */
+  public final char fromResidue;
+
+  /*
+   * features on CDS that overlap the codon positions
+   */
+  public final List<SequenceFeature> features;
+
+  /**
+   * Constructor. The arguments are stored as given (no copies are made).
+   * 
+   * @param theMapping
+   *          the mapping between the two sequences
+   * @param from
+   *          the sequence whose bases are read to build the codon
+   * @param pos
+   *          the residue position the features relate to
+   * @param res
+   *          the residue at that position
+   * @param theFeatures
+   *          the features to inspect for allele variants
+   */
+  public MappedFeatures(Mapping theMapping, SequenceI from, int pos,
+          char res,
+          List<SequenceFeature> theFeatures)
+  {
+    mapping = theMapping;
+    fromSeq = from;
+    fromPosition = pos;
+    fromResidue = res;
+    features = theFeatures;
+  }
+
+  /**
+   * Computes and returns a (possibly empty) list of HGVS notation peptide
+   * variants derived from codon allele variants. Each entry has the form
+   * "p." + three-letter code of {@code fromResidue} + {@code fromPosition} +
+   * three-letter code of the variant residue; duplicates are not added.
+   * <p>
+   * Only single-position features that lie on one of the three codon
+   * positions and carry an alleles value are considered, and only
+   * single-character alleles are used. Empty allele tokens, and codons for
+   * which no translation is returned, are skipped.
+   * 
+   * @return the list of variants, empty if the position does not map to
+   *         exactly three codon positions or no variant changes the residue
+   */
+  public List<String> findProteinVariants()
+  {
+    List<String> vars = new ArrayList<>();
+
+    /*
+     * determine canonical codon
+     */
+    int[] codonPos = MappingUtils.flattenRanges(
+            mapping.getMap().locateInFrom(fromPosition, fromPosition));
+    if (codonPos.length != 3)
+    {
+      // error
+      return vars;
+    }
+    final char[] baseCodon = new char[3];
+    int cdsStart = fromSeq.getStart();
+    for (int i = 0; i < 3; i++)
+    {
+      baseCodon[i] = fromSeq.getCharAt(codonPos[i] - cdsStart);
+    }
+
+    // todo avoid duplication of code in AlignmentUtils.buildDnaVariantsMap
+
+    for (SequenceFeature sf : features)
+    {
+      int cdsPos = sf.getBegin();
+      if (cdsPos != sf.getEnd())
+      {
+        // not handling multi-locus variant features
+        continue;
+      }
+      int codonIndex = indexInCodon(codonPos, cdsPos);
+      if (codonIndex < 0)
+      {
+        // e.g. feature on intron within spliced codon!
+        continue;
+      }
+
+      String alls = (String) sf.getValue(Gff3Helper.ALLELES);
+      if (alls == null)
+      {
+        continue;
+      }
+      String from3 = StringUtils.toSentenceCase(
+              ResidueProperties.aa2Triplet
+                      .get(String.valueOf(fromResidue)));
+
+      /*
+       * make a peptide variant for each SNP allele 
+       * e.g. C,G,T gives variants G and T for base C
+       */
+      for (String token : alls.toUpperCase().split(","))
+      {
+        String allele = token.trim();
+        if (allele.length() != 1)
+        {
+          /*
+           * longer than one base is a multi-locus variant; an empty token
+           * (e.g. from "" or "C,,T") has no base to substitute and would
+           * otherwise fail on charAt(0)
+           */
+          continue;
+        }
+
+        /*
+         * poke variant base into a copy of the canonical codon
+         */
+        char[] variantCodon = baseCodon.clone();
+        variantCodon[codonIndex] = allele.charAt(0);
+        String peptide = ResidueProperties
+                .codonTranslate(new String(variantCodon));
+        if (peptide == null || peptide.isEmpty())
+        {
+          // no translation returned for this codon, so nothing to report
+          continue;
+        }
+        // NOTE(review): only the first character of the translation is
+        // compared with fromResidue - confirm this is adequate should
+        // codonTranslate ever return a multi-character value
+        if (fromResidue != peptide.charAt(0))
+        {
+          String to3 = StringUtils.toSentenceCase(
+                  ResidueProperties.aa2Triplet.get(peptide));
+          String var = "p." + from3 + fromPosition + to3;
+          if (!vars.contains(var))
+          {
+            vars.add(var);
+          }
+        }
+      }
+    }
+
+    return vars;
+  }
+
+  /**
+   * Returns the index (0, 1 or 2) of the first entry of codonPos equal to
+   * pos, or -1 if pos is not one of the codon positions
+   * 
+   * @param codonPos
+   *          the three codon positions
+   * @param pos
+   *          the position to look for
+   * @return the matching index, or -1 if none
+   */
+  private static int indexInCodon(int[] codonPos, int pos)
+  {
+    for (int i = 0; i < codonPos.length; i++)
+    {
+      if (codonPos[i] == pos)
+      {
+        return i;
+      }
+    }
+    return -1;
+  }
+}