JAL-2049 set Ensembl variant source to ENSEMBL if '.' (not provided) and
author gmungoc <g.m.carstairs@dundee.ac.uk>
Fri, 26 Aug 2016 11:09:27 +0000 (12:09 +0100)
committer gmungoc <g.m.carstairs@dundee.ac.uk>
Fri, 26 Aug 2016 11:09:27 +0000 (12:09 +0100)
in computed peptide variants

src/jalview/analysis/AlignmentUtils.java
src/jalview/ext/ensembl/EnsemblSeqProxy.java
test/jalview/analysis/AlignmentUtilsTests.java

index d93f42f..d1cd5a3 100644 (file)
@@ -73,6 +73,7 @@ import java.util.TreeMap;
 public class AlignmentUtils
 {
 
+  private static final int CODON_LENGTH = 3;
   private static final String SEQUENCE_VARIANT = "sequence_variant:";
   private static final String ID = "ID";
 
@@ -80,15 +81,16 @@ public class AlignmentUtils
    * A data model to hold the 'normal' base value at a position, and an optional
    * sequence variant feature
    */
-  static class DnaVariant
+  static final class DnaVariant
   {
-    String base;
+    final String base;
 
     SequenceFeature variant;
 
     DnaVariant(String nuc)
     {
       base = nuc;
+      variant = null;
     }
 
     DnaVariant(String nuc, SequenceFeature var)
@@ -96,6 +98,11 @@ public class AlignmentUtils
       base = nuc;
       variant = var;
     }
+
+    public String getSource()
+    {
+      return variant == null ? null : variant.getFeatureGroup();
+    }
   }
 
   /**
@@ -428,7 +435,7 @@ public class AlignmentUtils
     /*
      * cdnaStart/End, proteinStartEnd are base 1 (for dataset sequence mapping)
      */
-    final int mappedLength = 3 * aaSeqChars.length;
+    final int mappedLength = CODON_LENGTH * aaSeqChars.length;
     int cdnaLength = cdnaSeqChars.length;
     int cdnaStart = cdnaSeq.getStart();
     int cdnaEnd = cdnaSeq.getEnd();
@@ -440,14 +447,14 @@ public class AlignmentUtils
      */
     if (cdnaLength != mappedLength && cdnaLength > 2)
     {
-      String lastCodon = String.valueOf(cdnaSeqChars, cdnaLength - 3, 3)
+      String lastCodon = String.valueOf(cdnaSeqChars, cdnaLength - CODON_LENGTH, CODON_LENGTH)
               .toUpperCase();
       for (String stop : ResidueProperties.STOP)
       {
         if (lastCodon.equals(stop))
         {
-          cdnaEnd -= 3;
-          cdnaLength -= 3;
+          cdnaEnd -= CODON_LENGTH;
+          cdnaLength -= CODON_LENGTH;
           break;
         }
       }
@@ -459,12 +466,12 @@ public class AlignmentUtils
     int startOffset = 0;
     if (cdnaLength != mappedLength
             && cdnaLength > 2
-            && String.valueOf(cdnaSeqChars, 0, 3).toUpperCase()
+            && String.valueOf(cdnaSeqChars, 0, CODON_LENGTH).toUpperCase()
                     .equals(ResidueProperties.START))
     {
-      startOffset += 3;
-      cdnaStart += 3;
-      cdnaLength -= 3;
+      startOffset += CODON_LENGTH;
+      cdnaStart += CODON_LENGTH;
+      cdnaLength -= CODON_LENGTH;
     }
 
     if (translatesAs(cdnaSeqChars, startOffset, aaSeqChars))
@@ -473,7 +480,7 @@ public class AlignmentUtils
        * protein is translation of dna (+/- start/stop codons)
        */
       MapList map = new MapList(new int[] { cdnaStart, cdnaEnd }, new int[]
-      { proteinStart, proteinEnd }, 3, 1);
+      { proteinStart, proteinEnd }, CODON_LENGTH, 1);
       return map;
     }
 
@@ -504,9 +511,9 @@ public class AlignmentUtils
     int aaPos = 0;
     int dnaPos = cdnaStart;
     for (; dnaPos < cdnaSeqChars.length - 2
-            && aaPos < aaSeqChars.length; dnaPos += 3, aaPos++)
+            && aaPos < aaSeqChars.length; dnaPos += CODON_LENGTH, aaPos++)
     {
-      String codon = String.valueOf(cdnaSeqChars, dnaPos, 3);
+      String codon = String.valueOf(cdnaSeqChars, dnaPos, CODON_LENGTH);
       final String translated = ResidueProperties.codonTranslate(codon);
 
       /*
@@ -542,9 +549,9 @@ public class AlignmentUtils
     {
       return true;
     }
-    if (dnaPos == cdnaSeqChars.length - 3)
+    if (dnaPos == cdnaSeqChars.length - CODON_LENGTH)
     {
-      String codon = String.valueOf(cdnaSeqChars, dnaPos, 3);
+      String codon = String.valueOf(cdnaSeqChars, dnaPos, CODON_LENGTH);
       if ("STOP".equals(ResidueProperties.codonTranslate(codon)))
       {
         return true;
@@ -895,7 +902,8 @@ public class AlignmentUtils
       }
       width = Math.max(dnaSeq.getLength(), width);
     }
-    int oldwidth, diff;
+    int oldwidth;
+    int diff;
     for (SequenceI dnaSeq : dna.getSequences())
     {
       oldwidth = dnaSeq.getLength();
@@ -935,9 +943,9 @@ public class AlignmentUtils
     for (AlignedCodonFrame mapping : dnaMappings)
     {
       SequenceI peptide = mapping.findAlignedSequence(cdsSeq, protein);
-      int peptideLength = peptide.getLength();
       if (peptide != null)
       {
+        int peptideLength = peptide.getLength();
         Mapping map = mapping.getMappingBetween(cdsSeq, peptide);
         if (map != null)
         {
@@ -951,7 +959,7 @@ public class AlignmentUtils
                   .getFromRanges());
           int mappedToLength = MappingUtils
                   .getLength(mapList.getToRanges());
-          boolean addStopCodon = (cdsLength == mappedFromLength * 3 + 3)
+          boolean addStopCodon = (cdsLength == mappedFromLength * CODON_LENGTH + CODON_LENGTH)
                   || (peptide.getDatasetSequence().getLength() == mappedFromLength - 1);
           if (cdsLength != mappedToLength && !addStopCodon)
           {
@@ -965,8 +973,8 @@ public class AlignmentUtils
           /*
            * pre-fill the aligned cds sequence with gaps
            */
-          char[] alignedCds = new char[peptideLength * 3
-                  + (addStopCodon ? 3 : 0)];
+          char[] alignedCds = new char[peptideLength * CODON_LENGTH
+                  + (addStopCodon ? CODON_LENGTH : 0)];
           Arrays.fill(alignedCds, gapChar);
 
           /*
@@ -983,7 +991,7 @@ public class AlignmentUtils
           {
             if (Comparison.isGap(residue))
             {
-              cdsCol += 3;
+              cdsCol += CODON_LENGTH;
             }
             else
             {
@@ -992,7 +1000,7 @@ public class AlignmentUtils
               if (codon == null)
               {
                 // e.g. incomplete start codon, X in peptide
-                cdsCol += 3;
+                cdsCol += CODON_LENGTH;
               }
               else
               {
@@ -1010,7 +1018,7 @@ public class AlignmentUtils
            * append stop codon if not mapped from protein,
            * closing it up to the end of the mapped sequence
            */
-          if (copiedBases == nucleotides.length - 3)
+          if (copiedBases == nucleotides.length - CODON_LENGTH)
           {
             for (int i = alignedCds.length - 1; i >= 0; i--)
             {
@@ -1020,7 +1028,7 @@ public class AlignmentUtils
                 break;
               }
             }
-            for (int i = nucleotides.length - 3; i < nucleotides.length; i++)
+            for (int i = nucleotides.length - CODON_LENGTH; i < nucleotides.length; i++)
             {
               alignedCds[cdsCol++] = nucleotides[i];
             }
@@ -1806,7 +1814,7 @@ public class AlignmentUtils
     int mappedFromLength = MappingUtils.getLength(aMapping.getMap()
             .getFromRanges());
     int dnaLength = seqDss.getLength();
-    if (mappedFromLength == dnaLength || mappedFromLength == dnaLength - 3)
+    if (mappedFromLength == dnaLength || mappedFromLength == dnaLength - CODON_LENGTH)
     {
       return seqDss;
     }
@@ -1822,7 +1830,7 @@ public class AlignmentUtils
       for (SequenceToSequenceMapping map : acf.getMappings())
       {
         Mapping mapping = map.getMapping();
-        if (mapping != aMapping && mapping.getMap().getFromRatio() == 3
+        if (mapping != aMapping && mapping.getMap().getFromRatio() == CODON_LENGTH
                 && proteinProduct == mapping.getTo()
                 && seqDss != map.getFromSeq())
         {
@@ -2027,7 +2035,7 @@ public class AlignmentUtils
     /*
      * dna length should map to protein (or protein plus stop codon)
      */
-    int codesForResidues = mappedDnaLength / 3;
+    int codesForResidues = mappedDnaLength / CODON_LENGTH;
     if (codesForResidues == (proteinLength + 1))
     {
       // assuming extra codon is for STOP and not in peptide
@@ -2036,7 +2044,7 @@ public class AlignmentUtils
     if (codesForResidues == proteinLength)
     {
       proteinRange.add(new int[] { proteinStart, proteinEnd });
-      return new MapList(ranges, proteinRange, 3, 1);
+      return new MapList(ranges, proteinRange, CODON_LENGTH, 1);
     }
     return null;
   }
@@ -2312,7 +2320,7 @@ public class AlignmentUtils
      * are currently ignored here
      */
     String trans = codon.contains("-") ? "-"
-            : (codon.length() > 3 ? null : ResidueProperties
+            : (codon.length() > CODON_LENGTH ? null : ResidueProperties
                     .codonTranslate(codon));
     if (trans != null && !trans.equals(residue))
     {
@@ -2324,7 +2332,7 @@ public class AlignmentUtils
       // set score to 0f so 'graduated colour' option is offered! JAL-2060
       SequenceFeature sf = new SequenceFeature(
               SequenceOntologyI.SEQUENCE_VARIANT, desc, peptidePos,
-              peptidePos, 0f, "Jalview");
+              peptidePos, 0f, var.getSource());
       StringBuilder attributes = new StringBuilder(32);
       String id = (String) var.variant.getValue(ID);
       if (id != null)
@@ -2335,7 +2343,7 @@ public class AlignmentUtils
         }
         sf.setValue(ID, id);
         attributes.append(ID).append("=").append(id);
-        // TODO handle other species variants
+        // TODO handle other species variants JAL-2064
         StringBuilder link = new StringBuilder(32);
         try
         {
@@ -2374,6 +2382,7 @@ public class AlignmentUtils
    * @param dnaToProtein
    * @return
    */
+  @SuppressWarnings("unchecked")
   static LinkedHashMap<Integer, List<DnaVariant>[]> buildDnaVariantsMap(
           SequenceI dnaSeq, MapList dnaToProtein)
   {
@@ -2417,7 +2426,7 @@ public class AlignmentUtils
         List<DnaVariant>[] codonVariants = variants.get(peptidePosition);
         if (codonVariants == null)
         {
-          codonVariants = new ArrayList[3];
+          codonVariants = new ArrayList[CODON_LENGTH];
           codonVariants[0] = new ArrayList<DnaVariant>();
           codonVariants[1] = new ArrayList<DnaVariant>();
           codonVariants[2] = new ArrayList<DnaVariant>();
@@ -2451,7 +2460,7 @@ public class AlignmentUtils
         /*
          * save nucleotide (and any variant) for each codon position
          */
-        for (int codonPos = 0; codonPos < 3; codonPos++)
+        for (int codonPos = 0; codonPos < CODON_LENGTH; codonPos++)
         {
           String nucleotide = String.valueOf(
                   dnaSeq.getCharAt(codon[codonPos] - dnaStart))
index 31552af..cc002e1 100644 (file)
@@ -613,6 +613,10 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
       SequenceFeature copy = new SequenceFeature(sf);
       copy.setBegin(Math.min(mappedRange[0], mappedRange[1]));
       copy.setEnd(Math.max(mappedRange[0], mappedRange[1]));
+      if (".".equals(copy.getFeatureGroup()))
+      {
+        copy.setFeatureGroup(getDbSource());
+      }
       targetSequence.addSequenceFeature(copy);
 
       /*
index 22bb680..a856231 100644 (file)
@@ -1937,13 +1937,15 @@ public class AlignmentUtilsTests
   public void testComputePeptideVariants()
   {
     /*
-     * scenario: AAATTTCCC codes for KFP, with variants
-     *           GAA -> E
-     *           CAA -> Q
-     *           AAG synonymous
-     *           AAT -> N
-     *              TTC synonymous
-     *                 CAC,CGC -> H,R (as one variant)
+     * scenario: AAATTTCCC codes for KFP
+     * variants:
+     *           GAA -> E             source: Ensembl
+     *           CAA -> Q             source: dbSNP
+     *           AAG synonymous       source: COSMIC
+     *           AAT -> N             source: Ensembl
+     *           ...TTC synonymous    source: dbSNP
+     *           ......CAC,CGC -> H,R source: COSMIC
+     *                 (one variant with two alleles)
      */
     SequenceI peptide = new Sequence("pep/10-12", "KFP");
 
@@ -1951,32 +1953,35 @@ public class AlignmentUtilsTests
      * two distinct variants for codon 1 position 1
      * second one has clinical significance
      */
+    String ensembl = "Ensembl";
+    String dbSnp = "dbSNP";
+    String cosmic = "COSMIC";
     SequenceFeature sf1 = new SequenceFeature("sequence_variant", "", 1, 1,
-            0f, null);
+            0f, ensembl);
     sf1.setValue("alleles", "A,G"); // GAA -> E
     sf1.setValue("ID", "var1.125A>G");
     SequenceFeature sf2 = new SequenceFeature("sequence_variant", "", 1, 1,
-            0f, null);
+            0f, dbSnp);
     sf2.setValue("alleles", "A,C"); // CAA -> Q
     sf2.setValue("ID", "var2");
     sf2.setValue("clinical_significance", "Dodgy");
     SequenceFeature sf3 = new SequenceFeature("sequence_variant", "", 3, 3,
-            0f, null);
+            0f, cosmic);
     sf3.setValue("alleles", "A,G"); // synonymous
     sf3.setValue("ID", "var3");
     sf3.setValue("clinical_significance", "None");
     SequenceFeature sf4 = new SequenceFeature("sequence_variant", "", 3, 3,
-            0f, null);
+            0f, ensembl);
     sf4.setValue("alleles", "A,T"); // AAT -> N
     sf4.setValue("ID", "sequence_variant:var4"); // prefix gets stripped off
     sf4.setValue("clinical_significance", "Benign");
     SequenceFeature sf5 = new SequenceFeature("sequence_variant", "", 6, 6,
-            0f, null);
+            0f, dbSnp);
     sf5.setValue("alleles", "T,C"); // synonymous
     sf5.setValue("ID", "var5");
     sf5.setValue("clinical_significance", "Bad");
     SequenceFeature sf6 = new SequenceFeature("sequence_variant", "", 8, 8,
-            0f, null);
+            0f, cosmic);
     sf6.setValue("alleles", "C,A,G"); // CAC,CGC -> H,R
     sf6.setValue("ID", "var6");
     sf6.setValue("clinical_significance", "Good");
@@ -2024,14 +2029,15 @@ public class AlignmentUtilsTests
 
     /*
      * verify added sequence features for
-     * var1 K -> E
-     * var2 K -> Q
-     * var4 K -> N
-     * var6 P -> H
-     * var6 P -> R
+     * var1 K -> E Ensembl
+     * var2 K -> Q dbSNP
+     * var4 K -> N Ensembl
+     * var6 P -> H COSMIC
+     * var6 P -> R COSMIC
      */
     SequenceFeature[] sfs = peptide.getSequenceFeatures();
     assertEquals(5, sfs.length);
+
     SequenceFeature sf = sfs[0];
     assertEquals(1, sf.getBegin());
     assertEquals(1, sf.getEnd());
@@ -2044,7 +2050,8 @@ public class AlignmentUtilsTests
     assertEquals(
             "p.Lys1Glu var1.125A>G|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var1.125A%3EG",
             sf.links.get(0));
-    assertEquals("Jalview", sf.getFeatureGroup());
+    assertEquals(ensembl, sf.getFeatureGroup());
+
     sf = sfs[1];
     assertEquals(1, sf.getBegin());
     assertEquals(1, sf.getEnd());
@@ -2056,7 +2063,8 @@ public class AlignmentUtilsTests
     assertEquals(
             "p.Lys1Gln var2|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var2",
             sf.links.get(0));
-    assertEquals("Jalview", sf.getFeatureGroup());
+    assertEquals(dbSnp, sf.getFeatureGroup());
+
     sf = sfs[2];
     assertEquals(1, sf.getBegin());
     assertEquals(1, sf.getEnd());
@@ -2068,7 +2076,9 @@ public class AlignmentUtilsTests
     assertEquals(
             "p.Lys1Asn var4|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var4",
             sf.links.get(0));
-    assertEquals("Jalview", sf.getFeatureGroup());
+    assertEquals(ensembl, sf.getFeatureGroup());
+
+    // var6 generates two distinct protein variant features
     sf = sfs[3];
     assertEquals(3, sf.getBegin());
     assertEquals(3, sf.getEnd());
@@ -2080,8 +2090,8 @@ public class AlignmentUtilsTests
     assertEquals(
             "p.Pro3His var6|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var6",
             sf.links.get(0));
-    // var5 generates two distinct protein variant features
-    assertEquals("Jalview", sf.getFeatureGroup());
+    assertEquals(cosmic, sf.getFeatureGroup());
+
     sf = sfs[4];
     assertEquals(3, sf.getBegin());
     assertEquals(3, sf.getEnd());
@@ -2093,7 +2103,7 @@ public class AlignmentUtilsTests
     assertEquals(
             "p.Pro3Arg var6|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var6",
             sf.links.get(0));
-    assertEquals("Jalview", sf.getFeatureGroup());
+    assertEquals(cosmic, sf.getFeatureGroup());
   }
 
   /**