JAL-2738 transfer VCF variant features to CDS sequences
authorgmungoc <g.m.carstairs@dundee.ac.uk>
Tue, 26 Sep 2017 12:15:30 +0000 (13:15 +0100)
committergmungoc <g.m.carstairs@dundee.ac.uk>
Tue, 26 Sep 2017 12:15:30 +0000 (13:15 +0100)
src/jalview/io/vcf/VCFLoader.java

index 6b1eb0d..6c4526f 100644 (file)
@@ -36,6 +36,8 @@ import java.util.Map.Entry;
  */
 public class VCFLoader
 {
+  private static final String FEATURE_GROUP_VCF = "VCF";
+
   private static final String EXCL = "!";
 
   /*
@@ -103,7 +105,7 @@ public class VCFLoader
         {
           seqCount++;
           varCount += added;
-          computePeptideVariants(seq);
+          transferAddedFeatures(seq);
         }
       }
       // long elapsed = System.currentTimeMillis() - start;
@@ -133,16 +135,14 @@ public class VCFLoader
   }
 
   /**
-   * (Re-)computes peptide variants from dna variants, for any protein sequence
-   * to which the dna sequence has a mapping. Note that although duplicate
-   * features may get computed, they will not be added, since duplicate sequence
-   * features are ignored in Sequence.addSequenceFeature.
+   * Transfers VCF features to sequences to which this sequence has a mapping.
+   * If the mapping is 1:3, computes peptide variants from nucleotide variants.
    * 
-   * @param dnaSeq
+   * @param seq
    */
-  protected void computePeptideVariants(SequenceI dnaSeq)
+  protected void transferAddedFeatures(SequenceI seq)
   {
-    DBRefEntry[] dbrefs = dnaSeq.getDBRefs();
+    DBRefEntry[] dbrefs = seq.getDBRefs();
     if (dbrefs == null)
     {
       return;
@@ -150,13 +150,36 @@ public class VCFLoader
     for (DBRefEntry dbref : dbrefs)
     {
       Mapping mapping = dbref.getMap();
-      if (mapping == null || mapping.getTo() == null
-              || mapping.getMap().getFromRatio() != 3)
+      if (mapping == null || mapping.getTo() == null)
       {
         continue;
       }
-      AlignmentUtils.computeProteinFeatures(dnaSeq, mapping.getTo(),
-              mapping.getMap());
+
+      SequenceI mapTo = mapping.getTo();
+      MapList map = mapping.getMap();
+      if (map.getFromRatio() == 3)
+      {
+        /*
+         * dna-to-peptide product mapping
+         */
+        AlignmentUtils.computeProteinFeatures(seq, mapTo, map);
+      }
+      else
+      {
+        /*
+         * nucleotide-to-nucleotide mapping e.g. transcript to CDS
+         */
+        // TODO no DBRef to CDS is added to transcripts
+        List<SequenceFeature> features = seq.getFeatures()
+                .getPositionalFeatures(SequenceOntologyI.SEQUENCE_VARIANT);
+        for (SequenceFeature sf : features)
+        {
+          if (FEATURE_GROUP_VCF.equals(sf.getFeatureGroup()))
+          {
+            transferFeature(sf, mapTo, map);
+          }
+        }
+      }
     }
   }
 
@@ -363,7 +386,7 @@ public class VCFLoader
     String type = SequenceOntologyI.SEQUENCE_VARIANT;
 
     SequenceFeature sf = new SequenceFeature(type, alleles, featureStart,
-            featureEnd, score, "VCF");
+            featureEnd, score, FEATURE_GROUP_VCF);
 
     sf.setValue(Gff3Helper.ALLELES, alleles);
 
@@ -503,6 +526,32 @@ public class VCFLoader
   }
 
   /**
+   * Transfers the sequence feature to the target sequence, locating its start
+   * and end range based on the mapping. Features which do not overlap the
+   * target sequence are ignored.
+   * 
+   * @param sf
+   * @param targetSequence
+   * @param mapping
+   *          mapping from the feature's coordinates to the target sequence
+   */
+  protected void transferFeature(SequenceFeature sf,
+          SequenceI targetSequence, MapList mapping)
+  {
+    int[] mappedRange = mapping.locateInTo(sf.getBegin(), sf.getEnd());
+  
+    if (mappedRange != null)
+    {
+      String group = sf.getFeatureGroup();
+      int newBegin = Math.min(mappedRange[0], mappedRange[1]);
+      int newEnd = Math.max(mappedRange[0], mappedRange[1]);
+      SequenceFeature copy = new SequenceFeature(sf, newBegin, newEnd,
+              group, sf.getScore());
+      targetSequence.addSequenceFeature(copy);
+    }
+  }
+
+  /**
    * Formats a ranges map lookup key
    * 
    * @param chromosome