JAL-1705 refine feature copying to including 5' and 3' overlaps
[jalview.git] / src / jalview / analysis / AlignmentUtils.java
index d8cb9a2..41538eb 100644 (file)
@@ -34,6 +34,7 @@ import jalview.datamodel.Sequence;
 import jalview.datamodel.SequenceFeature;
 import jalview.datamodel.SequenceGroup;
 import jalview.datamodel.SequenceI;
+import jalview.io.gff.SequenceOntology;
 import jalview.schemes.ResidueProperties;
 import jalview.util.DBRefUtils;
 import jalview.util.MapList;
@@ -1301,7 +1302,7 @@ public class AlignmentUtils
       return false;
     }
     String name = seq2.getName();
-    final DBRefEntry[] xrefs = seq1.getDBRef();
+    final DBRefEntry[] xrefs = seq1.getDBRefs();
     if (xrefs != null)
     {
       for (DBRefEntry xref : xrefs)
@@ -1394,7 +1395,7 @@ public class AlignmentUtils
 
     for (Mapping seqMapping : seqMappings)
     {
-      SequenceI cds = makeCdsSequence(dnaSeq, seqMapping, newMappings);
+      SequenceI cds = makeCdsSequence(dnaSeq, seqMapping);
       cdsSequences.add(cds);
 
       /*
@@ -1406,30 +1407,46 @@ public class AlignmentUtils
       /*
        * transfer any features on dna that overlap the CDS
        */
-      transferFeatures(dnaSeq, cds, dnaToCds, "CDS" /* SequenceOntology.CDS */);
+      transferFeatures(dnaSeq, cds, dnaToCds, null, "CDS" /* SequenceOntology.CDS */);
     }
     return cdsSequences;
   }
 
   /**
-   * Transfers any co-located features on 'fromSeq' to 'toSeq', adjusting the
+   * Transfers co-located features on 'fromSeq' to 'toSeq', adjusting the
    * feature start/end ranges, optionally omitting specified feature types.
+   * Returns the number of features copied.
    * 
    * @param fromSeq
    * @param toSeq
+   * @param select
+   *          if not null, only features of this type are copied (including
+   *          subtypes in the Sequence Ontology)
    * @param mapping
    *          the mapping from 'fromSeq' to 'toSeq'
    * @param omitting
    */
-  protected static void transferFeatures(SequenceI fromSeq,
-          SequenceI toSeq, MapList mapping, String... omitting)
+  public static int transferFeatures(SequenceI fromSeq, SequenceI toSeq,
+          MapList mapping, String select, String... omitting)
   {
+    SequenceI copyTo = toSeq;
+    while (copyTo.getDatasetSequence() != null)
+    {
+      copyTo = copyTo.getDatasetSequence();
+    }
+
+    SequenceOntology so = SequenceOntology.getInstance();
+    int count = 0;
     SequenceFeature[] sfs = fromSeq.getSequenceFeatures();
     if (sfs != null)
     {
       for (SequenceFeature sf : sfs)
       {
         String type = sf.getType();
+        if (select != null && !so.isA(type, select))
+        {
+          continue;
+        }
         boolean omit = false;
         for (String toOmit : omitting)
         {
@@ -1447,16 +1464,48 @@ public class AlignmentUtils
          * locate the mapped range - null if either start or end is
          * not mapped (no partial overlaps are calculated)
          */
-        int[] mappedTo = mapping.locateInTo(sf.getBegin(), sf.getEnd());
+        int start = sf.getBegin();
+        int end = sf.getEnd();
+        int[] mappedTo = mapping.locateInTo(start, end);
+        /*
+         * if whole exon range doesn't map, try interpreting it
+         * as 5' or 3' exon overlapping the CDS range
+         */
+        if (mappedTo == null)
+        {
+          mappedTo = mapping.locateInTo(end, end);
+          if (mappedTo != null)
+          {
+            /*
+             * end of exon is in CDS range - 5' overlap
+             * to a range from the start of the peptide
+             */
+            mappedTo[0] = 1;
+          }
+        }
+        if (mappedTo == null)
+        {
+          mappedTo = mapping.locateInTo(start, start);
+          if (mappedTo != null)
+          {
+            /*
+             * start of exon is in CDS range - 3' overlap
+             * to a range up to the end of the peptide
+             */
+            mappedTo[1] = toSeq.getLength();
+          }
+        }
         if (mappedTo != null)
         {
           SequenceFeature copy = new SequenceFeature(sf);
           copy.setBegin(Math.min(mappedTo[0], mappedTo[1]));
           copy.setEnd(Math.max(mappedTo[0], mappedTo[1]));
-          toSeq.addSequenceFeature(copy);
+          copyTo.addSequenceFeature(copy);
+          count++;
         }
       }
     }
+    return count;
   }
 
   /**
@@ -1501,19 +1550,16 @@ public class AlignmentUtils
 
   /**
    * Makes and returns a CDS-only sequence, where the CDS regions are identified
-   * as the 'from' ranges of the mapping on the dna. Any sequence features on
-   * the dna which overlap the CDS regions are copied to the new sequence.
+   * as the 'from' ranges of the mapping on the dna.
    * 
    * @param dnaSeq
    *          nucleotide sequence
    * @param seqMapping
    *          mappings from CDS regions of nucleotide
-   * @param exonMappings
-   *          CDS-to-peptide mapping (to add to)
    * @return
    */
   protected static SequenceI makeCdsSequence(SequenceI dnaSeq,
-          Mapping seqMapping, AlignedCodonFrame exonMappings)
+          Mapping seqMapping)
   {
     StringBuilder newSequence = new StringBuilder(dnaSeq.getLength());
     final char[] dna = dnaSeq.getSequence();
@@ -1536,6 +1582,7 @@ public class AlignmentUtils
             newSequence.toString());
 
     transferDbRefs(seqMapping.getTo(), cds);
+
     return cds;
   }
 
@@ -1549,7 +1596,7 @@ public class AlignmentUtils
   protected static void transferDbRefs(SequenceI from, SequenceI to)
   {
     String cdsAccId = FeatureProperties.getCodingFeature(DBRefSource.EMBL);
-    DBRefEntry[] cdsRefs = DBRefUtils.selectRefs(from.getDBRef(),
+    DBRefEntry[] cdsRefs = DBRefUtils.selectRefs(from.getDBRefs(),
             DBRefSource.CODINGDBS);
     if (cdsRefs != null)
     {