JAL-1705 refine feature copying to including 5' and 3' overlaps
[jalview.git] / src / jalview / analysis / AlignmentUtils.java
index fe95dca..41538eb 100644 (file)
@@ -34,6 +34,7 @@ import jalview.datamodel.Sequence;
 import jalview.datamodel.SequenceFeature;
 import jalview.datamodel.SequenceGroup;
 import jalview.datamodel.SequenceI;
+import jalview.io.gff.SequenceOntology;
 import jalview.schemes.ResidueProperties;
 import jalview.util.DBRefUtils;
 import jalview.util.MapList;
@@ -1406,23 +1407,27 @@ public class AlignmentUtils
       /*
        * transfer any features on dna that overlap the CDS
        */
-      transferFeatures(dnaSeq, cds, dnaToCds, "CDS" /* SequenceOntology.CDS */);
+      transferFeatures(dnaSeq, cds, dnaToCds, null, "CDS" /* SequenceOntology.CDS */);
     }
     return cdsSequences;
   }
 
   /**
-   * Transfers any co-located features on 'fromSeq' to 'toSeq', adjusting the
+   * Transfers co-located features on 'fromSeq' to 'toSeq', adjusting the
    * feature start/end ranges, optionally omitting specified feature types.
+   * Returns the number of features copied.
    * 
    * @param fromSeq
    * @param toSeq
+   * @param select
+   *          if not null, only features of this type are copied (including
+   *          subtypes in the Sequence Ontology)
    * @param mapping
    *          the mapping from 'fromSeq' to 'toSeq'
    * @param omitting
    */
-  protected static void transferFeatures(SequenceI fromSeq,
-          SequenceI toSeq, MapList mapping, String... omitting)
+  public static int transferFeatures(SequenceI fromSeq, SequenceI toSeq,
+          MapList mapping, String select, String... omitting)
   {
     SequenceI copyTo = toSeq;
     while (copyTo.getDatasetSequence() != null)
@@ -1430,12 +1435,18 @@ public class AlignmentUtils
       copyTo = copyTo.getDatasetSequence();
     }
 
+    SequenceOntology so = SequenceOntology.getInstance();
+    int count = 0;
     SequenceFeature[] sfs = fromSeq.getSequenceFeatures();
     if (sfs != null)
     {
       for (SequenceFeature sf : sfs)
       {
         String type = sf.getType();
+        if (select != null && !so.isA(type, select))
+        {
+          continue;
+        }
         boolean omit = false;
         for (String toOmit : omitting)
         {
@@ -1453,16 +1464,48 @@ public class AlignmentUtils
          * locate the mapped range - null if either start or end is
          * not mapped (no partial overlaps are calculated)
          */
-        int[] mappedTo = mapping.locateInTo(sf.getBegin(), sf.getEnd());
+        int start = sf.getBegin();
+        int end = sf.getEnd();
+        int[] mappedTo = mapping.locateInTo(start, end);
+        /*
+         * if whole exon range doesn't map, try interpreting it
+         * as 5' or 3' exon overlapping the CDS range
+         */
+        if (mappedTo == null)
+        {
+          mappedTo = mapping.locateInTo(end, end);
+          if (mappedTo != null)
+          {
+            /*
+             * end of exon is in CDS range - 5' overlap
+             * to a range from the start of the peptide
+             */
+            mappedTo[0] = 1;
+          }
+        }
+        if (mappedTo == null)
+        {
+          mappedTo = mapping.locateInTo(start, start);
+          if (mappedTo != null)
+          {
+            /*
+             * start of exon is in CDS range - 3' overlap
+             * to a range up to the end of the peptide
+             */
+            mappedTo[1] = toSeq.getLength();
+          }
+        }
         if (mappedTo != null)
         {
           SequenceFeature copy = new SequenceFeature(sf);
           copy.setBegin(Math.min(mappedTo[0], mappedTo[1]));
           copy.setEnd(Math.max(mappedTo[0], mappedTo[1]));
           copyTo.addSequenceFeature(copy);
+          count++;
         }
       }
     }
+    return count;
   }
 
   /**