Merge branch 'develop' into feature/r2_11_2/JAL-3808_gff2_exonerate

[jalview.git] / src / jalview / io / gff / ExonerateHelper.java
diff --git a/src/jalview/io/gff/ExonerateHelper.java b/src/jalview/io/gff/ExonerateHelper.java

index 1ee99c0..4973010 100644 (file)
--- a/src/jalview/io/gff/ExonerateHelper.java
+++ b/src/jalview/io/gff/ExonerateHelper.java
@@ -1,5 +1,27 @@
+/*
+ * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
+ * Copyright (C) $$Year-Rel$$ The Jalview Authors
+ * 
+ * This file is part of Jalview.
+ * 
+ * Jalview is free software: you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License 
+ * as published by the Free Software Foundation, either version 3
+ * of the License, or (at your option) any later version.
+ *  
+ * Jalview is distributed in the hope that it will be useful, but 
+ * WITHOUT ANY WARRANTY; without even the implied warranty 
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR 
+ * PURPOSE.  See the GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
+ * The Jalview Authors are detailed in the 'AUTHORS' file.
+ */
  package jalview.io.gff;
  
+import java.util.Locale;
+
  import jalview.datamodel.AlignedCodonFrame;
  import jalview.datamodel.AlignmentI;
  import jalview.datamodel.MappingType;
@@ -8,6 +30,7 @@ import jalview.datamodel.SequenceI;
  import jalview.util.MapList;
  
  import java.io.IOException;
+import java.util.ArrayList;
  import java.util.List;
  import java.util.Map;
  
@@ -134,21 +157,15 @@ public class ExonerateHelper extends Gff2Helper
      }
  
      /*
-     * locate the mapped sequence in the alignment or 'new' (GFF file) sequences; 
-     */
-    SequenceI mappedSequence = findSequence(mapTo.get(0), align, newseqs,
-            relaxedIdMatching);
-
-    /*
-     * If mapping is from protein to dna, we store it as dna to protein instead
+     * similarity start and end can tell us 
+     * which part of the alignment refers to which sequence
       */
-    SequenceI mapFromSequence = seq;
-    SequenceI mapToSequence = mappedSequence;
-    if ((type == MappingType.NucleotideToPeptide && featureIsOnTarget)
-            || (type == MappingType.PeptideToNucleotide && !featureIsOnTarget))
-    {
-      mapFromSequence = mappedSequence;
-      mapToSequence = seq;
+    int similarityFrom,similarityTo;
+    try {
+      similarityFrom = Integer.parseInt(gff[START_COL]);
+      similarityTo = Integer.parseInt(gff[END_COL]);
+    } catch (Exception x) {
+      throw new IOException("Couldn't parse start/end of the similarity feature",x); 
      }
  
      /*
@@ -159,13 +176,6 @@ public class ExonerateHelper extends Gff2Helper
       */
  
      /*
-     * get any existing mapping for these sequences (or start one),
-     * and add this mapped range
-     */
-    AlignedCodonFrame acf = getMapping(align, mapFromSequence,
-            mapToSequence);
-
-    /*
       * exonerate GFF has the strand of the target in column 7
       * (differs from GFF3 which has it in the Target descriptor)
       */
@@ -182,6 +192,8 @@ public class ExonerateHelper extends Gff2Helper
      }
  
      List<String> alignedRegions = set.get(ALIGN);
+    List<MapList> mappings = new ArrayList<MapList>();
+    int fromLowest=0, fromHighest=0, toLowest=0, toHighest=0;
      for (String region : alignedRegions)
      {
        MapList mapping = buildMapping(region, type, forwardStrand,
@@ -192,6 +204,133 @@ public class ExonerateHelper extends Gff2Helper
          continue;
        }
  
+      /*
+       * record total extent of aligned region(s) for later
+       */
+      if (mappings.size() == 0)
+      {
+        if (mapping.getFromLowest() < mapping.getFromHighest())
+        {
+          fromLowest = mapping.getFromLowest();
+          fromHighest = mapping.getFromHighest();
+        }
+        else
+        {
+          fromLowest = mapping.getFromHighest();
+          fromHighest = mapping.getFromLowest();
+        }
+        if (mapping.getToLowest() < mapping.getToHighest())
+        {
+          toLowest = mapping.getToLowest();
+          toHighest = mapping.getToHighest();
+        }
+        else
+        {
+          toLowest = mapping.getToHighest();
+          toHighest = mapping.getToLowest();
+        }
+      }
+      else
+      {
+        int fl = mapping.getFromLowest(), fh = mapping.getFromHighest(),
+                tl = mapping.getToLowest(), th = mapping.getToHighest();
+        if (fl > fh)
+        {
+          fl = fh;
+          fh = mapping.getFromLowest();
+        }
+        if (tl > th)
+        {
+          tl = th;
+          th = mapping.getToLowest();
+        }
+        if (fromLowest > fl)
+
+        {
+          fromLowest = fl;
+        }
+        if (fromHighest < fh)
+        {
+          fromHighest = fh;
+        }
+        if (toLowest > tl)
+        {
+          toLowest = tl;
+        }
+        if (toHighest < th)
+        {
+          toHighest = th;
+        }
+      }
+      mappings.add(mapping);
+    }
+    
+    /*
+     * locate the mapped sequence in the alignment or 'new' (GFF file) sequences; 
+     */
+    SequenceI mappedSequence = findSequence(mapTo.get(0), align, newseqs,
+            relaxedIdMatching);
+    
+    /*
+     * finally, resolve the sense of the mapping 
+     */
+    SequenceI mapFromSequence = seq;
+    SequenceI mapToSequence = mappedSequence;
+
+    /*
+     * If mapping is from protein to dna, we store it as dna to protein instead
+     */
+    if ((type == MappingType.NucleotideToPeptide && featureIsOnTarget)
+            || (type == MappingType.PeptideToNucleotide
+                    && !featureIsOnTarget))
+    {
+      mapFromSequence = mappedSequence;
+      mapToSequence = seq;
+    }
+    /*
+     * the sense of 'align' mappings for nucleotide alignments
+     * from exonerate seem to be ambiguous, so we need to do a bit more work
+     */
+    if (type == MappingType.NucleotideToNucleotide || type == MappingType.PeptideToPeptide)
+    {
+      /*
+       *  then check whether the aligned region is contained 
+       *  by the feature to determine sense of mapping
+       */
+      if (fromHighest==toHighest && fromLowest==toLowest)
+      {
+        // ambiguous case - for simple alignments this doesn't matter, but important for rearrangements or inversions
+        if (featureIsOnTarget)
+        {
+          // TODO: raise a warning since we don't have test coverage for this case
+          mapFromSequence=mappedSequence; // Target sequence 
+          mapToSequence=seq; // annotated sequence
+        }
+      } else if (similarityFrom == fromLowest && similarityTo == fromHighest)
+      {
+        mapFromSequence = seq;
+        mapToSequence = mappedSequence;
+      }
+      else if (similarityFrom == toLowest && similarityTo == toHighest)
+      {
+        mapFromSequence = mappedSequence;
+        mapToSequence = seq;
+      }
+      else
+      {
+        throw new IOException(
+                "Couldn't determine sense for similarity feature");
+      }
+    }
+    
+    /*
+     * get any existing mapping for these sequences (or start one),
+     * and add this mapped range
+     */
+    AlignedCodonFrame acf = getMapping(align, mapFromSequence,
+            mapToSequence);
+    for (MapList mapping : mappings)
+    {
        acf.addMap(mapFromSequence, mapToSequence, mapping);
      }
      align.addCodonFrame(acf);
@@ -247,8 +386,8 @@ public class ExonerateHelper extends Gff2Helper
      {
        fromStart = alignToStart;
        toStart = alignFromStart;
-      toEnd = forwardStrand ? toStart + alignCount - 1 : toStart
-              - (alignCount - 1);
+      toEnd = forwardStrand ? toStart + alignCount - 1
+              : toStart - (alignCount - 1);
        int toLength = Math.abs(toEnd - toStart) + 1;
        int fromLength = toLength * type.getFromRatio() / type.getToRatio();
        fromEnd = fromStart + fromLength - 1;
@@ -320,7 +459,7 @@ public class ExonerateHelper extends Gff2Helper
      // e.g. exonerate:protein2genome:local
      if (model != null)
      {
-      String mdl = model.toLowerCase();
+      String mdl = model.toLowerCase(Locale.ROOT);
        if (mdl.contains(PROTEIN2DNA) || mdl.contains(PROTEIN2GENOME)
                || mdl.contains(CODING2CODING) || mdl.contains(CODING2GENOME)
                || mdl.contains(CDNA2GENOME) || mdl.contains(GENOME2GENOME))
@@ -332,12 +471,16 @@ public class ExonerateHelper extends Gff2Helper
      return false;
    }
  
+  /**
+   * An override to set feature group to "exonerate" instead of the default GFF
+   * source value (column 2)
+   */
    @Override
    protected SequenceFeature buildSequenceFeature(String[] gff,
            Map<String, List<String>> set)
    {
-    SequenceFeature sf = super.buildSequenceFeature(gff, set);
-    sf.setFeatureGroup("exonerate");
+    SequenceFeature sf = super.buildSequenceFeature(gff, TYPE_COL,
+            "exonerate", set);
  
      return sf;
    }