JAL-845 fixed start/end range of a mapped SequenceGroup

[jalview.git] / src / jalview / util / MappingUtils.java
diff --git a/src/jalview/util/MappingUtils.java b/src/jalview/util/MappingUtils.java

index 4cfb49e..ece1bac 100644 (file)
--- a/src/jalview/util/MappingUtils.java
+++ b/src/jalview/util/MappingUtils.java
@@ -1,5 +1,13 @@
  package jalview.util;
  
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
  import jalview.analysis.AlignmentSorter;
  import jalview.api.AlignViewportI;
  import jalview.commands.CommandI;
@@ -17,13 +25,6 @@ import jalview.datamodel.Sequence;
  import jalview.datamodel.SequenceGroup;
  import jalview.datamodel.SequenceI;
  
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-
  /**
   * Helper methods for manipulations involving sequence mappings.
   * 
@@ -270,8 +271,8 @@ public final class MappingUtils
     * @param mapTo
     * @return
     */
-  public static SequenceGroup mapSequenceGroup(SequenceGroup sg,
-          AlignViewportI mapFrom, AlignViewportI mapTo)
+  public static SequenceGroup mapSequenceGroup(final SequenceGroup sg,
+          final AlignViewportI mapFrom, final AlignViewportI mapTo)
    {
      /*
       * Note the SequenceGroup holds aligned sequences, the mappings hold dataset
@@ -281,18 +282,50 @@ public final class MappingUtils
      AlignViewportI protein = targetIsNucleotide ? mapFrom : mapTo;
      Set<AlignedCodonFrame> codonFrames = protein.getAlignment()
              .getCodonFrames();
-
      /*
-     * Copy group name, name colours, but not sequences or sequence colour
-     * scheme
+     * Copy group name, colours etc, but not sequences or sequence colour scheme
       */
      SequenceGroup mappedGroup = new SequenceGroup(sg);
      mappedGroup.cs = mapTo.getGlobalColourScheme();
      mappedGroup.clear();
-    // TODO set width of mapped group
  
+    int minStartCol = -1;
+    int maxEndCol = -1;
+    final int selectionStartRes = sg.getStartRes();
+    final int selectionEndRes = sg.getEndRes();
      for (SequenceI selected : sg.getSequences())
      {
+      /*
+       * Find this sequence's first and last ungapped columns in the selection
+       */
+      int firstUngappedPos = selectionStartRes;
+      while (firstUngappedPos <= selectionEndRes
+              && Comparison.isGap(selected.getCharAt(firstUngappedPos)))
+      {
+        firstUngappedPos++;
+      }
+
+      /*
+       * If this sequence is only gaps in the selected range, skip it
+       */
+      if (firstUngappedPos > selectionEndRes)
+      {
+        continue;
+      }
+
+      int lastUngappedPos = selectionEndRes;
+      while (lastUngappedPos >= selectionStartRes
+              && Comparison.isGap(selected.getCharAt(lastUngappedPos)))
+      {
+        lastUngappedPos--;
+      }
+
+      /*
+       * Find the selected start/end residue positions in sequence
+       */
+      int startResiduePos = selected.findPosition(firstUngappedPos);
+      int endResiduePos = selected.findPosition(lastUngappedPos);
+      
        for (AlignedCodonFrame acf : codonFrames)
        {
          SequenceI mappedSequence = targetIsNucleotide ? acf
@@ -301,8 +334,39 @@ public final class MappingUtils
          {
            for (SequenceI seq : mapTo.getAlignment().getSequences())
            {
+            int mappedStartResidue = 0;
+            int mappedEndResidue = 0;
              if (seq.getDatasetSequence() == mappedSequence)
              {
+              /*
+               * Found a sequence mapping. Locate the start/end mapped residues.
+               */
+              SearchResults sr = buildSearchResults(selected,
+                      startResiduePos, Collections.singleton(acf));
+              for (Match m : sr.getResults())
+              {
+                mappedStartResidue = m.getStart();
+                mappedEndResidue = m.getEnd();
+              }
+              sr = buildSearchResults(selected, endResiduePos,
+                      Collections.singleton(acf));
+              for (Match m : sr.getResults())
+              {
+                mappedStartResidue = Math.min(mappedStartResidue,
+                        m.getStart());
+                mappedEndResidue = Math.max(mappedEndResidue, m.getEnd());
+              }
+
+              /*
+               * Find the mapped aligned columns, save the range. Note findIndex
+               * returns a base 1 position, SequenceGroup uses base 0
+               */
+              int mappedStartCol = seq.findIndex(mappedStartResidue) - 1;
+              minStartCol = minStartCol == -1 ? mappedStartCol : Math.min(
+                      minStartCol, mappedStartCol);
+              int mappedEndCol = seq.findIndex(mappedEndResidue) - 1;
+              maxEndCol = maxEndCol == -1 ? mappedEndCol : Math.max(
+                      maxEndCol, mappedEndCol);
                mappedGroup.addSequence(seq, false);
                break;
              }
@@ -310,6 +374,8 @@ public final class MappingUtils
          }
        }
      }
+    mappedGroup.setStartRes(minStartCol < 0 ? 0 : minStartCol);
+    mappedGroup.setEndRes(maxEndCol < 0 ? 0 : maxEndCol);
      return mappedGroup;
    }