Merge branch 'bug/JAL-2541cutRelocateFeatures' into bug/JAL-2684_fixPairwiseAlignofEd...
authorJim Procter <jprocter@issues.jalview.org>
Mon, 6 Nov 2017 17:18:27 +0000 (17:18 +0000)
committerJim Procter <jprocter@issues.jalview.org>
Mon, 6 Nov 2017 17:18:27 +0000 (17:18 +0000)
1  2 
src/jalview/datamodel/Sequence.java
src/jalview/datamodel/features/SequenceFeatures.java
test/jalview/datamodel/SequenceTest.java
test/jalview/datamodel/features/SequenceFeaturesTest.java

@@@ -38,17 -38,21 +38,14 @@@ import java.util.List
  import java.util.ListIterator;
  import java.util.Vector;
  
 -import com.stevesoft.pat.Regex;
 -
  import fr.orsay.lri.varna.models.rna.RNA;
  
  /**
   * 
-  * Implements the SequenceI interface for a char[] based sequence object.
-  * 
-  * @author $author$
-  * @version $Revision$
+  * Implements the SequenceI interface for a char[] based sequence object
   */
  public class Sequence extends ASequence implements SequenceI
  {
 -  private static final Regex limitrx = new Regex(
 -          "[/][0-9]{1,}[-][0-9]{1,}$");
 -
 -  private static final Regex endrx = new Regex("[0-9]{1,}$");
 -
    SequenceI datasetSequence;
  
    String name;
@@@ -82,7 -86,7 +79,7 @@@
     */
    int index = -1;
  
 -  private SequenceFeatures sequenceFeatureStore;
 +  private SequenceFeaturesI sequenceFeatureStore;
  
    /*
     * A cursor holding the approximate current view position to the sequence,
      checkValidRange();
    }
  
 +  /**
 +   * If 'name' ends in /i-j, where i >= j > 0 are integers, extracts i and j as
 +   * start and end respectively and removes the suffix from the name
 +   */
    void parseId()
    {
      if (name == null)
                "POSSIBLE IMPLEMENTATION ERROR: null sequence name passed to constructor.");
        name = "";
      }
 -    // Does sequence have the /start-end signature?
 -    if (limitrx.search(name))
 +    int slashPos = name.lastIndexOf('/');
 +    if (slashPos > -1 && slashPos < name.length() - 1)
      {
 -      name = limitrx.left();
 -      endrx.search(limitrx.stringMatched());
 -      setStart(Integer.parseInt(limitrx.stringMatched().substring(1,
 -              endrx.matchedFrom() - 1)));
 -      setEnd(Integer.parseInt(endrx.stringMatched()));
 +      String suffix = name.substring(slashPos + 1);
 +      String[] range = suffix.split("-");
 +      if (range.length == 2)
 +      {
 +        try
 +        {
 +          int from = Integer.valueOf(range[0]);
 +          int to = Integer.valueOf(range[1]);
 +          if (from > 0 && to >= from)
 +          {
 +            name = name.substring(0, slashPos);
 +            setStart(from);
 +            setEnd(to);
 +            checkValidRange();
 +          }
 +        } catch (NumberFormatException e)
 +        {
 +          // leave name unchanged if suffix is invalid
 +        }
 +      }
      }
    }
  
 +  /**
 +   * Ensures that 'end' is not before the end of the sequence, that is,
 +   * (end-start+1) is at least as long as the count of ungapped positions. Note
 +   * that end is permitted to be beyond the end of the sequence data.
 +   */
    void checkValidRange()
    {
      // Note: JAL-774 :
        int endRes = 0;
        for (int j = 0; j < sequence.length; j++)
        {
 -        if (!jalview.util.Comparison.isGap(sequence[j]))
 +        if (!Comparison.isGap(sequence[j]))
          {
            endRes++;
          }
    }
  
    /**
 -   * DOCUMENT ME!
 +   * Sets the sequence name. If the name ends in /start-end, then the start-end
 +   * values are parsed out and set, and the suffix is removed from the name.
     * 
 -   * @param name
 -   *          DOCUMENT ME!
 +   * @param theName
     */
    @Override
 -  public void setName(String name)
 +  public void setName(String theName)
    {
 -    this.name = name;
 +    this.name = theName;
      this.parseId();
    }
  
       * preserve end residue column provided cursor was valid
       */
      int endColumn = isValidCursor(cursor) ? cursor.lastColumnPosition : 0;
      if (residuePos == this.end)
      {
        endColumn = column;
      /*
       * move left or right to find pos from hint.position
       */
-     int col = curs.columnPosition - 1; // convert from base 1 to 0-based array
-                                        // index
+     int col = curs.columnPosition - 1; // convert from base 1 to base 0
      int newPos = curs.residuePosition;
      int delta = newPos > pos ? -1 : 1;
  
      // the very large sequence case
      int eindex = -1, sindex = -1;
      boolean ecalc = false, scalc = false;
-     for (int s = i; s < j; s++)
+     for (int s = i; s < j && s < sequence.length; s++)
      {
-       if (jalview.schemes.ResidueProperties.aaIndex[sequence[s]] != 23)
+       if (!Comparison.isGap(sequence[s]))
        {
          if (createNewDs)
          {
  
      List<SequenceFeature> result = getFeatures().findFeatures(startPos,
              endPos, types);
+     if (datasetSequence != null)
+     {
+       result = datasetSequence.getFeatures().findFeatures(startPos, endPos,
+               types);
+     }
+     else
+     {
+       result = sequenceFeatureStore.findFeatures(startPos, endPos, types);
+     }
  
      /*
       * if end column is gapped, endPos may be to the right, 
       * and we may have included adjacent or enclosing features;
       * remove any that are not enclosing, non-contact features
       */
 -    if (endPos > this.end || Comparison.isGap(sequence[toColumn - 1]))
 +    boolean endColumnIsGapped = toColumn > 0 && toColumn <= sequence.length
 +            && Comparison.isGap(sequence[toColumn - 1]);
 +    if (endPos > this.end || endColumnIsGapped)
      {
        ListIterator<SequenceFeature> it = result.listIterator();
        while (it.hasNext())
@@@ -149,14 -149,6 +149,14 @@@ public class SequenceFeatures implement
      }
  
      Set<String> featureTypes = getFeatureTypes(ontologyTerm);
 +    if (featureTypes.isEmpty())
 +    {
 +      /*
 +       * no features of the specified type or any sub-type
 +       */
 +      return new ArrayList<>();
 +    }
 +
      return getAllFeatures(featureTypes.toArray(new String[featureTypes
              .size()]));
    }
     * {@inheritDoc}
     */
    @Override
-   public boolean shiftFeatures(int shift)
+   public boolean shiftFeatures(int fromPosition, int shiftBy)
    {
      boolean modified = false;
      for (FeatureStore fs : featureStore.values())
      {
-       modified |= fs.shiftFeatures(shift);
+       modified |= fs.shiftFeatures(fromPosition, shiftBy);
      }
      return modified;
    }
- }
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public void deleteAll()
+   {
+     featureStore.clear();
+   }
+ }
@@@ -263,6 -263,61 +263,61 @@@ public class SequenceTes
      assertEquals(13, sq.findIndex(99));
    }
  
+   @Test(groups = { "Functional" })
+   public void testFindPositions()
+   {
+     SequenceI sq = new Sequence("test/8-13", "-ABC---DE-F--");
+     /*
+      * invalid inputs
+      */
+     assertNull(sq.findPositions(6, 5));
+     assertNull(sq.findPositions(0, 5));
+     assertNull(sq.findPositions(-1, 5));
+     /*
+      * all gapped ranges
+      */
+     assertNull(sq.findPositions(1, 1)); // 1-based columns
+     assertNull(sq.findPositions(5, 5));
+     assertNull(sq.findPositions(5, 6));
+     assertNull(sq.findPositions(5, 7));
+     /*
+      * all ungapped ranges
+      */
+     assertEquals(new Range(8, 8), sq.findPositions(2, 2)); // A
+     assertEquals(new Range(8, 9), sq.findPositions(2, 3)); // AB
+     assertEquals(new Range(8, 10), sq.findPositions(2, 4)); // ABC
+     assertEquals(new Range(9, 10), sq.findPositions(3, 4)); // BC
+     /*
+      * gap to ungapped range
+      */
+     assertEquals(new Range(8, 10), sq.findPositions(1, 4)); // ABC
+     assertEquals(new Range(11, 12), sq.findPositions(6, 9)); // DE
+     /*
+      * ungapped to gapped range
+      */
+     assertEquals(new Range(10, 10), sq.findPositions(4, 5)); // C
+     assertEquals(new Range(9, 13), sq.findPositions(3, 11)); // BCDEF
+     /*
+      * ungapped to ungapped enclosing gaps
+      */
+     assertEquals(new Range(10, 11), sq.findPositions(4, 8)); // CD
+     assertEquals(new Range(8, 13), sq.findPositions(2, 11)); // ABCDEF
+     /*
+      * gapped to gapped enclosing ungapped
+      */
+     assertEquals(new Range(8, 10), sq.findPositions(1, 5)); // ABC
+     assertEquals(new Range(11, 12), sq.findPositions(5, 10)); // DE
+     assertEquals(new Range(8, 13), sq.findPositions(1, 13)); // the lot
+     assertEquals(new Range(8, 13), sq.findPositions(1, 99));
+   }
    /**
     * Tests for the method that returns a dataset sequence position (start..) for
     * an aligned column position (base 0).
      assertEquals("test:Pos13:Col10:startCol3:endCol10:tok0",
              PA.getValue(sq, "cursor").toString());
      sq.sequenceChanged();
-     assertEquals(12, sq.findPosition(8));
-     cursor = (SequenceCursor) PA.getValue(sq, "cursor");
+     assertEquals(12, sq.findPosition(8)); // E12
      // sequenceChanged() invalidates cursor.lastResidueColumn
      cursor = (SequenceCursor) PA.getValue(sq, "cursor");
      assertEquals("test:Pos12:Col9:startCol3:endCol0:tok1",
      assertEquals(6, sq.getEnd());
      assertNull(PA.getValue(sq, "datasetSequence"));
  
+     sq = new Sequence("test", "ABCDE");
+     sq.deleteChars(0, 3);
+     assertEquals("DE", sq.getSequenceAsString());
+     assertEquals(4, sq.getStart());
+     assertEquals(5, sq.getEnd());
+     assertNull(PA.getValue(sq, "datasetSequence"));
      /*
       * delete at end
       */
      assertEquals(1, sq.getStart());
      assertEquals(4, sq.getEnd());
      assertNull(PA.getValue(sq, "datasetSequence"));
+     /*
+      * delete more positions than there are
+      */
+     sq = new Sequence("test/8-11", "ABCD");
+     sq.deleteChars(0, 99);
+     assertEquals("", sq.getSequenceAsString());
+     assertEquals(12, sq.getStart()); // = findPosition(99) ?!?
+     assertEquals(11, sq.getEnd());
+     sq = new Sequence("test/8-11", "----");
+     sq.deleteChars(0, 99); // ArrayIndexOutOfBoundsException <= 2.10.2
+     assertEquals("", sq.getSequenceAsString());
+     assertEquals(8, sq.getStart());
+     assertEquals(11, sq.getEnd());
    }
  
    @Test(groups = { "Functional" })
      // cursor should now be at [D 6]
      cursor = (SequenceCursor) PA.getValue(sq, "cursor");
      assertEquals(new SequenceCursor(sq, 11, 6, ++token), cursor);
+     assertEquals(0, cursor.lastColumnPosition); // not yet found
+     assertEquals(13, sq.findPosition(8)); // E13
+     cursor = (SequenceCursor) PA.getValue(sq, "cursor");
+     assertEquals(9, cursor.lastColumnPosition); // found
  
      /*
       * deleteChars should invalidate the cached cursor
      assertEquals(".K..BCD.EF..", sq.getSequenceAsString());
      assertEquals(2, PA.getValue(sq, "changeCount"));
    }
 +
 +  @Test(groups = { "Functional" })
-   public void testFindPositions()
-   {
-     SequenceI sq = new Sequence("test/8-13", "-ABC---DE-F--");
-     /*
-      * invalid inputs
-      */
-     assertNull(sq.findPositions(6, 5));
-     assertNull(sq.findPositions(0, 5));
-     assertNull(sq.findPositions(-1, 5));
-     /*
-      * all gapped ranges
-      */
-     assertNull(sq.findPositions(1, 1)); // 1-based columns
-     assertNull(sq.findPositions(5, 5));
-     assertNull(sq.findPositions(5, 6));
-     assertNull(sq.findPositions(5, 7));
-     /*
-      * all ungapped ranges
-      */
-     assertEquals(new Range(8, 8), sq.findPositions(2, 2)); // A
-     assertEquals(new Range(8, 9), sq.findPositions(2, 3)); // AB
-     assertEquals(new Range(8, 10), sq.findPositions(2, 4)); // ABC
-     assertEquals(new Range(9, 10), sq.findPositions(3, 4)); // BC
-     /*
-      * gap to ungapped range
-      */
-     assertEquals(new Range(8, 10), sq.findPositions(1, 4)); // ABC
-     assertEquals(new Range(11, 12), sq.findPositions(6, 9)); // DE
-     /*
-      * ungapped to gapped range
-      */
-     assertEquals(new Range(10, 10), sq.findPositions(4, 5)); // C
-     assertEquals(new Range(9, 13), sq.findPositions(3, 11)); // BCDEF
-     /*
-      * ungapped to ungapped enclosing gaps
-      */
-     assertEquals(new Range(10, 11), sq.findPositions(4, 8)); // CD
-     assertEquals(new Range(8, 13), sq.findPositions(2, 11)); // ABCDEF
-     /*
-      * gapped to gapped enclosing ungapped
-      */
-     assertEquals(new Range(8, 10), sq.findPositions(1, 5)); // ABC
-     assertEquals(new Range(11, 12), sq.findPositions(5, 10)); // DE
-     assertEquals(new Range(8, 13), sq.findPositions(1, 13)); // the lot
-     assertEquals(new Range(8, 13), sq.findPositions(1, 99));
-   }
-   @Test(groups = { "Functional" })
 +  public void testFindFeatures_largeEndPos()
 +  {
 +    /*
 +     * imitate a PDB sequence where end is larger than end position
 +     */
 +    SequenceI sq = new Sequence("test", "-ABC--DEF--", 1, 20);
 +    sq.createDatasetSequence();
 +  
 +    assertTrue(sq.findFeatures(1, 9).isEmpty());
 +    // should be no array bounds exception - JAL-2772
 +    assertTrue(sq.findFeatures(1, 15).isEmpty());
 +  
 +    // add feature on BCD
 +    SequenceFeature sfBCD = new SequenceFeature("Cath", "desc", 2, 4, 2f,
 +            null);
 +    sq.addSequenceFeature(sfBCD);
 +  
 +    // no features in columns 1-2 (-A)
 +    List<SequenceFeature> found = sq.findFeatures(1, 2);
 +    assertTrue(found.isEmpty());
 +  
 +    // columns 1-6 (-ABC--) includes BCD
 +    found = sq.findFeatures(1, 6);
 +    assertEquals(1, found.size());
 +    assertTrue(found.contains(sfBCD));
 +
 +    // columns 10-11 (--) should find nothing
 +    found = sq.findFeatures(10, 11);
 +    assertEquals(0, found.size());
 +  }
 +
 +  @Test(groups = { "Functional" })
 +  public void testSetName()
 +  {
 +    SequenceI sq = new Sequence("test", "-ABC---DE-F--");
 +    assertEquals("test", sq.getName());
 +    assertEquals(1, sq.getStart());
 +    assertEquals(6, sq.getEnd());
 +
 +    sq.setName("testing");
 +    assertEquals("testing", sq.getName());
 +
 +    sq.setName("test/8-10");
 +    assertEquals("test", sq.getName());
 +    assertEquals(8, sq.getStart());
 +    assertEquals(13, sq.getEnd()); // note end is recomputed
 +
 +    sq.setName("testing/7-99");
 +    assertEquals("testing", sq.getName());
 +    assertEquals(7, sq.getStart());
 +    assertEquals(99, sq.getEnd()); // end may be beyond physical end
 +
 +    sq.setName("/2-3");
 +    assertEquals("", sq.getName());
 +    assertEquals(2, sq.getStart());
 +    assertEquals(7, sq.getEnd());
 +
 +    sq.setName("test/"); // invalid
 +    assertEquals("test/", sq.getName());
 +    assertEquals(2, sq.getStart());
 +    assertEquals(7, sq.getEnd());
 +
 +    sq.setName("test/6-13/7-99");
 +    assertEquals("test/6-13", sq.getName());
 +    assertEquals(7, sq.getStart());
 +    assertEquals(99, sq.getEnd());
 +
 +    sq.setName("test/0-5"); // 0 is invalid - ignored
 +    assertEquals("test/0-5", sq.getName());
 +    assertEquals(7, sq.getStart());
 +    assertEquals(99, sq.getEnd());
 +
 +    sq.setName("test/a-5"); // a is invalid - ignored
 +    assertEquals("test/a-5", sq.getName());
 +    assertEquals(7, sq.getStart());
 +    assertEquals(99, sq.getEnd());
 +
 +    sq.setName("test/6-5"); // start > end is invalid - ignored
 +    assertEquals("test/6-5", sq.getName());
 +    assertEquals(7, sq.getStart());
 +    assertEquals(99, sq.getEnd());
 +
 +    sq.setName("test/5"); // invalid - ignored
 +    assertEquals("test/5", sq.getName());
 +    assertEquals(7, sq.getStart());
 +    assertEquals(99, sq.getEnd());
 +
 +    sq.setName("test/-5"); // invalid - ignored
 +    assertEquals("test/-5", sq.getName());
 +    assertEquals(7, sq.getStart());
 +    assertEquals(99, sq.getEnd());
 +
 +    sq.setName("test/5-"); // invalid - ignored
 +    assertEquals("test/5-", sq.getName());
 +    assertEquals(7, sq.getStart());
 +    assertEquals(99, sq.getEnd());
 +
 +    sq.setName("test/5-6-7"); // invalid - ignored
 +    assertEquals("test/5-6-7", sq.getName());
 +    assertEquals(7, sq.getStart());
 +    assertEquals(99, sq.getEnd());
 +
 +    sq.setName(null); // invalid, gets converted to space
 +    assertEquals("", sq.getName());
 +    assertEquals(7, sq.getStart());
 +    assertEquals(99, sq.getEnd());
 +  }
 +
 +  @Test(groups = { "Functional" })
 +  public void testCheckValidRange()
 +  {
 +    Sequence sq = new Sequence("test/7-12", "-ABC---DE-F--");
 +    assertEquals(7, sq.getStart());
 +    assertEquals(12, sq.getEnd());
 +
 +    /*
 +     * checkValidRange ensures end is at least the last residue position
 +     */
 +    PA.setValue(sq, "end", 2);
 +    sq.checkValidRange();
 +    assertEquals(12, sq.getEnd());
 +
 +    /*
 +     * end may be beyond the last residue position
 +     */
 +    PA.setValue(sq, "end", 22);
 +    sq.checkValidRange();
 +    assertEquals(22, sq.getEnd());
 +  }
  }
@@@ -1032,9 -1032,6 +1032,9 @@@ public class SequenceFeaturesTes
      assertEquals(features.size(), 2);
      assertTrue(features.contains(sf2));
      assertTrue(features.contains(sf3));
 +
 +    features = store.getFeaturesByOntology("sequence_variant");
 +    assertTrue(features.isEmpty());
    }
  
    @Test(groups = "Functional")
    public void testShiftFeatures()
    {
      SequenceFeatures store = new SequenceFeatures();
-     assertFalse(store.shiftFeatures(1));
+     assertFalse(store.shiftFeatures(0, 1));
  
      SequenceFeature sf1 = new SequenceFeature("Cath", "", 2, 5, 0f, null);
      store.add(sf1);
      /*
       * shift features right by 5
       */
-     assertTrue(store.shiftFeatures(5));
+     assertTrue(store.shiftFeatures(0, 5));
    
      // non-positional features untouched:
      List<SequenceFeature> nonPos = store.getNonPositionalFeatures();
       * feature at [7-10] should be removed
       * feature at [13-19] should become [1-4] 
       */
-     assertTrue(store.shiftFeatures(-15));
+     assertTrue(store.shiftFeatures(0, -15));
      pos = store.getPositionalFeatures();
      assertEquals(pos.size(), 2);
      SequenceFeatures.sortFeatures(pos, true);
      assertEquals(pos.get(1).getBegin(), 13);
      assertEquals(pos.get(1).getEnd(), 22);
      assertEquals(pos.get(1).getType(), "Disulfide bond");
+     /*
+      * shift right by 4 from column 2
+      * feature at [1-4] should be unchanged
+      * feature at [13-22] should become [17-26] 
+      */
+     assertTrue(store.shiftFeatures(2, 4));
+     pos = store.getPositionalFeatures();
+     assertEquals(pos.size(), 2);
+     SequenceFeatures.sortFeatures(pos, true);
+     assertEquals(pos.get(0).getBegin(), 1);
+     assertEquals(pos.get(0).getEnd(), 4);
+     assertEquals(pos.get(0).getType(), "Metal");
+     assertEquals(pos.get(1).getBegin(), 17);
+     assertEquals(pos.get(1).getEnd(), 26);
+     assertEquals(pos.get(1).getType(), "Disulfide bond");
+     /*
+      * shift right from column 18
+      * should be no updates
+      */
+     SequenceFeature f1 = pos.get(0);
+     SequenceFeature f2 = pos.get(1);
+     assertFalse(store.shiftFeatures(18, 6));
+     pos = store.getPositionalFeatures();
+     assertEquals(pos.size(), 2);
+     SequenceFeatures.sortFeatures(pos, true);
+     assertSame(pos.get(0), f1);
+     assertSame(pos.get(1), f2);
    }
  
    @Test(groups = "Functional")
      assertTrue(store.isOntologyTerm("junk", new String[] {}));
      assertTrue(store.isOntologyTerm("junk", (String[]) null));
    }
+   @Test(groups = "Functional")
+   public void testDeleteAll()
+   {
+     SequenceFeaturesI store = new SequenceFeatures();
+     assertFalse(store.hasFeatures());
+     store.deleteAll();
+     assertFalse(store.hasFeatures());
+     store.add(new SequenceFeature("Cath", "Desc", 12, 20, 0f, "Group"));
+     store.add(new SequenceFeature("Pfam", "Desc", 6, 12, 2f, "Group2"));
+     assertTrue(store.hasFeatures());
+     store.deleteAll();
+     assertFalse(store.hasFeatures());
+   }
  }