merge
authortcofoegbu <tcnofoegbu@dundee.ac.uk>
Thu, 14 Apr 2016 10:09:51 +0000 (11:09 +0100)
committertcofoegbu <tcnofoegbu@dundee.ac.uk>
Thu, 14 Apr 2016 10:09:51 +0000 (11:09 +0100)
src/jalview/analysis/SeqsetUtils.java
src/jalview/datamodel/Sequence.java
src/jalview/datamodel/SequenceI.java
test/jalview/analysis/SeqsetUtilsTest.java [new file with mode: 0644]
test/jalview/datamodel/SequenceTest.java

index 40bedad..b0ecfde 100755 (executable)
@@ -59,13 +59,19 @@ public class SeqsetUtils
         sfeat.addElement(sfarray[i]);
       }
     }
-    sqinfo.put("SeqFeatures", sfeat);
-    sqinfo.put("PdbId",
+    if (seq.getDatasetSequence() == null)
+    {
+      sqinfo.put("SeqFeatures", sfeat);
+      sqinfo.put("PdbId",
             (seq.getAllPDBEntries() != null) ? seq.getAllPDBEntries()
                     : new Vector<PDBEntry>());
-    sqinfo.put("datasetSequence",
+    }
+    else
+    {
+      sqinfo.put("datasetSequence",
             (seq.getDatasetSequence() != null) ? seq.getDatasetSequence()
                     : new Sequence("THISISAPLACEHOLDER", ""));
+    }
     return sqinfo;
   }
 
@@ -129,6 +135,11 @@ public class SeqsetUtils
             && !(seqds.getName().equals("THISISAPLACEHOLDER") && seqds
                     .getLength() == 0))
     {
+      if (sfeatures!=null)
+      {
+        System.err
+                .println("Implementation error: setting dataset sequence for a sequence which has sequence features.\n\tDataset sequence features will not be visible.");
+      }
       sq.setDatasetSequence(seqds);
     }
 
index 6c8cbc0..a61f093 100755 (executable)
@@ -25,6 +25,7 @@ import jalview.api.DBRefEntryI;
 import jalview.util.StringUtils;
 
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.Enumeration;
 import java.util.List;
 import java.util.Vector;
@@ -185,12 +186,13 @@ public class Sequence extends ASequence implements SequenceI
   }
 
   /**
-   * Creates a new Sequence object with new features, DBRefEntries,
-   * AlignmentAnnotations, and PDBIds but inherits any existing dataset sequence
-   * reference.
+   * Creates a new Sequence object with new AlignmentAnnotations but inherits
+   * any existing dataset sequence reference. If non exists, everything is
+   * copied.
    * 
    * @param seq
-   *          DOCUMENT ME!
+   *          if seq is a dataset sequence, behaves like a plain old copy
+   *          constructor
    */
   public Sequence(SequenceI seq)
   {
@@ -213,31 +215,48 @@ public class Sequence extends ASequence implements SequenceI
 
   }
 
+  /**
+   * does the heavy lifting when cloning a dataset sequence, or coping data from
+   * dataset to a new derived sequence.
+   * 
+   * @param seq
+   *          - source of attributes.
+   * @param alAnnotation
+   *          - alignment annotation present on seq that should be copied onto
+   *          this sequence
+   */
   protected void initSeqFrom(SequenceI seq,
           AlignmentAnnotation[] alAnnotation)
   {
-    initSeqAndName(seq.getName(), seq.getSequence(), seq.getStart(),
+    {
+      char[] oseq = seq.getSequence();
+      initSeqAndName(seq.getName(), Arrays.copyOf(oseq, oseq.length),
+              seq.getStart(),
             seq.getEnd());
+    }
     description = seq.getDescription();
     sourceDBRef = seq.getSourceDBRef() == null ? null : new DBRefEntry(
             seq.getSourceDBRef());
-    if (seq.getSequenceFeatures() != null)
+    if (seq != datasetSequence)
     {
-      SequenceFeature[] sf = seq.getSequenceFeatures();
-      for (int i = 0; i < sf.length; i++)
-      {
-        addSequenceFeature(new SequenceFeature(sf[i]));
-      }
+      setDatasetSequence(seq.getDatasetSequence());
     }
-    setDatasetSequence(seq.getDatasetSequence());
     if (datasetSequence == null && seq.getDBRefs() != null)
     {
-      // only copy DBRefs if we really are a dataset sequence
+      // only copy DBRefs and seqfeatures if we really are a dataset sequence
       DBRefEntry[] dbr = seq.getDBRefs();
       for (int i = 0; i < dbr.length; i++)
       {
         addDBRef(new DBRefEntry(dbr[i]));
       }
+      if (seq.getSequenceFeatures() != null)
+      {
+        SequenceFeature[] sf = seq.getSequenceFeatures();
+        for (int i = 0; i < sf.length; i++)
+        {
+          addSequenceFeature(new SequenceFeature(sf[i]));
+        }
+      }
     }
     if (seq.getAnnotation() != null)
     {
@@ -274,22 +293,30 @@ public class Sequence extends ASequence implements SequenceI
     }
   }
 
-  /**
-   * DOCUMENT ME!
-   * 
-   * @param v
-   *          DOCUMENT ME!
-   */
+
   @Override
   public void setSequenceFeatures(SequenceFeature[] features)
   {
-    sequenceFeatures = features;
+    if (datasetSequence == null)
+    {
+      sequenceFeatures = features;
+    }
+    else
+    {
+      System.err
+              .println("Warning: JAL-2046 side effect ? Possible implementation error: overwriting dataset sequence features by setting sequence features on alignment");
+      datasetSequence.setSequenceFeatures(features);
+    }
   }
 
   @Override
   public synchronized void addSequenceFeature(SequenceFeature sf)
   {
-    // TODO add to dataset sequence instead if there is one?
+    if (sequenceFeatures==null && datasetSequence != null)
+    {
+      datasetSequence.addSequenceFeature(sf);
+      return;
+    }
     if (sequenceFeatures == null)
     {
       sequenceFeatures = new SequenceFeature[0];
@@ -315,6 +342,9 @@ public class Sequence extends ASequence implements SequenceI
   {
     if (sequenceFeatures == null)
     {
+      if (datasetSequence!=null) {
+         datasetSequence.deleteFeature(sf);
+      }
       return;
     }
 
@@ -1037,31 +1067,24 @@ public class Sequence extends ASequence implements SequenceI
   @Override
   public SequenceI deriveSequence()
   {
-    SequenceI seq = new Sequence(this);
-    if (datasetSequence != null)
-    {
-      // duplicate current sequence with same dataset
-      seq.setDatasetSequence(datasetSequence);
-    }
-    else
+    Sequence seq=null;
+    if (datasetSequence == null)
     {
       if (isValidDatasetSequence())
       {
         // Use this as dataset sequence
+        seq = new Sequence(getName(), "", 1, -1);
         seq.setDatasetSequence(this);
+        seq.initSeqFrom(this, getAnnotation());
+        return seq;
       }
       else
       {
         // Create a new, valid dataset sequence
-        SequenceI ds = seq;
-        ds.setSequence(AlignSeq.extractGaps(
-                jalview.util.Comparison.GapChars, new String(sequence)));
-        setDatasetSequence(ds);
-        ds.setSequenceFeatures(getSequenceFeatures());
-        seq = this; // and return this sequence as the derived sequence.
+       createDatasetSequence();
       }
     }
-    return seq;
+    return new Sequence(this);
   }
 
   /*
@@ -1074,20 +1097,27 @@ public class Sequence extends ASequence implements SequenceI
   {
     if (datasetSequence == null)
     {
-      datasetSequence = new Sequence(getName(), AlignSeq.extractGaps(
+      Sequence dsseq = new Sequence(getName(), AlignSeq.extractGaps(
               jalview.util.Comparison.GapChars, getSequenceAsString()),
               getStart(), getEnd());
-      datasetSequence.setSequenceFeatures(getSequenceFeatures());
-      datasetSequence.setDescription(getDescription());
-      setSequenceFeatures(null);
-      // move database references onto dataset sequence
-      datasetSequence.setDBRefs(getDBRefs());
-      setDBRefs(null);
-      datasetSequence.setPDBId(getAllPDBEntries());
-      setPDBId(null);
+
+      datasetSequence = dsseq;
+
+      dsseq.setDescription(description);
+      // move features and database references onto dataset sequence
+      dsseq.sequenceFeatures = sequenceFeatures;
+      sequenceFeatures=null;
+      dsseq.dbrefs = dbrefs;
+      dbrefs=null;
+      // TODO: search and replace any references to this sequence with
+      // references to the dataset sequence in Mappings on dbref
+      dsseq.pdbIds = pdbIds;
+      pdbIds = null;
       datasetSequence.updatePDBIds();
       if (annotation != null)
       {
+        // annotation is cloned rather than moved, to preserve what's currently
+        // on the alignment
         for (AlignmentAnnotation aa : annotation)
         {
           AlignmentAnnotation _aa = new AlignmentAnnotation(aa);
index f1cba43..60040d8 100755 (executable)
@@ -250,17 +250,20 @@ public interface SequenceI extends ASequenceI
   public void insertCharAt(int position, int count, char ch);
 
   /**
-   * DOCUMENT ME!
+   * Gets array holding sequence features associated with this sequence. The
+   * array may be held by the sequence's dataset sequence if that is defined.
    * 
-   * @return DOCUMENT ME!
+   * @return hard reference to array
    */
   public SequenceFeature[] getSequenceFeatures();
 
   /**
-   * DOCUMENT ME!
+   * Replaces the array of sequence features associated with this sequence with
+   * a new array reference. If this sequence has a dataset sequence, then this
+   * method will update the dataset sequence's feature array
    * 
-   * @param v
-   *          DOCUMENT ME!
+   * @param features
+   *          New array of sequence features
    */
   public void setSequenceFeatures(SequenceFeature[] features);
 
diff --git a/test/jalview/analysis/SeqsetUtilsTest.java b/test/jalview/analysis/SeqsetUtilsTest.java
new file mode 100644 (file)
index 0000000..b4d079a
--- /dev/null
@@ -0,0 +1,76 @@
+/*
+ * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
+ * Copyright (C) $$Year-Rel$$ The Jalview Authors
+ * 
+ * This file is part of Jalview.
+ * 
+ * Jalview is free software: you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License 
+ * as published by the Free Software Foundation, either version 3
+ * of the License, or (at your option) any later version.
+ *  
+ * Jalview is distributed in the hope that it will be useful, but 
+ * WITHOUT ANY WARRANTY; without even the implied warranty 
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR 
+ * PURPOSE.  See the GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
+ * The Jalview Authors are detailed in the 'AUTHORS' file.
+ */
+package jalview.analysis;
+
+import jalview.datamodel.Alignment;
+import jalview.datamodel.AlignmentI;
+import jalview.datamodel.Sequence;
+import jalview.datamodel.SequenceFeature;
+import jalview.datamodel.SequenceI;
+
+import java.util.Hashtable;
+
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+/**
+ * @author jprocter
+ *
+ */
+public class SeqsetUtilsTest
+{
+  /**
+   * test for JAL-2046 bug - duplication of sequence features on reconstructed
+   * alignment
+   */
+  @Test(groups = { "Functional" })
+  public void testSeqFeatureAddition()
+  {
+    SequenceI[] sqset = new SequenceI[] {
+        new Sequence("Aseq1", "AREALSEQ"),
+        new Sequence("Aseq2", "AREALSEQ") };
+
+    AlignmentI al = new Alignment(sqset);
+    al.setDataset(null);
+    AlignmentI ds = al.getDataset();
+    SequenceFeature sf1 = new SequenceFeature("f1", "foo", "bleh", 2, 3,
+            "far"), sf2 = new SequenceFeature("f2", "foo", "bleh", 2, 3,
+            "far");
+    ds.getSequenceAt(0).addSequenceFeature(sf1);
+    Hashtable unq = SeqsetUtils.uniquify(sqset, true);
+    SequenceI[] sqset2 = new SequenceI[] {
+        new Sequence(sqset[0].getName(), sqset[0].getSequenceAsString()),
+        new Sequence(sqset[1].getName(), sqset[1].getSequenceAsString()) };
+    Assert.assertTrue(sqset[0].getSequenceFeatures()[0] == sf1);
+    Assert.assertEquals(sqset2[0].getSequenceFeatures(), null);
+    ds.getSequenceAt(0).addSequenceFeature(sf2);
+    Assert.assertEquals(sqset[0].getSequenceFeatures().length, 2);
+    SeqsetUtils.deuniquify(unq, sqset2);
+    // explicitly test that original sequence features still exist because they
+    // are on the shared dataset sequence
+    Assert.assertEquals(sqset[0].getSequenceFeatures().length, 2);
+    Assert.assertEquals(sqset2[0].getSequenceFeatures().length, 2);
+    Assert.assertTrue(sqset[0].getSequenceFeatures()[0] == sqset2[0]
+            .getSequenceFeatures()[0]);
+    Assert.assertTrue(sqset[0].getSequenceFeatures()[1] == sqset2[0]
+            .getSequenceFeatures()[1]);
+  }
+}
index 95755ee..ab11c09 100644 (file)
@@ -310,9 +310,13 @@ public class SequenceTest
     assertEquals(1, sfs.length);
     assertSame(sf, sfs[0]);
 
+
     /*
      * SequenceFeature on sequence and dataset sequence; returns that on
      * sequence
+     * 
+     * Note JAL-2046: spurious: we have no use case for this at the moment.
+     * This test also buggy - as sf2.equals(sf), no new feature is added
      */
     SequenceFeature sf2 = new SequenceFeature();
     sq.getDatasetSequence().addSequenceFeature(sf2);
@@ -322,17 +326,20 @@ public class SequenceTest
 
     /*
      * SequenceFeature on dataset sequence only
+     * Note JAL-2046: spurious: we have no use case for setting a non-dataset sequence's feature array to null at the moment.
      */
     sq.setSequenceFeatures(null);
-    sfs = sq.getSequenceFeatures();
-    assertEquals(1, sfs.length);
-    assertSame(sf2, sfs[0]);
+    assertNull(sq.getDatasetSequence().getSequenceFeatures());
 
     /*
      * Corrupt case - no SequenceFeature, dataset's dataset is the original
      * sequence. Test shows no infinite loop results.
      */
     sq.getDatasetSequence().setSequenceFeatures(null);
+    /**
+     * is there a usecase for this ? setDatasetSequence should throw an error if
+     * this actually occurs.
+     */
     sq.getDatasetSequence().setDatasetSequence(sq); // loop!
     assertNull(sq.getSequenceFeatures());
   }
@@ -395,9 +402,9 @@ public class SequenceTest
     assertSame(sq.getDatasetSequence(), derived.getDatasetSequence());
 
     assertNull(sq.sequenceFeatures);
-    // assertNull(derived.sequenceFeatures);
+    assertNull(derived.sequenceFeatures);
+    // derived sequence should access dataset sequence features
     assertNotNull(sq.getSequenceFeatures());
-    // derived sequence has a copy of the sequence features (is this right?)
     assertArrayEquals(sq.getSequenceFeatures(),
             derived.getSequenceFeatures());
   }
@@ -471,6 +478,8 @@ public class SequenceTest
     seq1.setDescription("description");
     seq1.addAlignmentAnnotation(new AlignmentAnnotation("label", "desc",
             1.3d));
+    // JAL-2046 - what is the contract for using a derived sequence's
+    // addSequenceFeature ?
     seq1.addSequenceFeature(new SequenceFeature("type", "desc", 22, 33,
             12.4f, "group"));
     seq1.addPDBId(new PDBEntry("1A70", "B", Type.PDB, "File"));
@@ -518,7 +527,11 @@ public class SequenceTest
     // copy has a copy of the sequence feature:
     SequenceFeature[] sfs = copy.getSequenceFeatures();
     assertEquals(1, sfs.length);
-    assertFalse(sfs[0] == seq1.getSequenceFeatures()[0]);
+    if (seq1.getDatasetSequence()!=null && copy.getDatasetSequence()==seq1.getDatasetSequence()) {
+      assertTrue(sfs[0] == seq1.getSequenceFeatures()[0]);
+    } else {
+      assertFalse(sfs[0] == seq1.getSequenceFeatures()[0]);
+    }
     assertTrue(sfs[0].equals(seq1.getSequenceFeatures()[0]));
 
     // copy has a copy of the PDB entry