Merge branch 'develop' into features/JAL-2446NCList
authorgmungoc <g.m.carstairs@dundee.ac.uk>
Mon, 5 Jun 2017 07:52:40 +0000 (08:52 +0100)
committergmungoc <g.m.carstairs@dundee.ac.uk>
Mon, 5 Jun 2017 07:52:40 +0000 (08:52 +0100)
Conflicts:
src/jalview/datamodel/SequenceI.java

1  2 
src/jalview/datamodel/Sequence.java
src/jalview/datamodel/SequenceI.java
src/jalview/ext/ensembl/EnsemblGene.java
src/jalview/gui/PopupMenu.java
test/jalview/datamodel/SequenceTest.java

@@@ -22,8 -22,6 +22,8 @@@ package jalview.datamodel
  
  import jalview.analysis.AlignSeq;
  import jalview.api.DBRefEntryI;
 +import jalview.datamodel.features.SequenceFeatures;
 +import jalview.datamodel.features.SequenceFeaturesI;
  import jalview.util.Comparison;
  import jalview.util.DBRefUtils;
  import jalview.util.MapList;
@@@ -31,13 -29,12 +31,14 @@@ import jalview.util.StringUtils
  
  import java.util.ArrayList;
  import java.util.Arrays;
+ import java.util.BitSet;
  import java.util.Collections;
  import java.util.Enumeration;
  import java.util.List;
  import java.util.Vector;
  
 +import com.stevesoft.pat.Regex;
 +
  import fr.orsay.lri.varna.models.rna.RNA;
  
  /**
   */
  public class Sequence extends ASequence implements SequenceI
  {
 +  private static final Regex limitrx = new Regex(
 +          "[/][0-9]{1,}[-][0-9]{1,}$");
 +
 +  private static final Regex endrx = new Regex("[0-9]{1,}$");
 +
    SequenceI datasetSequence;
  
    String name;
@@@ -87,7 -79,8 +88,7 @@@
     */
    int index = -1;
  
 -  /** array of sequence features - may not be null for a valid sequence object */
 -  public SequenceFeature[] sequenceFeatures;
 +  private SequenceFeatures sequenceFeatureStore;
  
    /**
     * Creates a new Sequence object.
     */
    public Sequence(String name, String sequence, int start, int end)
    {
 +    this();
      initSeqAndName(name, sequence.toCharArray(), start, end);
    }
  
    public Sequence(String name, char[] sequence, int start, int end)
    {
 +    this();
      initSeqAndName(name, sequence, start, end);
    }
  
      checkValidRange();
    }
  
 -  com.stevesoft.pat.Regex limitrx = new com.stevesoft.pat.Regex(
 -          "[/][0-9]{1,}[-][0-9]{1,}$");
 -
 -  com.stevesoft.pat.Regex endrx = new com.stevesoft.pat.Regex("[0-9]{1,}$");
 -
    void parseId()
    {
      if (name == null)
    }
  
    /**
 +   * default constructor
 +   */
 +  private Sequence()
 +  {
 +    sequenceFeatureStore = new SequenceFeatures();
 +  }
 +
 +  /**
     * Creates a new Sequence object.
     * 
     * @param name
     */
    public Sequence(SequenceI seq, AlignmentAnnotation[] alAnnotation)
    {
 +    this();
      initSeqFrom(seq, alAnnotation);
 -
    }
  
    /**
    protected void initSeqFrom(SequenceI seq,
            AlignmentAnnotation[] alAnnotation)
    {
 -    {
 -      char[] oseq = seq.getSequence();
 -      initSeqAndName(seq.getName(), Arrays.copyOf(oseq, oseq.length),
 -              seq.getStart(), seq.getEnd());
 -    }
 +    char[] oseq = seq.getSequence();
 +    initSeqAndName(seq.getName(), Arrays.copyOf(oseq, oseq.length),
 +            seq.getStart(), seq.getEnd());
 +
      description = seq.getDescription();
      if (seq != datasetSequence)
      {
        setDatasetSequence(seq.getDatasetSequence());
      }
 -    if (datasetSequence == null && seq.getDBRefs() != null)
 +    
 +    /*
 +     * only copy DBRefs and seqfeatures if we really are a dataset sequence
 +     */
 +    if (datasetSequence == null)
      {
 -      // only copy DBRefs and seqfeatures if we really are a dataset sequence
 -      DBRefEntry[] dbr = seq.getDBRefs();
 -      for (int i = 0; i < dbr.length; i++)
 -      {
 -        addDBRef(new DBRefEntry(dbr[i]));
 -      }
 -      if (seq.getSequenceFeatures() != null)
 +      if (seq.getDBRefs() != null)
        {
 -        SequenceFeature[] sf = seq.getSequenceFeatures();
 -        for (int i = 0; i < sf.length; i++)
 +        DBRefEntry[] dbr = seq.getDBRefs();
 +        for (int i = 0; i < dbr.length; i++)
          {
 -          addSequenceFeature(new SequenceFeature(sf[i]));
 +          addDBRef(new DBRefEntry(dbr[i]));
          }
        }
 +
 +      /*
 +       * make copies of any sequence features
 +       */
 +      for (SequenceFeature sf : seq.getSequenceFeatures())
 +      {
 +        addSequenceFeature(new SequenceFeature(sf));
 +      }
      }
 +
      if (seq.getAnnotation() != null)
      {
        AlignmentAnnotation[] sqann = seq.getAnnotation();
    }
  
    @Override
 -  public void setSequenceFeatures(SequenceFeature[] features)
 +  public void setSequenceFeatures(List<SequenceFeature> features)
    {
 -    if (datasetSequence == null)
 -    {
 -      sequenceFeatures = features;
 -    }
 -    else
 +    if (datasetSequence != null)
      {
 -      if (datasetSequence.getSequenceFeatures() != features
 -              && datasetSequence.getSequenceFeatures() != null
 -              && datasetSequence.getSequenceFeatures().length > 0)
 -      {
 -        new Exception(
 -                "Warning: JAL-2046 side effect ? Possible implementation error: overwriting dataset sequence features by setting sequence features on alignment")
 -                .printStackTrace();
 -      }
        datasetSequence.setSequenceFeatures(features);
 +      return;
      }
 +    sequenceFeatureStore = new SequenceFeatures(features);
    }
  
    @Override
    public synchronized boolean addSequenceFeature(SequenceFeature sf)
    {
 -    if (sequenceFeatures == null && datasetSequence != null)
 -    {
 -      return datasetSequence.addSequenceFeature(sf);
 -    }
 -    if (sequenceFeatures == null)
 +    if (sf.getType() == null)
      {
 -      sequenceFeatures = new SequenceFeature[0];
 +      System.err.println("SequenceFeature type may not be null: "
 +              + sf.toString());
 +      return false;
      }
  
 -    for (int i = 0; i < sequenceFeatures.length; i++)
 +    if (datasetSequence != null)
      {
 -      if (sequenceFeatures[i].equals(sf))
 -      {
 -        return false;
 -      }
 +      return datasetSequence.addSequenceFeature(sf);
      }
  
 -    SequenceFeature[] temp = new SequenceFeature[sequenceFeatures.length + 1];
 -    System.arraycopy(sequenceFeatures, 0, temp, 0, sequenceFeatures.length);
 -    temp[sequenceFeatures.length] = sf;
 -
 -    sequenceFeatures = temp;
 -    return true;
 +    return sequenceFeatureStore.add(sf);
    }
  
    @Override
    public void deleteFeature(SequenceFeature sf)
    {
 -    if (sequenceFeatures == null)
 -    {
 -      if (datasetSequence != null)
 -      {
 -        datasetSequence.deleteFeature(sf);
 -      }
 -      return;
 -    }
 -
 -    int index = 0;
 -    for (index = 0; index < sequenceFeatures.length; index++)
 -    {
 -      if (sequenceFeatures[index].equals(sf))
 -      {
 -        break;
 -      }
 -    }
 -
 -    if (index == sequenceFeatures.length)
 -    {
 -      return;
 -    }
 -
 -    int sfLength = sequenceFeatures.length;
 -    if (sfLength < 2)
 +    if (datasetSequence != null)
      {
 -      sequenceFeatures = null;
 +      datasetSequence.deleteFeature(sf);
      }
      else
      {
 -      SequenceFeature[] temp = new SequenceFeature[sfLength - 1];
 -      System.arraycopy(sequenceFeatures, 0, temp, 0, index);
 -
 -      if (index < sfLength)
 -      {
 -        System.arraycopy(sequenceFeatures, index + 1, temp, index,
 -                sequenceFeatures.length - index - 1);
 -      }
 -
 -      sequenceFeatures = temp;
 +      sequenceFeatureStore.delete(sf);
      }
    }
  
    /**
 -   * Returns the sequence features (if any), looking first on the sequence, then
 -   * on its dataset sequence, and so on until a non-null value is found (or
 -   * none). This supports retrieval of sequence features stored on the sequence
 -   * (as in the applet) or on the dataset sequence (as in the Desktop version).
 +   * {@inheritDoc}
     * 
     * @return
     */
    @Override
 -  public SequenceFeature[] getSequenceFeatures()
 +  public List<SequenceFeature> getSequenceFeatures()
    {
 -    SequenceFeature[] features = sequenceFeatures;
 -
 -    SequenceI seq = this;
 -    int count = 0; // failsafe against loop in sequence.datasetsequence...
 -    while (features == null && seq.getDatasetSequence() != null
 -            && count++ < 10)
 +    if (datasetSequence != null)
      {
 -      seq = seq.getDatasetSequence();
 -      features = ((Sequence) seq).sequenceFeatures;
 +      return datasetSequence.getSequenceFeatures();
      }
 -    return features;
 +    return sequenceFeatureStore.getAllFeatures();
 +  }
 +
 +  @Override
 +  public SequenceFeaturesI getFeatures()
 +  {
 +    return datasetSequence != null ? datasetSequence.getFeatures()
 +            : sequenceFeatureStore;
    }
  
    @Override
      // Rely on end being at least as long as the length of the sequence.
      while ((i < sequence.length) && (j <= end) && (j <= pos))
      {
 -      if (!jalview.util.Comparison.isGap(sequence[i]))
 +      if (!Comparison.isGap(sequence[i]))
        {
          j++;
        }
      int seqlen = sequence.length;
      while ((j < i) && (j < seqlen))
      {
 -      if (!jalview.util.Comparison.isGap(sequence[j]))
 +      if (!Comparison.isGap(sequence[j]))
        {
          pos++;
        }
    }
  
    /**
 +   * {@inheritDoc}
 +   */
 +  @Override
 +  public Range findPositions(int fromCol, int toCol)
 +  {
 +    /*
 +     * count residues before fromCol
 +     */
 +    int j = 0;
 +    int count = 0;
 +    int seqlen = sequence.length;
 +    while (j < fromCol && j < seqlen)
 +    {
 +      if (!Comparison.isGap(sequence[j]))
 +      {
 +        count++;
 +      }
 +      j++;
 +    }
 +
 +    /*
 +     * find first and last residues between fromCol and toCol
 +     */
 +    int firstPos = 0;
 +    int lastPos = 0;
 +    int firstPosCol = 0;
 +    boolean foundFirst = false;
 +    
 +    while (j <= toCol && j < seqlen)
 +    {
 +      if (!Comparison.isGap(sequence[j]))
 +      {
 +        count++;
 +        if (!foundFirst)
 +        {
 +          firstPos = count;
 +          firstPosCol = j;
 +          foundFirst = true;
 +        }
 +        lastPos = count;
 +      }
 +      j++;
 +    }
 +
 +    if (firstPos == 0)
 +    {
 +      /*
 +       * no residues in this range
 +       */
 +      return null;
 +    }
 +
 +    /*
 +     * adjust for sequence start coordinate
 +     */
 +    firstPos += start - 1;
 +    lastPos += start - 1;
 +
 +    return new Range(firstPos, lastPos);
 +  }
 +
 +  /**
     * Returns an int array where indices correspond to each residue in the
     * sequence and the element value gives its position in the alignment
     * 
    }
  
    @Override
+   public BitSet getInsertionsAsBits()
+   {
+     BitSet map = new BitSet();
+     int lastj = -1, j = 0;
+     int pos = start;
+     int seqlen = sequence.length;
+     while ((j < seqlen))
+     {
+       if (jalview.util.Comparison.isGap(sequence[j]))
+       {
+         if (lastj == -1)
+         {
+           lastj = j;
+         }
+       }
+       else
+       {
+         if (lastj != -1)
+         {
+           map.set(lastj, j);
+           lastj = -1;
+         }
+       }
+       j++;
+     }
+     if (lastj != -1)
+     {
+       map.set(lastj, j);
+       lastj = -1;
+     }
+     return map;
+   }
+   @Override
    public void deleteChars(int i, int j)
    {
      int newstart = start, newend = end;
  
        dsseq.setDescription(description);
        // move features and database references onto dataset sequence
 -      dsseq.sequenceFeatures = sequenceFeatures;
 -      sequenceFeatures = null;
 +      dsseq.sequenceFeatureStore = sequenceFeatureStore;
 +      sequenceFeatureStore = null;
        dsseq.dbrefs = dbrefs;
        dbrefs = null;
        // TODO: search and replace any references to this sequence with
        return null;
      }
  
 -    Vector subset = new Vector();
 -    Enumeration e = annotation.elements();
 +    Vector<AlignmentAnnotation> subset = new Vector<AlignmentAnnotation>();
 +    Enumeration<AlignmentAnnotation> e = annotation.elements();
      while (e.hasMoreElements())
      {
 -      AlignmentAnnotation ann = (AlignmentAnnotation) e.nextElement();
 +      AlignmentAnnotation ann = e.nextElement();
        if (ann.label != null && ann.label.equals(label))
        {
          subset.addElement(ann);
      e = subset.elements();
      while (e.hasMoreElements())
      {
 -      anns[i++] = (AlignmentAnnotation) e.nextElement();
 +      anns[i++] = e.nextElement();
      }
      subset.removeAllElements();
      return anns;
      if (entry.getSequenceFeatures() != null)
      {
  
 -      SequenceFeature[] sfs = entry.getSequenceFeatures();
 -      for (int si = 0; si < sfs.length; si++)
 +      List<SequenceFeature> sfs = entry.getSequenceFeatures();
 +      for (SequenceFeature feature : sfs)
        {
 -        SequenceFeature sf[] = (mp != null) ? mp.locateFeature(sfs[si])
 -                : new SequenceFeature[] { new SequenceFeature(sfs[si]) };
 -        if (sf != null && sf.length > 0)
 +        SequenceFeature sf[] = (mp != null) ? mp.locateFeature(feature)
 +                : new SequenceFeature[] { new SequenceFeature(feature) };
 +        if (sf != null)
          {
            for (int sfi = 0; sfi < sf.length; sfi++)
            {
      // transfer PDB entries
      if (entry.getAllPDBEntries() != null)
      {
 -      Enumeration e = entry.getAllPDBEntries().elements();
 +      Enumeration<PDBEntry> e = entry.getAllPDBEntries().elements();
        while (e.hasMoreElements())
        {
 -        PDBEntry pdb = (PDBEntry) e.nextElement();
 +        PDBEntry pdb = e.nextElement();
          addPDBId(pdb);
        }
      }
      }
    }
  
 +  /**
 +   * {@inheritDoc}
 +   */
 +  @Override
 +  public List<SequenceFeature> findFeatures(int from, int to,
 +          String... types)
 +  {
 +    if (datasetSequence != null)
 +    {
 +      return datasetSequence.findFeatures(from, to, types);
 +    }
 +    return sequenceFeatureStore.findFeatures(from, to, types);
 +  }
  }
@@@ -20,8 -20,7 +20,9 @@@
   */
  package jalview.datamodel;
  
 +import jalview.datamodel.features.SequenceFeaturesI;
 +
+ import java.util.BitSet;
  import java.util.List;
  import java.util.Vector;
  
@@@ -176,7 -175,7 +177,7 @@@ public interface SequenceI extends ASeq
    public String getDescription();
  
    /**
 -   * Return the alignment column for a sequence position
 +   * Return the alignment column (from 1..) for a sequence position
     * 
     * @param pos
     *          lying from start to end
    public int findPosition(int i);
  
    /**
 +   * Returns the range of sequence positions included in the given alignment
 +   * position range. If no positions are included (the range is entirely gaps),
 +   * then returns null.
 +   * 
 +   * <pre>
 +   * Example: 
 +   * >Seq/8-13
 +   * ABC--DE-F
 +   * findPositions(1, 4) returns Range(9, 9) // B only
 +   * findPositions(3, 4) returns null // all gaps
 +   * findPositions(2, 6) returns Range(10, 12) // CDE
 +   * findPositions(3, 7) returns Range(11,12) // DE
 +   * </pre>
 +   * 
 +   * @param fromCol
 +   *          first aligned column position (base 0, inclusive)
 +   * @param toCol
 +   *          last aligned column position (base 0, inclusive)
 +   * 
 +   * @return
 +   */
 +  public Range findPositions(int fromCol, int toCol);
 +
 +  /**
     * Returns an int array where indices correspond to each residue in the
     * sequence and the element value gives its position in the alignment
     * 
    public void insertCharAt(int position, int count, char ch);
  
    /**
 -   * Gets array holding sequence features associated with this sequence. The
 -   * array may be held by the sequence's dataset sequence if that is defined.
 +   * Answers a list of all sequence features associated with this sequence. The
 +   * list may be held by the sequence's dataset sequence if that is defined.
     * 
     * @return hard reference to array
     */
 -  public SequenceFeature[] getSequenceFeatures();
 +  public List<SequenceFeature> getSequenceFeatures();
  
    /**
 -   * Replaces the array of sequence features associated with this sequence with
 -   * a new array reference. If this sequence has a dataset sequence, then this
 -   * method will update the dataset sequence's feature array
 +   * Answers the object holding features for the sequence
 +   * 
 +   * @return
 +   */
 +  SequenceFeaturesI getFeatures();
 +
 +  /**
 +   * Replaces the sequence features associated with this sequence with the given
 +   * features. If this sequence has a dataset sequence, then this method will
 +   * update the dataset sequence's features instead.
     * 
     * @param features
 -   *          New array of sequence features
     */
 -  public void setSequenceFeatures(SequenceFeature[] features);
 +  public void setSequenceFeatures(List<SequenceFeature> features);
  
    /**
     * DOCUMENT ME!
  
    /**
     * Adds the given sequence feature and returns true, or returns false if it is
 -   * already present on the sequence
 +   * already present on the sequence, or if the feature type is null.
     * 
     * @param sf
     * @return
    public List<DBRefEntry> getPrimaryDBRefs();
  
    /**
 +   * Returns a (possibly empty) list of sequence features that overlap the range
 +   * from-to (inclusive), optionally restricted to one or more specified feature
 +   * types
 +   * 
 +   * @param from
 +   * @param to
 +   * @param types
 +   * @return
 +   */
 +  List<SequenceFeature> findFeatures(int from, int to, String... types);
++  
++  /**
+    * 
+    * @return BitSet corresponding to index [0,length) where Comparison.isGap()
+    *         returns true.
+    */
+   BitSet getInsertionsAsBits();
  }
@@@ -26,7 -26,6 +26,7 @@@ import jalview.datamodel.AlignmentI
  import jalview.datamodel.Sequence;
  import jalview.datamodel.SequenceFeature;
  import jalview.datamodel.SequenceI;
 +import jalview.datamodel.features.SequenceFeatures;
  import jalview.io.gff.SequenceOntologyFactory;
  import jalview.io.gff.SequenceOntologyI;
  import jalview.schemes.FeatureColour;
@@@ -191,7 -190,22 +191,22 @@@ public class EnsemblGene extends Ensemb
            geneIds.add(geneId);
          }
        }
+       else if (isProteinIdentifier(acc))
+       {
+         String tscriptId = new EnsemblLookup(getDomain()).getParent(acc);
+         if (tscriptId != null)
+         {
+           String geneId = new EnsemblLookup(getDomain())
+                   .getParent(tscriptId);
  
+           if (geneId != null && !geneIds.contains(geneId))
+           {
+             geneIds.add(geneId);
+           }
+         }
+         // NOTE - acc is lost if it resembles an ENS.+ ID but isn't actually
+         // resolving to one... e.g. ENSMICP00000009241
+       }
        /*
         * if given a gene or other external name, lookup and fetch 
         * the corresponding gene for all model organisms 
     */
    protected void clearGeneFeatures(SequenceI gene)
    {
 -    SequenceFeature[] sfs = gene.getSequenceFeatures();
 -    if (sfs != null)
 +    /*
 +     * Note we include NMD_transcript_variant here because it behaves like 
 +     * 'transcript' in Ensembl, although strictly speaking it is not 
 +     * (it is a sub-type of sequence_variant)    
 +     */
 +    String[] soTerms = new String[] {
 +        SequenceOntologyI.NMD_TRANSCRIPT_VARIANT,
 +        SequenceOntologyI.TRANSCRIPT, SequenceOntologyI.EXON,
 +        SequenceOntologyI.CDS };
 +    List<SequenceFeature> sfs = gene.getFeatures().getFeaturesByOntology(
 +            soTerms);
 +    for (SequenceFeature sf : sfs)
      {
 -      SequenceOntologyI so = SequenceOntologyFactory.getInstance();
 -      List<SequenceFeature> filtered = new ArrayList<SequenceFeature>();
 -      for (SequenceFeature sf : sfs)
 -      {
 -        String type = sf.getType();
 -        if (!isTranscript(type) && !so.isA(type, SequenceOntologyI.EXON)
 -                && !so.isA(type, SequenceOntologyI.CDS))
 -        {
 -          filtered.add(sf);
 -        }
 -      }
 -      gene.setSequenceFeatures(filtered
 -              .toArray(new SequenceFeature[filtered.size()]));
 +      gene.deleteFeature(sf);
      }
    }
  
      {
        splices = findFeatures(gene, SequenceOntologyI.CDS, parentId);
      }
 +    SequenceFeatures.sortFeatures(splices, true);
  
      int transcriptLength = 0;
      final char[] geneChars = gene.getSequence();
      mapTo.add(new int[] { 1, transcriptLength });
      MapList mapping = new MapList(mappedFrom, mapTo, 1, 1);
      EnsemblCdna cdna = new EnsemblCdna(getDomain());
 -    cdna.transferFeatures(gene.getSequenceFeatures(),
 +    cdna.transferFeatures(gene.getFeatures().getPositionalFeatures(),
              transcript.getDatasetSequence(), mapping, parentId);
  
      /*
      List<SequenceFeature> transcriptFeatures = new ArrayList<SequenceFeature>();
  
      String parentIdentifier = GENE_PREFIX + accId;
 -    SequenceFeature[] sfs = geneSequence.getSequenceFeatures();
 +    // todo optimise here by transcript type!
 +    List<SequenceFeature> sfs = geneSequence.getFeatures()
 +            .getPositionalFeatures();
  
 -    if (sfs != null)
 +    for (SequenceFeature sf : sfs)
      {
 -      for (SequenceFeature sf : sfs)
 +      if (isTranscript(sf.getType()))
        {
 -        if (isTranscript(sf.getType()))
 +        String parent = (String) sf.getValue(PARENT);
 +        if (parentIdentifier.equals(parent))
          {
 -          String parent = (String) sf.getValue(PARENT);
 -          if (parentIdentifier.equals(parent))
 -          {
 -            transcriptFeatures.add(sf);
 -          }
 +          transcriptFeatures.add(sf);
          }
        }
      }
@@@ -57,6 -57,7 +57,7 @@@ import java.awt.event.ActionEvent
  import java.awt.event.ActionListener;
  import java.util.ArrayList;
  import java.util.Arrays;
+ import java.util.BitSet;
  import java.util.Collection;
  import java.util.Collections;
  import java.util.Hashtable;
@@@ -1444,24 -1445,59 +1445,59 @@@ public class PopupMenu extends JPopupMe
  
    protected void hideInsertions_actionPerformed(ActionEvent actionEvent)
    {
-     if (sequence != null)
+     HiddenColumns hidden = new HiddenColumns();
+     BitSet inserts = new BitSet(), mask = new BitSet();
+     // set mask to preserve existing hidden columns outside selected group
+     if (ap.av.hasHiddenColumns())
+     {
+       ap.av.getAlignment().getHiddenColumns().markHiddenRegions(mask);
+     }
+     boolean markedPopup = false;
+     // mark inserts in current selection
+     if (ap.av.getSelectionGroup() != null)
      {
-       /* ColumnSelection cs = ap.av.getColumnSelection();
-        if (cs == null)
-        {
-          cs = new ColumnSelection();
-        }
-        cs.hideInsertionsFor(sequence);
-        ap.av.setColumnSelection(cs);*/
+       // mark just the columns in the selection group to be hidden
+       inserts.set(ap.av.getSelectionGroup().getStartRes(), ap.av
+               .getSelectionGroup().getEndRes() + 1);
+       // and clear that part of the mask
+       mask.andNot(inserts);
  
-       HiddenColumns hidden = ap.av.getAlignment().getHiddenColumns();
-       if (hidden == null)
+       // now clear columns without gaps
+       for (SequenceI sq : ap.av.getSelectionGroup().getSequences())
        {
-         hidden = new HiddenColumns();
+         if (sq == sequence)
+         {
+           markedPopup = true;
+         }
+         inserts.and(sq.getInsertionsAsBits());
        }
-       hidden.hideInsertionsFor(sequence);
-       ap.av.getAlignment().setHiddenColumns(hidden);
      }
+     else
+     {
+       // initially, mark all columns to be hidden
+       inserts.set(0, ap.av.getAlignment().getWidth());
+       // and clear out old hidden regions completely
+       mask.clear();
+     }
+     // now mark for sequence under popup if we haven't already done it
+     if (!markedPopup && sequence != null)
+     {
+       inserts.and(sequence.getInsertionsAsBits());
+     }
+     // finally, preserve hidden regions outside selection
+     inserts.or(mask);
+     // and set hidden columns accordingly
+     hidden.hideMarkedBits(inserts);
+     ap.av.getAlignment().setHiddenColumns(hidden);
      refresh();
    }
  
        if (start <= end)
        {
          seqs.add(sg.getSequenceAt(i).getDatasetSequence());
 -        features.add(new SequenceFeature(null, null, null, start, end, null));
 +        features.add(new SequenceFeature(null, null, start, end, null));
        }
      }
  
@@@ -23,10 -23,10 +23,10 @@@ package jalview.datamodel
  import static org.testng.AssertJUnit.assertEquals;
  import static org.testng.AssertJUnit.assertFalse;
  import static org.testng.AssertJUnit.assertNotNull;
 +import static org.testng.AssertJUnit.assertNotSame;
  import static org.testng.AssertJUnit.assertNull;
  import static org.testng.AssertJUnit.assertSame;
  import static org.testng.AssertJUnit.assertTrue;
 -import static org.testng.internal.junit.ArrayAsserts.assertArrayEquals;
  
  import jalview.datamodel.PDBEntry.Type;
  import jalview.gui.JvOptionPane;
@@@ -35,11 -35,10 +35,12 @@@ import jalview.util.MapList
  import java.io.File;
  import java.util.ArrayList;
  import java.util.Arrays;
+ import java.util.BitSet;
  import java.util.List;
  import java.util.Vector;
  
 +import junit.extensions.PA;
 +
  import org.testng.Assert;
  import org.testng.annotations.BeforeClass;
  import org.testng.annotations.BeforeMethod;
@@@ -76,6 -75,18 +77,18 @@@ public class SequenceTes
      assertEquals("Gap interval 1 end wrong", 4, gapInt.get(0)[1]);
      assertEquals("Gap interval 2 start wrong", 6, gapInt.get(1)[0]);
      assertEquals("Gap interval 2 end wrong", 8, gapInt.get(1)[1]);
+     BitSet gapfield = aseq.getInsertionsAsBits();
+     BitSet expectedgaps = new BitSet();
+     expectedgaps.set(2, 5);
+     expectedgaps.set(6, 9);
+     assertEquals(6, expectedgaps.cardinality());
+     assertEquals("getInsertionsAsBits didn't mark expected number of gaps",
+             6, gapfield.cardinality());
+     assertEquals("getInsertionsAsBits not correct.", expectedgaps, gapfield);
    }
  
    @Test(groups = ("Functional"))
      assertEquals(6, sq.findIndex(6));
      assertEquals(6, sq.findIndex(9));
  
 -    sq = new Sequence("test", "-A--B-C-D-E-F--");
 -    assertEquals(2, sq.findIndex(1));
 -    assertEquals(5, sq.findIndex(2));
 -    assertEquals(7, sq.findIndex(3));
 +    sq = new Sequence("test/8-13", "-A--B-C-D-E-F--");
 +    assertEquals(2, sq.findIndex(8));
 +    assertEquals(5, sq.findIndex(9));
 +    assertEquals(7, sq.findIndex(10));
  
      // before start returns 0
      assertEquals(0, sq.findIndex(0));
    @Test(groups = { "Functional" })
    public void testDeleteChars()
    {
 +    /*
 +     * internal delete
 +     */
      SequenceI sq = new Sequence("test", "ABCDEF");
 +    assertNull(PA.getValue(sq, "datasetSequence"));
      assertEquals(1, sq.getStart());
      assertEquals(6, sq.getEnd());
      sq.deleteChars(2, 3);
      assertEquals("ABDEF", sq.getSequenceAsString());
      assertEquals(1, sq.getStart());
      assertEquals(5, sq.getEnd());
 +    assertNull(PA.getValue(sq, "datasetSequence"));
  
 +    /*
 +     * delete at start
 +     */
      sq = new Sequence("test", "ABCDEF");
      sq.deleteChars(0, 2);
      assertEquals("CDEF", sq.getSequenceAsString());
      assertEquals(3, sq.getStart());
      assertEquals(6, sq.getEnd());
 +    assertNull(PA.getValue(sq, "datasetSequence"));
 +
 +    /*
 +     * delete at end
 +     */
 +    sq = new Sequence("test", "ABCDEF");
 +    sq.deleteChars(4, 6);
 +    assertEquals("ABCD", sq.getSequenceAsString());
 +    assertEquals(1, sq.getStart());
 +    assertEquals(4, sq.getEnd());
 +    assertNull(PA.getValue(sq, "datasetSequence"));
 +  }
 +
 +  @Test(groups = { "Functional" })
 +  public void testDeleteChars_withDbRefsAndFeatures()
 +  {
 +    /*
 +     * internal delete - new dataset sequence created
 +     * gets a copy of any dbrefs
 +     */
 +    SequenceI sq = new Sequence("test", "ABCDEF");
 +    sq.createDatasetSequence();
 +    DBRefEntry dbr1 = new DBRefEntry("Uniprot", "0", "a123");
 +    sq.addDBRef(dbr1);
 +    Object ds = PA.getValue(sq, "datasetSequence");
 +    assertNotNull(ds);
 +    assertEquals(1, sq.getStart());
 +    assertEquals(6, sq.getEnd());
 +    sq.deleteChars(2, 3);
 +    assertEquals("ABDEF", sq.getSequenceAsString());
 +    assertEquals(1, sq.getStart());
 +    assertEquals(5, sq.getEnd());
 +    Object newDs = PA.getValue(sq, "datasetSequence");
 +    assertNotNull(newDs);
 +    assertNotSame(ds, newDs);
 +    assertNotNull(sq.getDBRefs());
 +    assertEquals(1, sq.getDBRefs().length);
 +    assertNotSame(dbr1, sq.getDBRefs()[0]);
 +    assertEquals(dbr1, sq.getDBRefs()[0]);
 +
 +    /*
 +     * internal delete with sequence features
 +     * (failure case for JAL-2541)
 +     */
 +    sq = new Sequence("test", "ABCDEF");
 +    sq.createDatasetSequence();
 +    SequenceFeature sf1 = new SequenceFeature("Cath", "desc", 2, 4, 2f,
 +            "CathGroup");
 +    sq.addSequenceFeature(sf1);
 +    ds = PA.getValue(sq, "datasetSequence");
 +    assertNotNull(ds);
 +    assertEquals(1, sq.getStart());
 +    assertEquals(6, sq.getEnd());
 +    sq.deleteChars(2, 4);
 +    assertEquals("ABEF", sq.getSequenceAsString());
 +    assertEquals(1, sq.getStart());
 +    assertEquals(4, sq.getEnd());
 +    newDs = PA.getValue(sq, "datasetSequence");
 +    assertNotNull(newDs);
 +    assertNotSame(ds, newDs);
 +    List<SequenceFeature> sfs = sq.getSequenceFeatures();
 +    assertEquals(1, sfs.size());
 +    assertNotSame(sf1, sfs.get(0));
 +    assertEquals(sf1, sfs.get(0));
 +
 +    /*
 +     * delete at start - no new dataset sequence created
 +     * any sequence features remain as before
 +     */
 +    sq = new Sequence("test", "ABCDEF");
 +    sq.createDatasetSequence();
 +    ds = PA.getValue(sq, "datasetSequence");
 +    sf1 = new SequenceFeature("Cath", "desc", 2, 4, 2f, "CathGroup");
 +    sq.addSequenceFeature(sf1);
 +    sq.deleteChars(0, 2);
 +    assertEquals("CDEF", sq.getSequenceAsString());
 +    assertEquals(3, sq.getStart());
 +    assertEquals(6, sq.getEnd());
 +    assertSame(ds, PA.getValue(sq, "datasetSequence"));
 +    sfs = sq.getSequenceFeatures();
 +    assertNotNull(sfs);
 +    assertEquals(1, sfs.size());
 +    assertSame(sf1, sfs.get(0));
 +
 +    /*
 +     * delete at end - no new dataset sequence created
 +     * any dbrefs remain as before
 +     */
 +    sq = new Sequence("test", "ABCDEF");
 +    sq.createDatasetSequence();
 +    ds = PA.getValue(sq, "datasetSequence");
 +    dbr1 = new DBRefEntry("Uniprot", "0", "a123");
 +    sq.addDBRef(dbr1);
 +    sq.deleteChars(4, 6);
 +    assertEquals("ABCD", sq.getSequenceAsString());
 +    assertEquals(1, sq.getStart());
 +    assertEquals(4, sq.getEnd());
 +    assertSame(ds, PA.getValue(sq, "datasetSequence"));
 +    assertNotNull(sq.getDBRefs());
 +    assertEquals(1, sq.getDBRefs().length);
 +    assertSame(dbr1, sq.getDBRefs()[0]);
    }
  
    @Test(groups = { "Functional" })
      SequenceI sq = new Sequence("test", "GATCAT");
      sq.createDatasetSequence();
  
 -    assertNull(sq.getSequenceFeatures());
 +    assertTrue(sq.getSequenceFeatures().isEmpty());
  
      /*
       * SequenceFeature on sequence
       */
 -    SequenceFeature sf = new SequenceFeature();
 +    SequenceFeature sf = new SequenceFeature("Cath", "desc", 2, 4, 2f, null);
      sq.addSequenceFeature(sf);
 -    SequenceFeature[] sfs = sq.getSequenceFeatures();
 -    assertEquals(1, sfs.length);
 -    assertSame(sf, sfs[0]);
 +    List<SequenceFeature> sfs = sq.getSequenceFeatures();
 +    assertEquals(1, sfs.size());
 +    assertSame(sf, sfs.get(0));
  
      /*
       * SequenceFeature on sequence and dataset sequence; returns that on
       * Note JAL-2046: spurious: we have no use case for this at the moment.
       * This test also buggy - as sf2.equals(sf), no new feature is added
       */
 -    SequenceFeature sf2 = new SequenceFeature();
 +    SequenceFeature sf2 = new SequenceFeature("Cath", "desc", 2, 4, 2f,
 +            null);
      sq.getDatasetSequence().addSequenceFeature(sf2);
      sfs = sq.getSequenceFeatures();
 -    assertEquals(1, sfs.length);
 -    assertSame(sf, sfs[0]);
 +    assertEquals(1, sfs.size());
 +    assertSame(sf, sfs.get(0));
  
      /*
       * SequenceFeature on dataset sequence only
       * Note JAL-2046: spurious: we have no use case for setting a non-dataset sequence's feature array to null at the moment.
       */
      sq.setSequenceFeatures(null);
 -    assertNull(sq.getDatasetSequence().getSequenceFeatures());
 +    assertTrue(sq.getDatasetSequence().getSequenceFeatures().isEmpty());
  
      /*
       * Corrupt case - no SequenceFeature, dataset's dataset is the original
        assertTrue(e.getMessage().toLowerCase()
                .contains("implementation error"));
      }
 -    assertNull(sq.getSequenceFeatures());
 +    assertTrue(sq.getSequenceFeatures().isEmpty());
    }
  
    /**
    public void testCreateDatasetSequence()
    {
      SequenceI sq = new Sequence("my", "ASDASD");
 +    sq.addSequenceFeature(new SequenceFeature("type", "desc", 1, 10, 1f,
 +            "group"));
 +    sq.addDBRef(new DBRefEntry("source", "version", "accession"));
      assertNull(sq.getDatasetSequence());
 +    assertNotNull(PA.getValue(sq, "sequenceFeatureStore"));
 +    assertNotNull(PA.getValue(sq, "dbrefs"));
 +
      SequenceI rds = sq.createDatasetSequence();
      assertNotNull(rds);
      assertNull(rds.getDatasetSequence());
 -    assertEquals(sq.getDatasetSequence(), rds);
 +    assertSame(sq.getDatasetSequence(), rds);
 +
 +    // sequence features and dbrefs transferred to dataset sequence
 +    assertNull(PA.getValue(sq, "sequenceFeatureStore"));
 +    assertNull(PA.getValue(sq, "dbrefs"));
 +    assertNotNull(PA.getValue(rds, "sequenceFeatureStore"));
 +    assertNotNull(PA.getValue(rds, "dbrefs"));
    }
  
    /**
      assertEquals("CD", derived.getSequenceAsString());
      assertSame(sq.getDatasetSequence(), derived.getDatasetSequence());
  
 -    assertNull(sq.sequenceFeatures);
 -    assertNull(derived.sequenceFeatures);
      // derived sequence should access dataset sequence features
      assertNotNull(sq.getSequenceFeatures());
 -    assertArrayEquals(sq.getSequenceFeatures(),
 -            derived.getSequenceFeatures());
 +    assertEquals(sq.getSequenceFeatures(), derived.getSequenceFeatures());
  
      /*
       *  verify we have primary db refs *just* for PDB IDs with associated
      assertEquals(anns[0].score, seq1.getAnnotation()[0].score);
  
      // copy has a copy of the sequence feature:
 -    SequenceFeature[] sfs = copy.getSequenceFeatures();
 -    assertEquals(1, sfs.length);
 +    List<SequenceFeature> sfs = copy.getSequenceFeatures();
 +    assertEquals(1, sfs.size());
      if (seq1.getDatasetSequence() != null
              && copy.getDatasetSequence() == seq1.getDatasetSequence())
      {
 -      assertTrue(sfs[0] == seq1.getSequenceFeatures()[0]);
 +      assertSame(sfs.get(0), seq1.getSequenceFeatures().get(0));
      }
      else
      {
 -      assertFalse(sfs[0] == seq1.getSequenceFeatures()[0]);
 +      assertNotSame(sfs.get(0), seq1.getSequenceFeatures().get(0));
      }
 -    assertTrue(sfs[0].equals(seq1.getSequenceFeatures()[0]));
 +    assertEquals(sfs.get(0), seq1.getSequenceFeatures().get(0));
  
      // copy has a copy of the PDB entry
      Vector<PDBEntry> pdbs = copy.getAllPDBEntries();
      assertEquals(' ', sq.getCharAt(-1));
    }
  
 +  @Test(groups = { "Functional" })
 +  public void testAddSequenceFeatures()
 +  {
 +    SequenceI sq = new Sequence("", "abcde");
 +    // type may not be null
 +    assertFalse(sq.addSequenceFeature(new SequenceFeature(null, "desc", 4,
 +            8, 0f, null)));
 +    assertTrue(sq.addSequenceFeature(new SequenceFeature("Cath", "desc", 4,
 +            8, 0f, null)));
 +    // can't add a duplicate feature
 +    assertFalse(sq.addSequenceFeature(new SequenceFeature("Cath", "desc",
 +            4, 8, 0f, null)));
 +    // can add a different feature
 +    assertTrue(sq.addSequenceFeature(new SequenceFeature("Scop", "desc", 4,
 +            8, 0f, null))); // different type
 +    assertTrue(sq.addSequenceFeature(new SequenceFeature("Cath",
 +            "description", 4, 8, 0f, null)));// different description
 +    assertTrue(sq.addSequenceFeature(new SequenceFeature("Cath", "desc", 3,
 +            8, 0f, null))); // different start position
 +    assertTrue(sq.addSequenceFeature(new SequenceFeature("Cath", "desc", 4,
 +            9, 0f, null))); // different end position
 +    assertTrue(sq.addSequenceFeature(new SequenceFeature("Cath", "desc", 4,
 +            8, 1f, null))); // different score
 +    assertTrue(sq.addSequenceFeature(new SequenceFeature("Cath", "desc", 4,
 +            8, Float.NaN, null))); // score NaN
 +    assertTrue(sq.addSequenceFeature(new SequenceFeature("Cath", "desc", 4,
 +            8, 0f, "Metal"))); // different group
 +    assertEquals(8, sq.getFeatures().getAllFeatures().size());
 +  }
 +
    /**
     * Tests for adding (or updating) dbrefs
     * 
      seq2.createDatasetSequence();
      seq.setDatasetSequence(seq2);
    }
 +
 +  @Test
 +  public void testFindPositions()
 +  {
 +    SequenceI sq = new Sequence("Seq", "ABC--DE-F", 8, 13);
 +
 +    Range range = sq.findPositions(1, 4); // BC
 +    assertEquals(new Range(9, 10), range);
 +
 +    range = sq.findPositions(2, 4); // C
 +    assertEquals(new Range(10, 10), range);
 +
 +    assertNull(sq.findPositions(3, 4)); // all gaps
 +
 +    range = sq.findPositions(2, 6); // CDE
 +    assertEquals(new Range(10, 12), range);
 +
 +    range = sq.findPositions(3, 7); // DE
 +    assertEquals(new Range(11, 12), range);
 +  }
  }