src/jalview/datamodel/Sequence.java

   1 /*
   2  * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
   3  * Copyright (C) $$Year-Rel$$ The Jalview Authors
   4  *
   5  * This file is part of Jalview.
   6  *
   7  * Jalview is free software: you can redistribute it and/or
   8  * modify it under the terms of the GNU General Public License
   9  * as published by the Free Software Foundation, either version 3
  10  * of the License, or (at your option) any later version.
  11  *
  12  * Jalview is distributed in the hope that it will be useful, but
  13  * WITHOUT ANY WARRANTY; without even the implied warranty
  14  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
  15  * PURPOSE.  See the GNU General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU General Public License
  18  * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
  19  * The Jalview Authors are detailed in the 'AUTHORS' file.
  20  */
  21 package jalview.datamodel;
  22
  23 import jalview.analysis.AlignSeq;
  24 import jalview.api.DBRefEntryI;
  25 import jalview.datamodel.features.SequenceFeatures;
  26 import jalview.datamodel.features.SequenceFeaturesI;
  27 import jalview.util.Comparison;
  28 import jalview.util.DBRefUtils;
  29 import jalview.util.MapList;
  30 import jalview.util.StringUtils;
  31
  32 import java.util.ArrayList;
  33 import java.util.Arrays;
  34 import java.util.BitSet;
  35 import java.util.Collections;
  36 import java.util.Enumeration;
  37 import java.util.List;
  38 import java.util.ListIterator;
  39 import java.util.Vector;
  40
  41 import com.stevesoft.pat.Regex;
  42
  43 import fr.orsay.lri.varna.models.rna.RNA;
  44
  45 /**
  46  *
  47  * Implements the SequenceI interface for a char[] based sequence object
  48  */
  49 public class Sequence extends ASequence implements SequenceI
  50 {
  51   private static final Regex limitrx = new Regex(
  52           "[/][0-9]{1,}[-][0-9]{1,}$");
  53
  54   private static final Regex endrx = new Regex("[0-9]{1,}$");
  55
  56   SequenceI datasetSequence;
  57
  58   String name;
  59
  60   private char[] sequence;
  61
  62   String description;
  63
  64   int start;
  65
  66   int end;
  67
  68   Vector<PDBEntry> pdbIds;
  69
  70   String vamsasId;
  71
  72   DBRefEntry[] dbrefs;
  73
  74   RNA rna;
  75
  76   /**
  77    * This annotation is displayed below the alignment but the positions are tied
  78    * to the residues of this sequence
  79    *
  80    * TODO: change to List<>
  81    */
  82   Vector<AlignmentAnnotation> annotation;
  83
  84   /**
  85    * The index of the sequence in a MSA
  86    */
  87   int index = -1;
  88
  89   private SequenceFeatures sequenceFeatureStore;
  90
  91   /*
  92    * A cursor holding the approximate current view position to the sequence,
  93    * as determined by findIndex or findPosition or findPositions.
  94    * Using a cursor as a hint allows these methods to be more performant for
  95    * large sequences.
  96    */
  97   private SequenceCursor cursor;
  98
  99   /*
 100    * A number that should be incremented whenever the sequence is edited.
 101    * If the value matches the cursor token, then we can trust the cursor,
 102    * if not then it should be recomputed.
 103    */
 104   private int changeCount;
 105
 106   /**
 107    * Creates a new Sequence object.
 108    *
 109    * @param name
 110    *          display name string
 111    * @param sequence
 112    *          string to form a possibly gapped sequence out of
 113    * @param start
 114    *          first position of non-gap residue in the sequence
 115    * @param end
 116    *          last position of ungapped residues (nearly always only used for
 117    *          display purposes)
 118    */
 119   public Sequence(String name, String sequence, int start, int end)
 120   {
 121     this();
 122     initSeqAndName(name, sequence.toCharArray(), start, end);
 123   }
 124
 125   public Sequence(String name, char[] sequence, int start, int end)
 126   {
 127     this();
 128     initSeqAndName(name, sequence, start, end);
 129   }
 130
 131   /**
 132    * Stage 1 constructor - assign name, sequence, and set start and end fields.
 133    * start and end are updated values from name2 if it ends with /start-end
 134    *
 135    * @param name2
 136    * @param sequence2
 137    * @param start2
 138    * @param end2
 139    */
 140   protected void initSeqAndName(String name2, char[] sequence2, int start2,
 141           int end2)
 142   {
 143     this.name = name2;
 144     this.sequence = sequence2;
 145     this.start = start2;
 146     this.end = end2;
 147     parseId();
 148     checkValidRange();
 149   }
 150
 151   void parseId()
 152   {
 153     if (name == null)
 154     {
 155       System.err
 156               .println("POSSIBLE IMPLEMENTATION ERROR: null sequence name passed to constructor.");
 157       name = "";
 158     }
 159     // Does sequence have the /start-end signature?
 160     if (limitrx.search(name))
 161     {
 162       name = limitrx.left();
 163       endrx.search(limitrx.stringMatched());
 164       setStart(Integer.parseInt(limitrx.stringMatched().substring(1,
 165               endrx.matchedFrom() - 1)));
 166       setEnd(Integer.parseInt(endrx.stringMatched()));
 167     }
 168   }
 169
 170   void checkValidRange()
 171   {
 172     // Note: JAL-774 :
 173     // http://issues.jalview.org/browse/JAL-774?focusedCommentId=11239&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-11239
 174     {
 175       int endRes = 0;
 176       for (int j = 0; j < sequence.length; j++)
 177       {
 178         if (!jalview.util.Comparison.isGap(sequence[j]))
 179         {
 180           endRes++;
 181         }
 182       }
 183       if (endRes > 0)
 184       {
 185         endRes += start - 1;
 186       }
 187
 188       if (end < endRes)
 189       {
 190         end = endRes;
 191       }
 192     }
 193
 194   }
 195
 196   /**
 197    * default constructor
 198    */
 199   private Sequence()
 200   {
 201     sequenceFeatureStore = new SequenceFeatures();
 202   }
 203
 204   /**
 205    * Creates a new Sequence object.
 206    *
 207    * @param name
 208    *          DOCUMENT ME!
 209    * @param sequence
 210    *          DOCUMENT ME!
 211    */
 212   public Sequence(String name, String sequence)
 213   {
 214     this(name, sequence, 1, -1);
 215   }
 216
 217   /**
 218    * Creates a new Sequence object with new AlignmentAnnotations but inherits
 219    * any existing dataset sequence reference. If non exists, everything is
 220    * copied.
 221    *
 222    * @param seq
 223    *          if seq is a dataset sequence, behaves like a plain old copy
 224    *          constructor
 225    */
 226   public Sequence(SequenceI seq)
 227   {
 228     this(seq, seq.getAnnotation());
 229   }
 230
 231   /**
 232    * Create a new sequence object with new features, DBRefEntries, and PDBIds
 233    * but inherits any existing dataset sequence reference, and duplicate of any
 234    * annotation that is present in the given annotation array.
 235    *
 236    * @param seq
 237    *          the sequence to be copied
 238    * @param alAnnotation
 239    *          an array of annotation including some associated with seq
 240    */
 241   public Sequence(SequenceI seq, AlignmentAnnotation[] alAnnotation)
 242   {
 243     this();
 244     initSeqFrom(seq, alAnnotation);
 245   }
 246
 247   /**
 248    * does the heavy lifting when cloning a dataset sequence, or coping data from
 249    * dataset to a new derived sequence.
 250    *
 251    * @param seq
 252    *          - source of attributes.
 253    * @param alAnnotation
 254    *          - alignment annotation present on seq that should be copied onto
 255    *          this sequence
 256    */
 257   protected void initSeqFrom(SequenceI seq,
 258           AlignmentAnnotation[] alAnnotation)
 259   {
 260     char[] oseq = seq.getSequence(); // returns a copy of the array
 261     initSeqAndName(seq.getName(), oseq, seq.getStart(), seq.getEnd());
 262
 263     description = seq.getDescription();
 264     if (seq != datasetSequence)
 265     {
 266       setDatasetSequence(seq.getDatasetSequence());
 267     }
 268
 269     /*
 270      * only copy DBRefs and seqfeatures if we really are a dataset sequence
 271      */
 272     if (datasetSequence == null)
 273     {
 274       if (seq.getDBRefs() != null)
 275       {
 276         DBRefEntry[] dbr = seq.getDBRefs();
 277         for (int i = 0; i < dbr.length; i++)
 278         {
 279           addDBRef(new DBRefEntry(dbr[i]));
 280         }
 281       }
 282
 283       /*
 284        * make copies of any sequence features
 285        */
 286       for (SequenceFeature sf : seq.getSequenceFeatures())
 287       {
 288         addSequenceFeature(new SequenceFeature(sf));
 289       }
 290     }
 291
 292     if (seq.getAnnotation() != null)
 293     {
 294       AlignmentAnnotation[] sqann = seq.getAnnotation();
 295       for (int i = 0; i < sqann.length; i++)
 296       {
 297         if (sqann[i] == null)
 298         {
 299           continue;
 300         }
 301         boolean found = (alAnnotation == null);
 302         if (!found)
 303         {
 304           for (int apos = 0; !found && apos < alAnnotation.length; apos++)
 305           {
 306             found = (alAnnotation[apos] == sqann[i]);
 307           }
 308         }
 309         if (found)
 310         {
 311           // only copy the given annotation
 312           AlignmentAnnotation newann = new AlignmentAnnotation(sqann[i]);
 313           addAlignmentAnnotation(newann);
 314         }
 315       }
 316     }
 317     if (seq.getAllPDBEntries() != null)
 318     {
 319       Vector<PDBEntry> ids = seq.getAllPDBEntries();
 320       for (PDBEntry pdb : ids)
 321       {
 322         this.addPDBId(new PDBEntry(pdb));
 323       }
 324     }
 325   }
 326
 327   @Override
 328   public void setSequenceFeatures(List<SequenceFeature> features)
 329   {
 330     if (datasetSequence != null)
 331     {
 332       datasetSequence.setSequenceFeatures(features);
 333       return;
 334     }
 335     sequenceFeatureStore = new SequenceFeatures(features);
 336   }
 337
 338   @Override
 339   public synchronized boolean addSequenceFeature(SequenceFeature sf)
 340   {
 341     if (sf.getType() == null)
 342     {
 343       System.err.println("SequenceFeature type may not be null: "
 344               + sf.toString());
 345       return false;
 346     }
 347
 348     if (datasetSequence != null)
 349     {
 350       return datasetSequence.addSequenceFeature(sf);
 351     }
 352
 353     return sequenceFeatureStore.add(sf);
 354   }
 355
 356   @Override
 357   public void deleteFeature(SequenceFeature sf)
 358   {
 359     if (datasetSequence != null)
 360     {
 361       datasetSequence.deleteFeature(sf);
 362     }
 363     else
 364     {
 365       sequenceFeatureStore.delete(sf);
 366     }
 367   }
 368
 369   /**
 370    * {@inheritDoc}
 371    *
 372    * @return
 373    */
 374   @Override
 375   public List<SequenceFeature> getSequenceFeatures()
 376   {
 377     if (datasetSequence != null)
 378     {
 379       return datasetSequence.getSequenceFeatures();
 380     }
 381     return sequenceFeatureStore.getAllFeatures();
 382   }
 383
 384   @Override
 385   public SequenceFeaturesI getFeatures()
 386   {
 387     return datasetSequence != null ? datasetSequence.getFeatures()
 388             : sequenceFeatureStore;
 389   }
 390
 391   @Override
 392   public boolean addPDBId(PDBEntry entry)
 393   {
 394     if (pdbIds == null)
 395     {
 396       pdbIds = new Vector<PDBEntry>();
 397       pdbIds.add(entry);
 398       return true;
 399     }
 400
 401     for (PDBEntry pdbe : pdbIds)
 402     {
 403       if (pdbe.updateFrom(entry))
 404       {
 405         return false;
 406       }
 407     }
 408     pdbIds.addElement(entry);
 409     return true;
 410   }
 411
 412   /**
 413    * DOCUMENT ME!
 414    *
 415    * @param id
 416    *          DOCUMENT ME!
 417    */
 418   @Override
 419   public void setPDBId(Vector<PDBEntry> id)
 420   {
 421     pdbIds = id;
 422   }
 423
 424   /**
 425    * DOCUMENT ME!
 426    *
 427    * @return DOCUMENT ME!
 428    */
 429   @Override
 430   public Vector<PDBEntry> getAllPDBEntries()
 431   {
 432     return pdbIds == null ? new Vector<PDBEntry>() : pdbIds;
 433   }
 434
 435   /**
 436    * DOCUMENT ME!
 437    *
 438    * @return DOCUMENT ME!
 439    */
 440   @Override
 441   public String getDisplayId(boolean jvsuffix)
 442   {
 443     StringBuffer result = new StringBuffer(name);
 444     if (jvsuffix)
 445     {
 446       result.append("/" + start + "-" + end);
 447     }
 448
 449     return result.toString();
 450   }
 451
 452   /**
 453    * DOCUMENT ME!
 454    *
 455    * @param name
 456    *          DOCUMENT ME!
 457    */
 458   @Override
 459   public void setName(String name)
 460   {
 461     this.name = name;
 462     this.parseId();
 463   }
 464
 465   /**
 466    * DOCUMENT ME!
 467    *
 468    * @return DOCUMENT ME!
 469    */
 470   @Override
 471   public String getName()
 472   {
 473     return this.name;
 474   }
 475
 476   /**
 477    * DOCUMENT ME!
 478    *
 479    * @param start
 480    *          DOCUMENT ME!
 481    */
 482   @Override
 483   public void setStart(int start)
 484   {
 485     this.start = start;
 486   }
 487
 488   /**
 489    * DOCUMENT ME!
 490    *
 491    * @return DOCUMENT ME!
 492    */
 493   @Override
 494   public int getStart()
 495   {
 496     return this.start;
 497   }
 498
 499   /**
 500    * DOCUMENT ME!
 501    *
 502    * @param end
 503    *          DOCUMENT ME!
 504    */
 505   @Override
 506   public void setEnd(int end)
 507   {
 508     this.end = end;
 509   }
 510
 511   /**
 512    * DOCUMENT ME!
 513    *
 514    * @return DOCUMENT ME!
 515    */
 516   @Override
 517   public int getEnd()
 518   {
 519     return this.end;
 520   }
 521
 522   /**
 523    * DOCUMENT ME!
 524    *
 525    * @return DOCUMENT ME!
 526    */
 527   @Override
 528   public int getLength()
 529   {
 530     return this.sequence.length;
 531   }
 532
 533   /**
 534    * DOCUMENT ME!
 535    *
 536    * @param seq
 537    *          DOCUMENT ME!
 538    */
 539   @Override
 540   public void setSequence(String seq)
 541   {
 542     this.sequence = seq.toCharArray();
 543     checkValidRange();
 544     sequenceChanged();
 545   }
 546
 547   @Override
 548   public String getSequenceAsString()
 549   {
 550     return new String(sequence);
 551   }
 552
 553   @Override
 554   public String getSequenceAsString(int start, int end)
 555   {
 556     return new String(getSequence(start, end));
 557   }
 558
 559   @Override
 560   public char[] getSequence()
 561   {
 562     // return sequence;
 563     return sequence == null ? null : Arrays.copyOf(sequence,
 564             sequence.length);
 565   }
 566
 567   /*
 568    * (non-Javadoc)
 569    *
 570    * @see jalview.datamodel.SequenceI#getSequence(int, int)
 571    */
 572   @Override
 573   public char[] getSequence(int start, int end)
 574   {
 575     if (start < 0)
 576     {
 577       start = 0;
 578     }
 579     // JBPNote - left to user to pad the result here (TODO:Decide on this
 580     // policy)
 581     if (start >= sequence.length)
 582     {
 583       return new char[0];
 584     }
 585
 586     if (end >= sequence.length)
 587     {
 588       end = sequence.length;
 589     }
 590
 591     char[] reply = new char[end - start];
 592     System.arraycopy(sequence, start, reply, 0, end - start);
 593
 594     return reply;
 595   }
 596
 597   @Override
 598   public SequenceI getSubSequence(int start, int end)
 599   {
 600     if (start < 0)
 601     {
 602       start = 0;
 603     }
 604     char[] seq = getSequence(start, end);
 605     if (seq.length == 0)
 606     {
 607       return null;
 608     }
 609     int nstart = findPosition(start);
 610     int nend = findPosition(end) - 1;
 611     // JBPNote - this is an incomplete copy.
 612     SequenceI nseq = new Sequence(this.getName(), seq, nstart, nend);
 613     nseq.setDescription(description);
 614     if (datasetSequence != null)
 615     {
 616       nseq.setDatasetSequence(datasetSequence);
 617     }
 618     else
 619     {
 620       nseq.setDatasetSequence(this);
 621     }
 622     return nseq;
 623   }
 624
 625   /**
 626    * Returns the character of the aligned sequence at the given position (base
 627    * zero), or space if the position is not within the sequence's bounds
 628    *
 629    * @return
 630    */
 631   @Override
 632   public char getCharAt(int i)
 633   {
 634     if (i >= 0 && i < sequence.length)
 635     {
 636       return sequence[i];
 637     }
 638     else
 639     {
 640       return ' ';
 641     }
 642   }
 643
 644   /**
 645    * DOCUMENT ME!
 646    *
 647    * @param desc
 648    *          DOCUMENT ME!
 649    */
 650   @Override
 651   public void setDescription(String desc)
 652   {
 653     this.description = desc;
 654   }
 655
 656   /**
 657    * DOCUMENT ME!
 658    *
 659    * @return DOCUMENT ME!
 660    */
 661   @Override
 662   public String getDescription()
 663   {
 664     return this.description;
 665   }
 666
 667   /**
 668    * {@inheritDoc}
 669    */
 670   @Override
 671   public int findIndex(int pos)
 672   {
 673     /*
 674      * use a valid, hopefully nearby, cursor if available
 675      */
 676     if (isValidCursor(cursor))
 677     {
 678       return findIndex(pos, cursor);
 679     }
 680
 681     int j = start;
 682     int i = 0;
 683     int startColumn = 0;
 684
 685     /*
 686      * traverse sequence from the start counting gaps; make a note of
 687      * the column of the first residue to save in the cursor
 688      */
 689     while ((i < sequence.length) && (j <= end) && (j <= pos))
 690     {
 691       if (!Comparison.isGap(sequence[i]))
 692       {
 693         if (j == start)
 694         {
 695           startColumn = i;
 696         }
 697         j++;
 698       }
 699       i++;
 700     }
 701
 702     if (j == end && j < pos)
 703     {
 704       return end + 1;
 705     }
 706
 707     updateCursor(pos, i, startColumn);
 708     return i;
 709   }
 710
 711   /**
 712    * Updates the cursor to the latest found residue and column position
 713    *
 714    * @param residuePos
 715    *          (start..)
 716    * @param column
 717    *          (1..)
 718    * @param startColumn
 719    *          column position of the first sequence residue
 720    */
 721   protected void updateCursor(int residuePos, int column, int startColumn)
 722   {
 723     int endColumn = cursor == null ? 0 : cursor.lastColumnPosition;
 724     if (residuePos == this.end)
 725     {
 726       endColumn = column;
 727     }
 728
 729     cursor = new SequenceCursor(this, residuePos, column, startColumn,
 730             endColumn, this.changeCount);
 731   }
 732
 733   /**
 734    * Answers the aligned column position (1..) for the given residue position
 735    * (start..) given a 'hint' of a residue/column location in the neighbourhood.
 736    * The hint may be left of, at, or to the right of the required position.
 737    *
 738    * @param pos
 739    * @param curs
 740    * @return
 741    */
 742   protected int findIndex(int pos, SequenceCursor curs)
 743   {
 744     if (!isValidCursor(curs))
 745     {
 746       /*
 747        * wrong or invalidated cursor, compute de novo
 748        */
 749       return findIndex(pos);
 750     }
 751
 752     if (curs.residuePosition == pos)
 753     {
 754       return curs.columnPosition;
 755     }
 756
 757     /*
 758      * move left or right to find pos from hint.position
 759      */
 760     int col = curs.columnPosition - 1; // convert from base 1 to 0-based array
 761                                        // index
 762     int newPos = curs.residuePosition;
 763     int delta = newPos > pos ? -1 : 1;
 764
 765     while (newPos != pos)
 766     {
 767       col += delta; // shift one column left or right
 768       if (col < 0 || col == sequence.length)
 769       {
 770         break;
 771       }
 772       if (!Comparison.isGap(sequence[col]))
 773       {
 774         newPos += delta;
 775       }
 776     }
 777
 778     col++; // convert back to base 1
 779     updateCursor(pos, col, curs.firstColumnPosition);
 780
 781     return col;
 782   }
 783
 784   /**
 785    * {@inheritDoc}
 786    */
 787   @Override
 788   public int findPosition(final int column)
 789   {
 790     /*
 791      * use a valid, hopefully nearby, cursor if available
 792      */
 793     if (isValidCursor(cursor))
 794     {
 795       return findPosition(column + 1, cursor);
 796     }
 797
 798     // TODO recode this more naturally i.e. count residues only
 799     // as they are found, not 'in anticipation'
 800
 801     /*
 802      * traverse the sequence counting gaps; note the column position
 803      * of the first residue, to save in the cursor
 804      */
 805     int firstResidueColumn = 0;
 806     int lastPosFound = 0;
 807     int lastPosFoundColumn = 0;
 808     int seqlen = sequence.length;
 809
 810     if (seqlen > 0 && !Comparison.isGap(sequence[0]))
 811     {
 812       lastPosFound = start;
 813       lastPosFoundColumn = 0;
 814     }
 815
 816     int j = 0;
 817     int pos = start;
 818
 819     while (j < column && j < seqlen)
 820     {
 821       if (!Comparison.isGap(sequence[j]))
 822       {
 823         lastPosFound = pos;
 824         lastPosFoundColumn = j;
 825         if (pos == this.start)
 826         {
 827           firstResidueColumn = j;
 828         }
 829         pos++;
 830       }
 831       j++;
 832     }
 833     if (j < seqlen && !Comparison.isGap(sequence[j]))
 834     {
 835       lastPosFound = pos;
 836       lastPosFoundColumn = j;
 837       if (pos == this.start)
 838       {
 839         firstResidueColumn = j;
 840       }
 841     }
 842
 843     /*
 844      * update the cursor to the last residue position found (if any)
 845      * (converting column position to base 1)
 846      */
 847     if (lastPosFound != 0)
 848     {
 849       updateCursor(lastPosFound, lastPosFoundColumn + 1,
 850               firstResidueColumn + 1);
 851     }
 852
 853     return pos;
 854   }
 855
 856   /**
 857    * Answers true if the given cursor is not null, is for this sequence object,
 858    * and has a token value that matches this object's changeCount, else false.
 859    * This allows us to ignore a cursor as 'stale' if the sequence has been
 860    * modified since the cursor was created.
 861    *
 862    * @param curs
 863    * @return
 864    */
 865   protected boolean isValidCursor(SequenceCursor curs)
 866   {
 867     if (curs == null || curs.sequence != this || curs.token != changeCount)
 868     {
 869       return false;
 870     }
 871     /*
 872      * sanity check against range
 873      */
 874     if (curs.columnPosition < 0 || curs.columnPosition > sequence.length)
 875     {
 876       return false;
 877     }
 878     if (curs.residuePosition < start || curs.residuePosition > end)
 879     {
 880       return false;
 881     }
 882     return true;
 883   }
 884
 885   /**
 886    * Answers the sequence position (start..) for the given aligned column
 887    * position (1..), given a hint of a cursor in the neighbourhood. The cursor
 888    * may lie left of, at, or to the right of the column position.
 889    *
 890    * @param col
 891    * @param curs
 892    * @return
 893    */
 894   protected int findPosition(final int col, SequenceCursor curs)
 895   {
 896     if (!isValidCursor(curs))
 897     {
 898       /*
 899        * wrong or invalidated cursor, compute de novo
 900        */
 901       return findPosition(col - 1);// ugh back to base 0
 902     }
 903
 904     if (curs.columnPosition == col)
 905     {
 906       cursor = curs; // in case this method becomes public
 907       return curs.residuePosition; // easy case :-)
 908     }
 909
 910     if (curs.lastColumnPosition > 0 && curs.lastColumnPosition < col)
 911     {
 912       /*
 913        * sequence lies entirely to the left of col
 914        * - return last residue + 1
 915        */
 916       return end + 1;
 917     }
 918
 919     if (curs.firstColumnPosition > 0 && curs.firstColumnPosition > col)
 920     {
 921       /*
 922        * sequence lies entirely to the right of col
 923        * - return first residue
 924        */
 925       return start;
 926     }
 927
 928     // todo could choose closest to col out of column,
 929     // firstColumnPosition, lastColumnPosition as a start point
 930
 931     /*
 932      * move left or right to find pos from cursor position
 933      */
 934     int firstResidueColumn = curs.firstColumnPosition;
 935     int column = curs.columnPosition - 1; // to base 0
 936     int newPos = curs.residuePosition;
 937     int delta = curs.columnPosition > col ? -1 : 1;
 938     boolean gapped = false;
 939     int lastFoundPosition = curs.residuePosition;
 940     int lastFoundPositionColumn = curs.columnPosition;
 941
 942     while (column != col - 1)
 943     {
 944       column += delta; // shift one column left or right
 945       if (column < 0 || column == sequence.length)
 946       {
 947         break;
 948       }
 949       gapped = Comparison.isGap(sequence[column]);
 950       if (!gapped)
 951       {
 952         newPos += delta;
 953         lastFoundPosition = newPos;
 954         lastFoundPositionColumn = column + 1;
 955         if (lastFoundPosition == this.start)
 956         {
 957           firstResidueColumn = column + 1;
 958         }
 959       }
 960     }
 961
 962     if (cursor == null || lastFoundPosition != cursor.residuePosition)
 963     {
 964       updateCursor(lastFoundPosition, lastFoundPositionColumn,
 965               firstResidueColumn);
 966     }
 967
 968     /*
 969      * hack to give position to the right if on a gap
 970      * or beyond the length of the sequence (see JAL-2562)
 971      */
 972     if (delta > 0 && (gapped || column >= sequence.length))
 973     {
 974       newPos++;
 975     }
 976
 977     return newPos;
 978   }
 979
 980   /**
 981    * Returns an int array where indices correspond to each residue in the
 982    * sequence and the element value gives its position in the alignment
 983    *
 984    * @return int[SequenceI.getEnd()-SequenceI.getStart()+1] or null if no
 985    *         residues in SequenceI object
 986    */
 987   @Override
 988   public int[] gapMap()
 989   {
 990     String seq = jalview.analysis.AlignSeq.extractGaps(
 991             jalview.util.Comparison.GapChars, new String(sequence));
 992     int[] map = new int[seq.length()];
 993     int j = 0;
 994     int p = 0;
 995
 996     while (j < sequence.length)
 997     {
 998       if (!jalview.util.Comparison.isGap(sequence[j]))
 999       {
1000         map[p++] = j;
1001       }
1002
1003       j++;
1004     }
1005
1006     return map;
1007   }
1008
1009   @Override
1010   public int[] findPositionMap()
1011   {
1012     int map[] = new int[sequence.length];
1013     int j = 0;
1014     int pos = start;
1015     int seqlen = sequence.length;
1016     while ((j < seqlen))
1017     {
1018       map[j] = pos;
1019       if (!jalview.util.Comparison.isGap(sequence[j]))
1020       {
1021         pos++;
1022       }
1023
1024       j++;
1025     }
1026     return map;
1027   }
1028
1029   @Override
1030   public List<int[]> getInsertions()
1031   {
1032     ArrayList<int[]> map = new ArrayList<int[]>();
1033     int lastj = -1, j = 0;
1034     int pos = start;
1035     int seqlen = sequence.length;
1036     while ((j < seqlen))
1037     {
1038       if (jalview.util.Comparison.isGap(sequence[j]))
1039       {
1040         if (lastj == -1)
1041         {
1042           lastj = j;
1043         }
1044       }
1045       else
1046       {
1047         if (lastj != -1)
1048         {
1049           map.add(new int[] { lastj, j - 1 });
1050           lastj = -1;
1051         }
1052       }
1053       j++;
1054     }
1055     if (lastj != -1)
1056     {
1057       map.add(new int[] { lastj, j - 1 });
1058       lastj = -1;
1059     }
1060     return map;
1061   }
1062
1063   @Override
1064   public BitSet getInsertionsAsBits()
1065   {
1066     BitSet map = new BitSet();
1067     int lastj = -1, j = 0;
1068     int pos = start;
1069     int seqlen = sequence.length;
1070     while ((j < seqlen))
1071     {
1072       if (jalview.util.Comparison.isGap(sequence[j]))
1073       {
1074         if (lastj == -1)
1075         {
1076           lastj = j;
1077         }
1078       }
1079       else
1080       {
1081         if (lastj != -1)
1082         {
1083           map.set(lastj, j);
1084           lastj = -1;
1085         }
1086       }
1087       j++;
1088     }
1089     if (lastj != -1)
1090     {
1091       map.set(lastj, j);
1092       lastj = -1;
1093     }
1094     return map;
1095   }
1096
1097   @Override
1098   public void deleteChars(int i, int j)
1099   {
1100     int newstart = start, newend = end;
1101     if (i >= sequence.length || i < 0)
1102     {
1103       return;
1104     }
1105
1106     char[] tmp = StringUtils.deleteChars(sequence, i, j);
1107     boolean createNewDs = false;
1108     // TODO: take a (second look) at the dataset creation validation method for
1109     // the very large sequence case
1110     int eindex = -1, sindex = -1;
1111     boolean ecalc = false, scalc = false;
1112     for (int s = i; s < j; s++)
1113     {
1114       if (jalview.schemes.ResidueProperties.aaIndex[sequence[s]] != 23)
1115       {
1116         if (createNewDs)
1117         {
1118           newend--;
1119         }
1120         else
1121         {
1122           if (!scalc)
1123           {
1124             sindex = findIndex(start) - 1;
1125             scalc = true;
1126           }
1127           if (sindex == s)
1128           {
1129             // delete characters including start of sequence
1130             newstart = findPosition(j);
1131             break; // don't need to search for any more residue characters.
1132           }
1133           else
1134           {
1135             // delete characters after start.
1136             if (!ecalc)
1137             {
1138               eindex = findIndex(end) - 1;
1139               ecalc = true;
1140             }
1141             if (eindex < j)
1142             {
1143               // delete characters at end of sequence
1144               newend = findPosition(i - 1);
1145               break; // don't need to search for any more residue characters.
1146             }
1147             else
1148             {
1149               createNewDs = true;
1150               newend--; // decrease end position by one for the deleted residue
1151               // and search further
1152             }
1153           }
1154         }
1155       }
1156     }
1157     // deletion occured in the middle of the sequence
1158     if (createNewDs && this.datasetSequence != null)
1159     {
1160       // construct a new sequence
1161       Sequence ds = new Sequence(datasetSequence);
1162       // TODO: remove any non-inheritable properties ?
1163       // TODO: create a sequence mapping (since there is a relation here ?)
1164       ds.deleteChars(i, j);
1165       datasetSequence = ds;
1166     }
1167     start = newstart;
1168     end = newend;
1169     sequence = tmp;
1170     sequenceChanged();
1171   }
1172
1173   @Override
1174   public void insertCharAt(int i, int length, char c)
1175   {
1176     char[] tmp = new char[sequence.length + length];
1177
1178     if (i >= sequence.length)
1179     {
1180       System.arraycopy(sequence, 0, tmp, 0, sequence.length);
1181       i = sequence.length;
1182     }
1183     else
1184     {
1185       System.arraycopy(sequence, 0, tmp, 0, i);
1186     }
1187
1188     int index = i;
1189     while (length > 0)
1190     {
1191       tmp[index++] = c;
1192       length--;
1193     }
1194
1195     if (i < sequence.length)
1196     {
1197       System.arraycopy(sequence, i, tmp, index, sequence.length - i);
1198     }
1199
1200     sequence = tmp;
1201     sequenceChanged();
1202   }
1203
1204   @Override
1205   public void insertCharAt(int i, char c)
1206   {
1207     insertCharAt(i, 1, c);
1208   }
1209
1210   @Override
1211   public String getVamsasId()
1212   {
1213     return vamsasId;
1214   }
1215
1216   @Override
1217   public void setVamsasId(String id)
1218   {
1219     vamsasId = id;
1220   }
1221
1222   @Override
1223   public void setDBRefs(DBRefEntry[] dbref)
1224   {
1225     if (dbrefs == null && datasetSequence != null
1226             && this != datasetSequence)
1227     {
1228       datasetSequence.setDBRefs(dbref);
1229       return;
1230     }
1231     dbrefs = dbref;
1232     if (dbrefs != null)
1233     {
1234       DBRefUtils.ensurePrimaries(this);
1235     }
1236   }
1237
1238   @Override
1239   public DBRefEntry[] getDBRefs()
1240   {
1241     if (dbrefs == null && datasetSequence != null
1242             && this != datasetSequence)
1243     {
1244       return datasetSequence.getDBRefs();
1245     }
1246     return dbrefs;
1247   }
1248
1249   @Override
1250   public void addDBRef(DBRefEntry entry)
1251   {
1252     if (datasetSequence != null)
1253     {
1254       datasetSequence.addDBRef(entry);
1255       return;
1256     }
1257
1258     if (dbrefs == null)
1259     {
1260       dbrefs = new DBRefEntry[0];
1261     }
1262
1263     for (DBRefEntryI dbr : dbrefs)
1264     {
1265       if (dbr.updateFrom(entry))
1266       {
1267         /*
1268          * found a dbref that either matched, or could be
1269          * updated from, the new entry - no need to add it
1270          */
1271         return;
1272       }
1273     }
1274
1275     /*
1276      * extend the array to make room for one more
1277      */
1278     // TODO use an ArrayList instead
1279     int j = dbrefs.length;
1280     DBRefEntry[] temp = new DBRefEntry[j + 1];
1281     System.arraycopy(dbrefs, 0, temp, 0, j);
1282     temp[temp.length - 1] = entry;
1283
1284     dbrefs = temp;
1285
1286     DBRefUtils.ensurePrimaries(this);
1287   }
1288
1289   @Override
1290   public void setDatasetSequence(SequenceI seq)
1291   {
1292     if (seq == this)
1293     {
1294       throw new IllegalArgumentException(
1295               "Implementation Error: self reference passed to SequenceI.setDatasetSequence");
1296     }
1297     if (seq != null && seq.getDatasetSequence() != null)
1298     {
1299       throw new IllegalArgumentException(
1300               "Implementation error: cascading dataset sequences are not allowed.");
1301     }
1302     datasetSequence = seq;
1303   }
1304
1305   @Override
1306   public SequenceI getDatasetSequence()
1307   {
1308     return datasetSequence;
1309   }
1310
1311   @Override
1312   public AlignmentAnnotation[] getAnnotation()
1313   {
1314     return annotation == null ? null : annotation
1315             .toArray(new AlignmentAnnotation[annotation.size()]);
1316   }
1317
1318   @Override
1319   public boolean hasAnnotation(AlignmentAnnotation ann)
1320   {
1321     return annotation == null ? false : annotation.contains(ann);
1322   }
1323
1324   @Override
1325   public void addAlignmentAnnotation(AlignmentAnnotation annotation)
1326   {
1327     if (this.annotation == null)
1328     {
1329       this.annotation = new Vector<AlignmentAnnotation>();
1330     }
1331     if (!this.annotation.contains(annotation))
1332     {
1333       this.annotation.addElement(annotation);
1334     }
1335     annotation.setSequenceRef(this);
1336   }
1337
1338   @Override
1339   public void removeAlignmentAnnotation(AlignmentAnnotation annotation)
1340   {
1341     if (this.annotation != null)
1342     {
1343       this.annotation.removeElement(annotation);
1344       if (this.annotation.size() == 0)
1345       {
1346         this.annotation = null;
1347       }
1348     }
1349   }
1350
1351   /**
1352    * test if this is a valid candidate for another sequence's dataset sequence.
1353    *
1354    */
1355   private boolean isValidDatasetSequence()
1356   {
1357     if (datasetSequence != null)
1358     {
1359       return false;
1360     }
1361     for (int i = 0; i < sequence.length; i++)
1362     {
1363       if (jalview.util.Comparison.isGap(sequence[i]))
1364       {
1365         return false;
1366       }
1367     }
1368     return true;
1369   }
1370
1371   @Override
1372   public SequenceI deriveSequence()
1373   {
1374     Sequence seq = null;
1375     if (datasetSequence == null)
1376     {
1377       if (isValidDatasetSequence())
1378       {
1379         // Use this as dataset sequence
1380         seq = new Sequence(getName(), "", 1, -1);
1381         seq.setDatasetSequence(this);
1382         seq.initSeqFrom(this, getAnnotation());
1383         return seq;
1384       }
1385       else
1386       {
1387         // Create a new, valid dataset sequence
1388         createDatasetSequence();
1389       }
1390     }
1391     return new Sequence(this);
1392   }
1393
1394   private boolean _isNa;
1395
1396   private int _seqhash = 0;
1397
1398   /**
1399    * Answers false if the sequence is more than 85% nucleotide (ACGTU), else
1400    * true
1401    */
1402   @Override
1403   public boolean isProtein()
1404   {
1405     if (datasetSequence != null)
1406     {
1407       return datasetSequence.isProtein();
1408     }
1409     if (_seqhash != sequence.hashCode())
1410     {
1411       _seqhash = sequence.hashCode();
1412       _isNa = Comparison.isNucleotide(this);
1413     }
1414     return !_isNa;
1415   };
1416
1417   /*
1418    * (non-Javadoc)
1419    *
1420    * @see jalview.datamodel.SequenceI#createDatasetSequence()
1421    */
1422   @Override
1423   public SequenceI createDatasetSequence()
1424   {
1425     if (datasetSequence == null)
1426     {
1427       Sequence dsseq = new Sequence(getName(), AlignSeq.extractGaps(
1428               jalview.util.Comparison.GapChars, getSequenceAsString()),
1429               getStart(), getEnd());
1430
1431       datasetSequence = dsseq;
1432
1433       dsseq.setDescription(description);
1434       // move features and database references onto dataset sequence
1435       dsseq.sequenceFeatureStore = sequenceFeatureStore;
1436       sequenceFeatureStore = null;
1437       dsseq.dbrefs = dbrefs;
1438       dbrefs = null;
1439       // TODO: search and replace any references to this sequence with
1440       // references to the dataset sequence in Mappings on dbref
1441       dsseq.pdbIds = pdbIds;
1442       pdbIds = null;
1443       datasetSequence.updatePDBIds();
1444       if (annotation != null)
1445       {
1446         // annotation is cloned rather than moved, to preserve what's currently
1447         // on the alignment
1448         for (AlignmentAnnotation aa : annotation)
1449         {
1450           AlignmentAnnotation _aa = new AlignmentAnnotation(aa);
1451           _aa.sequenceRef = datasetSequence;
1452           _aa.adjustForAlignment(); // uses annotation's own record of
1453                                     // sequence-column mapping
1454           datasetSequence.addAlignmentAnnotation(_aa);
1455         }
1456       }
1457     }
1458     return datasetSequence;
1459   }
1460
1461   /*
1462    * (non-Javadoc)
1463    *
1464    * @see
1465    * jalview.datamodel.SequenceI#setAlignmentAnnotation(AlignmmentAnnotation[]
1466    * annotations)
1467    */
1468   @Override
1469   public void setAlignmentAnnotation(AlignmentAnnotation[] annotations)
1470   {
1471     if (annotation != null)
1472     {
1473       annotation.removeAllElements();
1474     }
1475     if (annotations != null)
1476     {
1477       for (int i = 0; i < annotations.length; i++)
1478       {
1479         if (annotations[i] != null)
1480         {
1481           addAlignmentAnnotation(annotations[i]);
1482         }
1483       }
1484     }
1485   }
1486
1487   @Override
1488   public AlignmentAnnotation[] getAnnotation(String label)
1489   {
1490     if (annotation == null || annotation.size() == 0)
1491     {
1492       return null;
1493     }
1494
1495     Vector<AlignmentAnnotation> subset = new Vector<AlignmentAnnotation>();
1496     Enumeration<AlignmentAnnotation> e = annotation.elements();
1497     while (e.hasMoreElements())
1498     {
1499       AlignmentAnnotation ann = e.nextElement();
1500       if (ann.label != null && ann.label.equals(label))
1501       {
1502         subset.addElement(ann);
1503       }
1504     }
1505     if (subset.size() == 0)
1506     {
1507       return null;
1508     }
1509     AlignmentAnnotation[] anns = new AlignmentAnnotation[subset.size()];
1510     int i = 0;
1511     e = subset.elements();
1512     while (e.hasMoreElements())
1513     {
1514       anns[i++] = e.nextElement();
1515     }
1516     subset.removeAllElements();
1517     return anns;
1518   }
1519
1520   @Override
1521   public boolean updatePDBIds()
1522   {
1523     if (datasetSequence != null)
1524     {
1525       // TODO: could merge DBRefs
1526       return datasetSequence.updatePDBIds();
1527     }
1528     if (dbrefs == null || dbrefs.length == 0)
1529     {
1530       return false;
1531     }
1532     boolean added = false;
1533     for (DBRefEntry dbr : dbrefs)
1534     {
1535       if (DBRefSource.PDB.equals(dbr.getSource()))
1536       {
1537         /*
1538          * 'Add' any PDB dbrefs as a PDBEntry - add is only performed if the
1539          * PDB id is not already present in a 'matching' PDBEntry
1540          * Constructor parses out a chain code if appended to the accession id
1541          * (a fudge used to 'store' the chain code in the DBRef)
1542          */
1543         PDBEntry pdbe = new PDBEntry(dbr);
1544         added |= addPDBId(pdbe);
1545       }
1546     }
1547     return added;
1548   }
1549
1550   @Override
1551   public void transferAnnotation(SequenceI entry, Mapping mp)
1552   {
1553     if (datasetSequence != null)
1554     {
1555       datasetSequence.transferAnnotation(entry, mp);
1556       return;
1557     }
1558     if (entry.getDatasetSequence() != null)
1559     {
1560       transferAnnotation(entry.getDatasetSequence(), mp);
1561       return;
1562     }
1563     // transfer any new features from entry onto sequence
1564     if (entry.getSequenceFeatures() != null)
1565     {
1566
1567       List<SequenceFeature> sfs = entry.getSequenceFeatures();
1568       for (SequenceFeature feature : sfs)
1569       {
1570         SequenceFeature sf[] = (mp != null) ? mp.locateFeature(feature)
1571                 : new SequenceFeature[] { new SequenceFeature(feature) };
1572         if (sf != null)
1573         {
1574           for (int sfi = 0; sfi < sf.length; sfi++)
1575           {
1576             addSequenceFeature(sf[sfi]);
1577           }
1578         }
1579       }
1580     }
1581
1582     // transfer PDB entries
1583     if (entry.getAllPDBEntries() != null)
1584     {
1585       Enumeration<PDBEntry> e = entry.getAllPDBEntries().elements();
1586       while (e.hasMoreElements())
1587       {
1588         PDBEntry pdb = e.nextElement();
1589         addPDBId(pdb);
1590       }
1591     }
1592     // transfer database references
1593     DBRefEntry[] entryRefs = entry.getDBRefs();
1594     if (entryRefs != null)
1595     {
1596       for (int r = 0; r < entryRefs.length; r++)
1597       {
1598         DBRefEntry newref = new DBRefEntry(entryRefs[r]);
1599         if (newref.getMap() != null && mp != null)
1600         {
1601           // remap ref using our local mapping
1602         }
1603         // we also assume all version string setting is done by dbSourceProxy
1604         /*
1605          * if (!newref.getSource().equalsIgnoreCase(dbSource)) {
1606          * newref.setSource(dbSource); }
1607          */
1608         addDBRef(newref);
1609       }
1610     }
1611   }
1612
1613   /**
1614    * @return The index (zero-based) on this sequence in the MSA. It returns
1615    *         {@code -1} if this information is not available.
1616    */
1617   @Override
1618   public int getIndex()
1619   {
1620     return index;
1621   }
1622
1623   /**
1624    * Defines the position of this sequence in the MSA. Use the value {@code -1}
1625    * if this information is undefined.
1626    *
1627    * @param The
1628    *          position for this sequence. This value is zero-based (zero for
1629    *          this first sequence)
1630    */
1631   @Override
1632   public void setIndex(int value)
1633   {
1634     index = value;
1635   }
1636
1637   @Override
1638   public void setRNA(RNA r)
1639   {
1640     rna = r;
1641   }
1642
1643   @Override
1644   public RNA getRNA()
1645   {
1646     return rna;
1647   }
1648
1649   @Override
1650   public List<AlignmentAnnotation> getAlignmentAnnotations(String calcId,
1651           String label)
1652   {
1653     List<AlignmentAnnotation> result = new ArrayList<AlignmentAnnotation>();
1654     if (this.annotation != null)
1655     {
1656       for (AlignmentAnnotation ann : annotation)
1657       {
1658         if (ann.calcId != null && ann.calcId.equals(calcId)
1659                 && ann.label != null && ann.label.equals(label))
1660         {
1661           result.add(ann);
1662         }
1663       }
1664     }
1665     return result;
1666   }
1667
1668   @Override
1669   public String toString()
1670   {
1671     return getDisplayId(false);
1672   }
1673
1674   @Override
1675   public PDBEntry getPDBEntry(String pdbIdStr)
1676   {
1677     if (getDatasetSequence() != null)
1678     {
1679       return getDatasetSequence().getPDBEntry(pdbIdStr);
1680     }
1681     if (pdbIds == null)
1682     {
1683       return null;
1684     }
1685     List<PDBEntry> entries = getAllPDBEntries();
1686     for (PDBEntry entry : entries)
1687     {
1688       if (entry.getId().equalsIgnoreCase(pdbIdStr))
1689       {
1690         return entry;
1691       }
1692     }
1693     return null;
1694   }
1695
1696   @Override
1697   public List<DBRefEntry> getPrimaryDBRefs()
1698   {
1699     if (datasetSequence != null)
1700     {
1701       return datasetSequence.getPrimaryDBRefs();
1702     }
1703     if (dbrefs == null || dbrefs.length == 0)
1704     {
1705       return Collections.emptyList();
1706     }
1707     synchronized (dbrefs)
1708     {
1709       List<DBRefEntry> primaries = new ArrayList<DBRefEntry>();
1710       DBRefEntry[] tmp = new DBRefEntry[1];
1711       for (DBRefEntry ref : dbrefs)
1712       {
1713         if (!ref.isPrimaryCandidate())
1714         {
1715           continue;
1716         }
1717         if (ref.hasMap())
1718         {
1719           MapList mp = ref.getMap().getMap();
1720           if (mp.getFromLowest() > start || mp.getFromHighest() < end)
1721           {
1722             // map only involves a subsequence, so cannot be primary
1723             continue;
1724           }
1725         }
1726         // whilst it looks like it is a primary ref, we also sanity check type
1727         if (DBRefUtils.getCanonicalName(DBRefSource.PDB).equals(
1728                 DBRefUtils.getCanonicalName(ref.getSource())))
1729         {
1730           // PDB dbrefs imply there should be a PDBEntry associated
1731           // TODO: tighten PDB dbrefs
1732           // formally imply Jalview has actually downloaded and
1733           // parsed the pdb file. That means there should be a cached file
1734           // handle on the PDBEntry, and a real mapping between sequence and
1735           // extracted sequence from PDB file
1736           PDBEntry pdbentry = getPDBEntry(ref.getAccessionId());
1737           if (pdbentry != null && pdbentry.getFile() != null)
1738           {
1739             primaries.add(ref);
1740           }
1741           continue;
1742         }
1743         // check standard protein or dna sources
1744         tmp[0] = ref;
1745         DBRefEntry[] res = DBRefUtils.selectDbRefs(!isProtein(), tmp);
1746         if (res != null && res[0] == tmp[0])
1747         {
1748           primaries.add(ref);
1749           continue;
1750         }
1751       }
1752       return primaries;
1753     }
1754   }
1755
1756   /**
1757    * {@inheritDoc}
1758    */
1759   @Override
1760   public List<SequenceFeature> findFeatures(int fromColumn, int toColumn,
1761           String... types)
1762   {
1763     int startPos = findPosition(fromColumn - 1); // convert base 1 to base 0
1764     int endPos = findPosition(toColumn - 1);
1765     // to trace / debug behaviour:
1766     // System.out
1767     // .println(String
1768     // .format("%s.findFeatures columns [%d-%d] positions [%d-%d] leaves cursor %s",
1769     // getName(), fromColumn, toColumn, startPos,
1770     // endPos, cursor));
1771     List<SequenceFeature> result = new ArrayList<>();
1772     if (datasetSequence != null)
1773     {
1774       result = datasetSequence.getFeatures().findFeatures(startPos, endPos,
1775               types);
1776     }
1777     else
1778     {
1779       result = sequenceFeatureStore.findFeatures(startPos, endPos, types);
1780     }
1781
1782     /*
1783      * if the start or end column is gapped, startPos or endPos may be to the
1784      * left or right, and we may have included adjacent or enclosing features;
1785      * remove any that are not enclosing, non-contact features
1786      */
1787     if (endPos > this.end || Comparison.isGap(sequence[fromColumn - 1])
1788             || Comparison.isGap(sequence[toColumn - 1]))
1789     {
1790       ListIterator<SequenceFeature> it = result.listIterator();
1791       while (it.hasNext())
1792       {
1793         SequenceFeature sf = it.next();
1794         int featureStartColumn = findIndex(sf.getBegin());
1795         int featureEndColumn = findIndex(sf.getEnd());
1796         boolean noOverlap = featureStartColumn > toColumn
1797                         || featureEndColumn < fromColumn;
1798
1799         /*
1800          * reject an 'enclosing' feature if it is actually a contact feature
1801          */
1802         if (sf.isContactFeature() && featureStartColumn < fromColumn
1803                 && featureEndColumn > toColumn)
1804         {
1805           noOverlap = true;
1806         }
1807         if (noOverlap)
1808         {
1809           it.remove();
1810         }
1811       }
1812     }
1813
1814     return result;
1815   }
1816
1817   /**
1818    * Invalidates any stale cursors (forcing recalculation) by incrementing the
1819    * token that has to match the one presented by the cursor
1820    */
1821   @Override
1822   public void sequenceChanged()
1823   {
1824     changeCount++;
1825   }
1826
1827   /**
1828    * {@inheritDoc}
1829    */
1830   @Override
1831   public int replace(char c1, char c2)
1832   {
1833     if (c1 == c2)
1834     {
1835       return 0;
1836     }
1837     int count = 0;
1838     synchronized (sequence)
1839     {
1840       for (int c = 0; c < sequence.length; c++)
1841       {
1842         if (sequence[c] == c1)
1843         {
1844           sequence[c] = c2;
1845           count++;
1846         }
1847       }
1848     }
1849     if (count > 0)
1850     {
1851       sequenceChanged();
1852     }
1853
1854     return count;
1855   }
1856
1857   @Override
1858   public List<SequenceFeature[]> adjustFeatures(int fromColumn, int toColumn)
1859   {
1860     List<SequenceFeature[]> amended = new ArrayList<>();
1861
1862     if (toColumn < fromColumn)
1863     {
1864       return amended;
1865     }
1866
1867     synchronized (sequenceFeatureStore)
1868     {
1869       /*
1870        * get features that overlap or span the cut region
1871        */
1872       List<SequenceFeature> overlaps = findFeatures(fromColumn, toColumn);
1873       int cutWidth = toColumn - fromColumn + 1;
1874
1875       /*
1876        * get features that strictly follow the cut region,
1877        *  and shift them left by the width of the cut
1878        */
1879       List<SequenceFeature> follow = findFeatures(toColumn + 1,
1880               Integer.MAX_VALUE);
1881       follow.removeAll(overlaps);
1882       for (SequenceFeature sf : follow)
1883       {
1884         SequenceFeature copy = new SequenceFeature(sf, sf.getBegin()
1885                 - cutWidth, sf.getEnd() - cutWidth, sf.getFeatureGroup(),
1886                 sf.getScore());
1887         deleteFeature(sf);
1888         addSequenceFeature(copy);
1889       }
1890
1891       /*
1892        * adjust start-end of overlapping features, and delete if enclosed by
1893        * the cut, or a partially overlapping contact feature
1894        */
1895       for (SequenceFeature sf : overlaps)
1896       {
1897         // TODO recode to compute newBegin, newEnd, isDelete
1898         // then perform the action
1899         int sfBegin = sf.getBegin();
1900         int sfEnd = sf.getEnd();
1901         int startCol = findIndex(sfBegin);
1902         int endCol = findIndex(sfEnd);
1903         if (startCol >= fromColumn && endCol <= toColumn)
1904         {
1905           // within cut region - delete feature
1906           deleteFeature(sf);
1907           amended.add(new SequenceFeature[] { sf, null });
1908           continue;
1909         }
1910         if (startCol < fromColumn && endCol > toColumn)
1911         {
1912           // feature spans cut region - shift end left
1913           SequenceFeature copy = new SequenceFeature(sf, sf.getBegin(),
1914                   sf.getEnd() - cutWidth, sf.getFeatureGroup(),
1915                   sf.getScore());
1916           deleteFeature(sf);
1917           addSequenceFeature(copy);
1918           amended.add(new SequenceFeature[] { sf, copy });
1919           continue;
1920         }
1921         // todo partial overlap - delete if contact feature
1922       }
1923     }
1924
1925     return amended;
1926   }
1927 }