src/jalview/datamodel/Sequence.java

   1 /*
   2  * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
   3  * Copyright (C) $$Year-Rel$$ The Jalview Authors
   4  *
   5  * This file is part of Jalview.
   6  *
   7  * Jalview is free software: you can redistribute it and/or
   8  * modify it under the terms of the GNU General Public License
   9  * as published by the Free Software Foundation, either version 3
  10  * of the License, or (at your option) any later version.
  11  *
  12  * Jalview is distributed in the hope that it will be useful, but
  13  * WITHOUT ANY WARRANTY; without even the implied warranty
  14  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
  15  * PURPOSE.  See the GNU General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU General Public License
  18  * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
  19  * The Jalview Authors are detailed in the 'AUTHORS' file.
  20  */
  21 package jalview.datamodel;
  22
  23 import jalview.analysis.AlignSeq;
  24 import jalview.api.DBRefEntryI;
  25 import jalview.datamodel.features.SequenceFeatures;
  26 import jalview.datamodel.features.SequenceFeaturesI;
  27 import jalview.util.Comparison;
  28 import jalview.util.DBRefUtils;
  29 import jalview.util.MapList;
  30 import jalview.util.StringUtils;
  31
  32 import java.util.ArrayList;
  33 import java.util.Arrays;
  34 import java.util.BitSet;
  35 import java.util.Collections;
  36 import java.util.Enumeration;
  37 import java.util.List;
  38 import java.util.ListIterator;
  39 import java.util.Vector;
  40
  41 import fr.orsay.lri.varna.models.rna.RNA;
  42
  43 /**
  44  *
  45  * Implements the SequenceI interface for a char[] based sequence object
  46  */
  47 public class Sequence extends ASequence implements SequenceI
  48 {
  49   SequenceI datasetSequence;
  50
  51   String name;
  52
  53   private char[] sequence;
  54
  55   String description;
  56
  57   int start;
  58
  59   int end;
  60
  61   Vector<PDBEntry> pdbIds;
  62
  63   String vamsasId;
  64
  65   DBRefEntry[] dbrefs;
  66
  67   RNA rna;
  68
  69   /**
  70    * This annotation is displayed below the alignment but the positions are tied
  71    * to the residues of this sequence
  72    *
  73    * TODO: change to List<>
  74    */
  75   Vector<AlignmentAnnotation> annotation;
  76
  77   /**
  78    * The index of the sequence in a MSA
  79    */
  80   int index = -1;
  81
  82   private SequenceFeaturesI sequenceFeatureStore;
  83
  84   /*
  85    * A cursor holding the approximate current view position to the sequence,
  86    * as determined by findIndex or findPosition or findPositions.
  87    * Using a cursor as a hint allows these methods to be more performant for
  88    * large sequences.
  89    */
  90   private SequenceCursor cursor;
  91
  92   /*
  93    * A number that should be incremented whenever the sequence is edited.
  94    * If the value matches the cursor token, then we can trust the cursor,
  95    * if not then it should be recomputed.
  96    */
  97   private int changeCount;
  98
  99   /**
 100    * Creates a new Sequence object.
 101    *
 102    * @param name
 103    *          display name string
 104    * @param sequence
 105    *          string to form a possibly gapped sequence out of
 106    * @param start
 107    *          first position of non-gap residue in the sequence
 108    * @param end
 109    *          last position of ungapped residues (nearly always only used for
 110    *          display purposes)
 111    */
 112   public Sequence(String name, String sequence, int start, int end)
 113   {
 114     this();
 115     initSeqAndName(name, sequence.toCharArray(), start, end);
 116   }
 117
 118   public Sequence(String name, char[] sequence, int start, int end)
 119   {
 120     this();
 121     initSeqAndName(name, sequence, start, end);
 122   }
 123
 124   /**
 125    * Stage 1 constructor - assign name, sequence, and set start and end fields.
 126    * start and end are updated values from name2 if it ends with /start-end
 127    *
 128    * @param name2
 129    * @param sequence2
 130    * @param start2
 131    * @param end2
 132    */
 133   protected void initSeqAndName(String name2, char[] sequence2, int start2,
 134           int end2)
 135   {
 136     this.name = name2;
 137     this.sequence = sequence2;
 138     this.start = start2;
 139     this.end = end2;
 140     parseId();
 141     checkValidRange();
 142   }
 143
 144   /**
 145    * If 'name' ends in /i-j, where i >= j > 0 are integers, extracts i and j as
 146    * start and end respectively and removes the suffix from the name
 147    */
 148   void parseId()
 149   {
 150     if (name == null)
 151     {
 152       System.err.println(
 153               "POSSIBLE IMPLEMENTATION ERROR: null sequence name passed to constructor.");
 154       name = "";
 155     }
 156     int slashPos = name.lastIndexOf('/');
 157     if (slashPos > -1 && slashPos < name.length() - 1)
 158     {
 159       String suffix = name.substring(slashPos + 1);
 160       String[] range = suffix.split("-");
 161       if (range.length == 2)
 162       {
 163         try
 164         {
 165           int from = Integer.valueOf(range[0]);
 166           int to = Integer.valueOf(range[1]);
 167           if (from > 0 && to >= from)
 168           {
 169             name = name.substring(0, slashPos);
 170             setStart(from);
 171             setEnd(to);
 172             checkValidRange();
 173           }
 174         } catch (NumberFormatException e)
 175         {
 176           // leave name unchanged if suffix is invalid
 177         }
 178       }
 179     }
 180   }
 181
 182   /**
 183    * Ensures that 'end' is not before the end of the sequence, that is,
 184    * (end-start+1) is at least as long as the count of ungapped positions. Note
 185    * that end is permitted to be beyond the end of the sequence data.
 186    */
 187   void checkValidRange()
 188   {
 189     // Note: JAL-774 :
 190     // http://issues.jalview.org/browse/JAL-774?focusedCommentId=11239&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-11239
 191     {
 192       int endRes = 0;
 193       for (int j = 0; j < sequence.length; j++)
 194       {
 195         if (!Comparison.isGap(sequence[j]))
 196         {
 197           endRes++;
 198         }
 199       }
 200       if (endRes > 0)
 201       {
 202         endRes += start - 1;
 203       }
 204
 205       if (end < endRes)
 206       {
 207         end = endRes;
 208       }
 209     }
 210
 211   }
 212
 213   /**
 214    * default constructor
 215    */
 216   private Sequence()
 217   {
 218     sequenceFeatureStore = new SequenceFeatures();
 219   }
 220
 221   /**
 222    * Creates a new Sequence object.
 223    *
 224    * @param name
 225    *          DOCUMENT ME!
 226    * @param sequence
 227    *          DOCUMENT ME!
 228    */
 229   public Sequence(String name, String sequence)
 230   {
 231     this(name, sequence, 1, -1);
 232   }
 233
 234   /**
 235    * Creates a new Sequence object with new AlignmentAnnotations but inherits
 236    * any existing dataset sequence reference. If non exists, everything is
 237    * copied.
 238    *
 239    * @param seq
 240    *          if seq is a dataset sequence, behaves like a plain old copy
 241    *          constructor
 242    */
 243   public Sequence(SequenceI seq)
 244   {
 245     this(seq, seq.getAnnotation());
 246   }
 247
 248   /**
 249    * Create a new sequence object with new features, DBRefEntries, and PDBIds
 250    * but inherits any existing dataset sequence reference, and duplicate of any
 251    * annotation that is present in the given annotation array.
 252    *
 253    * @param seq
 254    *          the sequence to be copied
 255    * @param alAnnotation
 256    *          an array of annotation including some associated with seq
 257    */
 258   public Sequence(SequenceI seq, AlignmentAnnotation[] alAnnotation)
 259   {
 260     this();
 261     initSeqFrom(seq, alAnnotation);
 262   }
 263
 264   /**
 265    * does the heavy lifting when cloning a dataset sequence, or coping data from
 266    * dataset to a new derived sequence.
 267    *
 268    * @param seq
 269    *          - source of attributes.
 270    * @param alAnnotation
 271    *          - alignment annotation present on seq that should be copied onto
 272    *          this sequence
 273    */
 274   protected void initSeqFrom(SequenceI seq,
 275           AlignmentAnnotation[] alAnnotation)
 276   {
 277     char[] oseq = seq.getSequence(); // returns a copy of the array
 278     initSeqAndName(seq.getName(), oseq, seq.getStart(), seq.getEnd());
 279
 280     description = seq.getDescription();
 281     if (seq != datasetSequence)
 282     {
 283       setDatasetSequence(seq.getDatasetSequence());
 284     }
 285
 286     /*
 287      * only copy DBRefs and seqfeatures if we really are a dataset sequence
 288      */
 289     if (datasetSequence == null)
 290     {
 291       if (seq.getDBRefs() != null)
 292       {
 293         DBRefEntry[] dbr = seq.getDBRefs();
 294         for (int i = 0; i < dbr.length; i++)
 295         {
 296           addDBRef(new DBRefEntry(dbr[i]));
 297         }
 298       }
 299
 300       /*
 301        * make copies of any sequence features
 302        */
 303       for (SequenceFeature sf : seq.getSequenceFeatures())
 304       {
 305         addSequenceFeature(new SequenceFeature(sf));
 306       }
 307     }
 308
 309     if (seq.getAnnotation() != null)
 310     {
 311       AlignmentAnnotation[] sqann = seq.getAnnotation();
 312       for (int i = 0; i < sqann.length; i++)
 313       {
 314         if (sqann[i] == null)
 315         {
 316           continue;
 317         }
 318         boolean found = (alAnnotation == null);
 319         if (!found)
 320         {
 321           for (int apos = 0; !found && apos < alAnnotation.length; apos++)
 322           {
 323             found = (alAnnotation[apos] == sqann[i]);
 324           }
 325         }
 326         if (found)
 327         {
 328           // only copy the given annotation
 329           AlignmentAnnotation newann = new AlignmentAnnotation(sqann[i]);
 330           addAlignmentAnnotation(newann);
 331         }
 332       }
 333     }
 334     if (seq.getAllPDBEntries() != null)
 335     {
 336       Vector<PDBEntry> ids = seq.getAllPDBEntries();
 337       for (PDBEntry pdb : ids)
 338       {
 339         this.addPDBId(new PDBEntry(pdb));
 340       }
 341     }
 342   }
 343
 344   @Override
 345   public void setSequenceFeatures(List<SequenceFeature> features)
 346   {
 347     if (datasetSequence != null)
 348     {
 349       datasetSequence.setSequenceFeatures(features);
 350       return;
 351     }
 352     sequenceFeatureStore = new SequenceFeatures(features);
 353   }
 354
 355   @Override
 356   public synchronized boolean addSequenceFeature(SequenceFeature sf)
 357   {
 358     if (sf.getType() == null)
 359     {
 360       System.err.println("SequenceFeature type may not be null: "
 361               + sf.toString());
 362       return false;
 363     }
 364
 365     if (datasetSequence != null)
 366     {
 367       return datasetSequence.addSequenceFeature(sf);
 368     }
 369
 370     return sequenceFeatureStore.add(sf);
 371   }
 372
 373   @Override
 374   public void deleteFeature(SequenceFeature sf)
 375   {
 376     if (datasetSequence != null)
 377     {
 378       datasetSequence.deleteFeature(sf);
 379     }
 380     else
 381     {
 382       sequenceFeatureStore.delete(sf);
 383     }
 384   }
 385
 386   /**
 387    * {@inheritDoc}
 388    *
 389    * @return
 390    */
 391   @Override
 392   public List<SequenceFeature> getSequenceFeatures()
 393   {
 394     if (datasetSequence != null)
 395     {
 396       return datasetSequence.getSequenceFeatures();
 397     }
 398     return sequenceFeatureStore.getAllFeatures();
 399   }
 400
 401   @Override
 402   public SequenceFeaturesI getFeatures()
 403   {
 404     return datasetSequence != null ? datasetSequence.getFeatures()
 405             : sequenceFeatureStore;
 406   }
 407
 408   @Override
 409   public boolean addPDBId(PDBEntry entry)
 410   {
 411     if (pdbIds == null)
 412     {
 413       pdbIds = new Vector<PDBEntry>();
 414       pdbIds.add(entry);
 415       return true;
 416     }
 417
 418     for (PDBEntry pdbe : pdbIds)
 419     {
 420       if (pdbe.updateFrom(entry))
 421       {
 422         return false;
 423       }
 424     }
 425     pdbIds.addElement(entry);
 426     return true;
 427   }
 428
 429   /**
 430    * DOCUMENT ME!
 431    *
 432    * @param id
 433    *          DOCUMENT ME!
 434    */
 435   @Override
 436   public void setPDBId(Vector<PDBEntry> id)
 437   {
 438     pdbIds = id;
 439   }
 440
 441   /**
 442    * DOCUMENT ME!
 443    *
 444    * @return DOCUMENT ME!
 445    */
 446   @Override
 447   public Vector<PDBEntry> getAllPDBEntries()
 448   {
 449     return pdbIds == null ? new Vector<PDBEntry>() : pdbIds;
 450   }
 451
 452   /**
 453    * DOCUMENT ME!
 454    *
 455    * @return DOCUMENT ME!
 456    */
 457   @Override
 458   public String getDisplayId(boolean jvsuffix)
 459   {
 460     StringBuffer result = new StringBuffer(name);
 461     if (jvsuffix)
 462     {
 463       result.append("/" + start + "-" + end);
 464     }
 465
 466     return result.toString();
 467   }
 468
 469   /**
 470    * Sets the sequence name. If the name ends in /start-end, then the start-end
 471    * values are parsed out and set, and the suffix is removed from the name.
 472    *
 473    * @param theName
 474    */
 475   @Override
 476   public void setName(String theName)
 477   {
 478     this.name = theName;
 479     this.parseId();
 480   }
 481
 482   /**
 483    * DOCUMENT ME!
 484    *
 485    * @return DOCUMENT ME!
 486    */
 487   @Override
 488   public String getName()
 489   {
 490     return this.name;
 491   }
 492
 493   /**
 494    * DOCUMENT ME!
 495    *
 496    * @param start
 497    *          DOCUMENT ME!
 498    */
 499   @Override
 500   public void setStart(int start)
 501   {
 502     this.start = start;
 503   }
 504
 505   /**
 506    * DOCUMENT ME!
 507    *
 508    * @return DOCUMENT ME!
 509    */
 510   @Override
 511   public int getStart()
 512   {
 513     return this.start;
 514   }
 515
 516   /**
 517    * DOCUMENT ME!
 518    *
 519    * @param end
 520    *          DOCUMENT ME!
 521    */
 522   @Override
 523   public void setEnd(int end)
 524   {
 525     this.end = end;
 526   }
 527
 528   /**
 529    * DOCUMENT ME!
 530    *
 531    * @return DOCUMENT ME!
 532    */
 533   @Override
 534   public int getEnd()
 535   {
 536     return this.end;
 537   }
 538
 539   /**
 540    * DOCUMENT ME!
 541    *
 542    * @return DOCUMENT ME!
 543    */
 544   @Override
 545   public int getLength()
 546   {
 547     return this.sequence.length;
 548   }
 549
 550   /**
 551    * DOCUMENT ME!
 552    *
 553    * @param seq
 554    *          DOCUMENT ME!
 555    */
 556   @Override
 557   public void setSequence(String seq)
 558   {
 559     this.sequence = seq.toCharArray();
 560     checkValidRange();
 561     sequenceChanged();
 562   }
 563
 564   @Override
 565   public String getSequenceAsString()
 566   {
 567     return new String(sequence);
 568   }
 569
 570   @Override
 571   public String getSequenceAsString(int start, int end)
 572   {
 573     return new String(getSequence(start, end));
 574   }
 575
 576   @Override
 577   public char[] getSequence()
 578   {
 579     // return sequence;
 580     return sequence == null ? null : Arrays.copyOf(sequence,
 581             sequence.length);
 582   }
 583
 584   /*
 585    * (non-Javadoc)
 586    *
 587    * @see jalview.datamodel.SequenceI#getSequence(int, int)
 588    */
 589   @Override
 590   public char[] getSequence(int start, int end)
 591   {
 592     if (start < 0)
 593     {
 594       start = 0;
 595     }
 596     // JBPNote - left to user to pad the result here (TODO:Decide on this
 597     // policy)
 598     if (start >= sequence.length)
 599     {
 600       return new char[0];
 601     }
 602
 603     if (end >= sequence.length)
 604     {
 605       end = sequence.length;
 606     }
 607
 608     char[] reply = new char[end - start];
 609     System.arraycopy(sequence, start, reply, 0, end - start);
 610
 611     return reply;
 612   }
 613
 614   @Override
 615   public SequenceI getSubSequence(int start, int end)
 616   {
 617     if (start < 0)
 618     {
 619       start = 0;
 620     }
 621     char[] seq = getSequence(start, end);
 622     if (seq.length == 0)
 623     {
 624       return null;
 625     }
 626     int nstart = findPosition(start);
 627     int nend = findPosition(end) - 1;
 628     // JBPNote - this is an incomplete copy.
 629     SequenceI nseq = new Sequence(this.getName(), seq, nstart, nend);
 630     nseq.setDescription(description);
 631     if (datasetSequence != null)
 632     {
 633       nseq.setDatasetSequence(datasetSequence);
 634     }
 635     else
 636     {
 637       nseq.setDatasetSequence(this);
 638     }
 639     return nseq;
 640   }
 641
 642   /**
 643    * Returns the character of the aligned sequence at the given position (base
 644    * zero), or space if the position is not within the sequence's bounds
 645    *
 646    * @return
 647    */
 648   @Override
 649   public char getCharAt(int i)
 650   {
 651     if (i >= 0 && i < sequence.length)
 652     {
 653       return sequence[i];
 654     }
 655     else
 656     {
 657       return ' ';
 658     }
 659   }
 660
 661   /**
 662    * DOCUMENT ME!
 663    *
 664    * @param desc
 665    *          DOCUMENT ME!
 666    */
 667   @Override
 668   public void setDescription(String desc)
 669   {
 670     this.description = desc;
 671   }
 672
 673   /**
 674    * DOCUMENT ME!
 675    *
 676    * @return DOCUMENT ME!
 677    */
 678   @Override
 679   public String getDescription()
 680   {
 681     return this.description;
 682   }
 683
 684   /**
 685    * {@inheritDoc}
 686    */
 687   @Override
 688   public int findIndex(int pos)
 689   {
 690     /*
 691      * use a valid, hopefully nearby, cursor if available
 692      */
 693     if (isValidCursor(cursor))
 694     {
 695       return findIndex(pos, cursor);
 696     }
 697
 698     int j = start;
 699     int i = 0;
 700     int startColumn = 0;
 701
 702     /*
 703      * traverse sequence from the start counting gaps; make a note of
 704      * the column of the first residue to save in the cursor
 705      */
 706     while ((i < sequence.length) && (j <= end) && (j <= pos))
 707     {
 708       if (!Comparison.isGap(sequence[i]))
 709       {
 710         if (j == start)
 711         {
 712           startColumn = i;
 713         }
 714         j++;
 715       }
 716       i++;
 717     }
 718
 719     if (j == end && j < pos)
 720     {
 721       return end + 1;
 722     }
 723
 724     updateCursor(pos, i, startColumn);
 725     return i;
 726   }
 727
 728   /**
 729    * Updates the cursor to the latest found residue and column position
 730    *
 731    * @param residuePos
 732    *          (start..)
 733    * @param column
 734    *          (1..)
 735    * @param startColumn
 736    *          column position of the first sequence residue
 737    */
 738   protected void updateCursor(int residuePos, int column, int startColumn)
 739   {
 740     /*
 741      * preserve end residue column provided cursor was valid
 742      */
 743     int endColumn = isValidCursor(cursor) ? cursor.lastColumnPosition : 0;
 744
 745     if (residuePos == this.end)
 746     {
 747       endColumn = column;
 748     }
 749
 750     cursor = new SequenceCursor(this, residuePos, column, startColumn,
 751             endColumn, this.changeCount);
 752   }
 753
 754   /**
 755    * Answers the aligned column position (1..) for the given residue position
 756    * (start..) given a 'hint' of a residue/column location in the neighbourhood.
 757    * The hint may be left of, at, or to the right of the required position.
 758    *
 759    * @param pos
 760    * @param curs
 761    * @return
 762    */
 763   protected int findIndex(int pos, SequenceCursor curs)
 764   {
 765     if (!isValidCursor(curs))
 766     {
 767       /*
 768        * wrong or invalidated cursor, compute de novo
 769        */
 770       return findIndex(pos);
 771     }
 772
 773     if (curs.residuePosition == pos)
 774     {
 775       return curs.columnPosition;
 776     }
 777
 778     /*
 779      * move left or right to find pos from hint.position
 780      */
 781     int col = curs.columnPosition - 1; // convert from base 1 to base 0
 782     int newPos = curs.residuePosition;
 783     int delta = newPos > pos ? -1 : 1;
 784
 785     while (newPos != pos)
 786     {
 787       col += delta; // shift one column left or right
 788       if (col < 0 || col == sequence.length)
 789       {
 790         break;
 791       }
 792       if (!Comparison.isGap(sequence[col]))
 793       {
 794         newPos += delta;
 795       }
 796     }
 797
 798     col++; // convert back to base 1
 799     updateCursor(pos, col, curs.firstColumnPosition);
 800
 801     return col;
 802   }
 803
 804   /**
 805    * {@inheritDoc}
 806    */
 807   @Override
 808   public int findPosition(final int column)
 809   {
 810     /*
 811      * use a valid, hopefully nearby, cursor if available
 812      */
 813     if (isValidCursor(cursor))
 814     {
 815       return findPosition(column + 1, cursor);
 816     }
 817
 818     // TODO recode this more naturally i.e. count residues only
 819     // as they are found, not 'in anticipation'
 820
 821     /*
 822      * traverse the sequence counting gaps; note the column position
 823      * of the first residue, to save in the cursor
 824      */
 825     int firstResidueColumn = 0;
 826     int lastPosFound = 0;
 827     int lastPosFoundColumn = 0;
 828     int seqlen = sequence.length;
 829
 830     if (seqlen > 0 && !Comparison.isGap(sequence[0]))
 831     {
 832       lastPosFound = start;
 833       lastPosFoundColumn = 0;
 834     }
 835
 836     int j = 0;
 837     int pos = start;
 838
 839     while (j < column && j < seqlen)
 840     {
 841       if (!Comparison.isGap(sequence[j]))
 842       {
 843         lastPosFound = pos;
 844         lastPosFoundColumn = j;
 845         if (pos == this.start)
 846         {
 847           firstResidueColumn = j;
 848         }
 849         pos++;
 850       }
 851       j++;
 852     }
 853     if (j < seqlen && !Comparison.isGap(sequence[j]))
 854     {
 855       lastPosFound = pos;
 856       lastPosFoundColumn = j;
 857       if (pos == this.start)
 858       {
 859         firstResidueColumn = j;
 860       }
 861     }
 862
 863     /*
 864      * update the cursor to the last residue position found (if any)
 865      * (converting column position to base 1)
 866      */
 867     if (lastPosFound != 0)
 868     {
 869       updateCursor(lastPosFound, lastPosFoundColumn + 1,
 870               firstResidueColumn + 1);
 871     }
 872
 873     return pos;
 874   }
 875
 876   /**
 877    * Answers true if the given cursor is not null, is for this sequence object,
 878    * and has a token value that matches this object's changeCount, else false.
 879    * This allows us to ignore a cursor as 'stale' if the sequence has been
 880    * modified since the cursor was created.
 881    *
 882    * @param curs
 883    * @return
 884    */
 885   protected boolean isValidCursor(SequenceCursor curs)
 886   {
 887     if (curs == null || curs.sequence != this || curs.token != changeCount)
 888     {
 889       return false;
 890     }
 891     /*
 892      * sanity check against range
 893      */
 894     if (curs.columnPosition < 0 || curs.columnPosition > sequence.length)
 895     {
 896       return false;
 897     }
 898     if (curs.residuePosition < start || curs.residuePosition > end)
 899     {
 900       return false;
 901     }
 902     return true;
 903   }
 904
 905   /**
 906    * Answers the sequence position (start..) for the given aligned column
 907    * position (1..), given a hint of a cursor in the neighbourhood. The cursor
 908    * may lie left of, at, or to the right of the column position.
 909    *
 910    * @param col
 911    * @param curs
 912    * @return
 913    */
 914   protected int findPosition(final int col, SequenceCursor curs)
 915   {
 916     if (!isValidCursor(curs))
 917     {
 918       /*
 919        * wrong or invalidated cursor, compute de novo
 920        */
 921       return findPosition(col - 1);// ugh back to base 0
 922     }
 923
 924     if (curs.columnPosition == col)
 925     {
 926       cursor = curs; // in case this method becomes public
 927       return curs.residuePosition; // easy case :-)
 928     }
 929
 930     if (curs.lastColumnPosition > 0 && curs.lastColumnPosition < col)
 931     {
 932       /*
 933        * sequence lies entirely to the left of col
 934        * - return last residue + 1
 935        */
 936       return end + 1;
 937     }
 938
 939     if (curs.firstColumnPosition > 0 && curs.firstColumnPosition > col)
 940     {
 941       /*
 942        * sequence lies entirely to the right of col
 943        * - return first residue
 944        */
 945       return start;
 946     }
 947
 948     // todo could choose closest to col out of column,
 949     // firstColumnPosition, lastColumnPosition as a start point
 950
 951     /*
 952      * move left or right to find pos from cursor position
 953      */
 954     int firstResidueColumn = curs.firstColumnPosition;
 955     int column = curs.columnPosition - 1; // to base 0
 956     int newPos = curs.residuePosition;
 957     int delta = curs.columnPosition > col ? -1 : 1;
 958     boolean gapped = false;
 959     int lastFoundPosition = curs.residuePosition;
 960     int lastFoundPositionColumn = curs.columnPosition;
 961
 962     while (column != col - 1)
 963     {
 964       column += delta; // shift one column left or right
 965       if (column < 0 || column == sequence.length)
 966       {
 967         break;
 968       }
 969       gapped = Comparison.isGap(sequence[column]);
 970       if (!gapped)
 971       {
 972         newPos += delta;
 973         lastFoundPosition = newPos;
 974         lastFoundPositionColumn = column + 1;
 975         if (lastFoundPosition == this.start)
 976         {
 977           firstResidueColumn = column + 1;
 978         }
 979       }
 980     }
 981
 982     if (cursor == null || lastFoundPosition != cursor.residuePosition)
 983     {
 984       updateCursor(lastFoundPosition, lastFoundPositionColumn,
 985               firstResidueColumn);
 986     }
 987
 988     /*
 989      * hack to give position to the right if on a gap
 990      * or beyond the length of the sequence (see JAL-2562)
 991      */
 992     if (delta > 0 && (gapped || column >= sequence.length))
 993     {
 994       newPos++;
 995     }
 996
 997     return newPos;
 998   }
 999
1000   /**
1001    * {@inheritDoc}
1002    */
1003   @Override
1004   public Range findPositions(int fromColumn, int toColumn)
1005   {
1006     if (toColumn < fromColumn || fromColumn < 1)
1007     {
1008       return null;
1009     }
1010
1011     /*
1012      * find the first non-gapped position, if any
1013      */
1014     int firstPosition = 0;
1015     int col = fromColumn - 1;
1016     int length = sequence.length;
1017     while (col < length && col < toColumn)
1018     {
1019       if (!Comparison.isGap(sequence[col]))
1020       {
1021         firstPosition = findPosition(col++);
1022         break;
1023       }
1024       col++;
1025     }
1026
1027     if (firstPosition == 0)
1028     {
1029       return null;
1030     }
1031
1032     /*
1033      * find the last non-gapped position
1034      */
1035     int lastPosition = firstPosition;
1036     while (col < length && col < toColumn)
1037     {
1038       if (!Comparison.isGap(sequence[col++]))
1039       {
1040         lastPosition++;
1041       }
1042     }
1043
1044     return new Range(firstPosition, lastPosition);
1045   }
1046
1047   /**
1048    * Returns an int array where indices correspond to each residue in the
1049    * sequence and the element value gives its position in the alignment
1050    *
1051    * @return int[SequenceI.getEnd()-SequenceI.getStart()+1] or null if no
1052    *         residues in SequenceI object
1053    */
1054   @Override
1055   public int[] gapMap()
1056   {
1057     String seq = jalview.analysis.AlignSeq.extractGaps(
1058             jalview.util.Comparison.GapChars, new String(sequence));
1059     int[] map = new int[seq.length()];
1060     int j = 0;
1061     int p = 0;
1062
1063     while (j < sequence.length)
1064     {
1065       if (!jalview.util.Comparison.isGap(sequence[j]))
1066       {
1067         map[p++] = j;
1068       }
1069
1070       j++;
1071     }
1072
1073     return map;
1074   }
1075
1076   @Override
1077   public int[] findPositionMap()
1078   {
1079     int map[] = new int[sequence.length];
1080     int j = 0;
1081     int pos = start;
1082     int seqlen = sequence.length;
1083     while ((j < seqlen))
1084     {
1085       map[j] = pos;
1086       if (!jalview.util.Comparison.isGap(sequence[j]))
1087       {
1088         pos++;
1089       }
1090
1091       j++;
1092     }
1093     return map;
1094   }
1095
1096   @Override
1097   public List<int[]> getInsertions()
1098   {
1099     ArrayList<int[]> map = new ArrayList<int[]>();
1100     int lastj = -1, j = 0;
1101     int pos = start;
1102     int seqlen = sequence.length;
1103     while ((j < seqlen))
1104     {
1105       if (jalview.util.Comparison.isGap(sequence[j]))
1106       {
1107         if (lastj == -1)
1108         {
1109           lastj = j;
1110         }
1111       }
1112       else
1113       {
1114         if (lastj != -1)
1115         {
1116           map.add(new int[] { lastj, j - 1 });
1117           lastj = -1;
1118         }
1119       }
1120       j++;
1121     }
1122     if (lastj != -1)
1123     {
1124       map.add(new int[] { lastj, j - 1 });
1125       lastj = -1;
1126     }
1127     return map;
1128   }
1129
1130   @Override
1131   public BitSet getInsertionsAsBits()
1132   {
1133     BitSet map = new BitSet();
1134     int lastj = -1, j = 0;
1135     int pos = start;
1136     int seqlen = sequence.length;
1137     while ((j < seqlen))
1138     {
1139       if (jalview.util.Comparison.isGap(sequence[j]))
1140       {
1141         if (lastj == -1)
1142         {
1143           lastj = j;
1144         }
1145       }
1146       else
1147       {
1148         if (lastj != -1)
1149         {
1150           map.set(lastj, j);
1151           lastj = -1;
1152         }
1153       }
1154       j++;
1155     }
1156     if (lastj != -1)
1157     {
1158       map.set(lastj, j);
1159       lastj = -1;
1160     }
1161     return map;
1162   }
1163
1164   @Override
1165   public void deleteChars(final int i, final int j)
1166   {
1167     int newstart = start, newend = end;
1168     if (i >= sequence.length || i < 0)
1169     {
1170       return;
1171     }
1172
1173     char[] tmp = StringUtils.deleteChars(sequence, i, j);
1174     boolean createNewDs = false;
1175     // TODO: take a (second look) at the dataset creation validation method for
1176     // the very large sequence case
1177
1178     int startIndex = findIndex(start) - 1;
1179     int endIndex = findIndex(end) - 1;
1180     int startDeleteColumn = -1; // for dataset sequence deletions
1181     int deleteCount = 0;
1182
1183     for (int s = i; s < j && s < sequence.length; s++)
1184     {
1185       if (Comparison.isGap(sequence[s]))
1186       {
1187         continue;
1188       }
1189       deleteCount++;
1190       if (startDeleteColumn == -1)
1191       {
1192         startDeleteColumn = findPosition(s) - start;
1193       }
1194       if (createNewDs)
1195       {
1196         newend--;
1197       }
1198       else
1199       {
1200         if (startIndex == s)
1201         {
1202           /*
1203            * deleting characters from start of sequence; new start is the
1204            * sequence position of the next column (position to the right
1205            * if the column position is gapped)
1206            */
1207           newstart = findPosition(j);
1208           break;
1209         }
1210         else
1211         {
1212           if (endIndex < j)
1213           {
1214             /*
1215              * deleting characters at end of sequence; new end is the sequence
1216              * position of the column before the deletion; subtract 1 if this is
1217              * gapped since findPosition returns the next sequence position
1218              */
1219             newend = findPosition(i - 1);
1220             if (Comparison.isGap(sequence[i - 1]))
1221             {
1222               newend--;
1223             }
1224             break;
1225           }
1226           else
1227           {
1228             createNewDs = true;
1229             newend--;
1230           }
1231         }
1232       }
1233     }
1234
1235     if (createNewDs && this.datasetSequence != null)
1236     {
1237       /*
1238        * if deletion occured in the middle of the sequence,
1239        * construct a new dataset sequence and delete the residues
1240        * that were deleted from the aligned sequence
1241        */
1242       Sequence ds = new Sequence(datasetSequence);
1243       ds.deleteChars(startDeleteColumn, startDeleteColumn + deleteCount);
1244       datasetSequence = ds;
1245       // TODO: remove any non-inheritable properties ?
1246       // TODO: create a sequence mapping (since there is a relation here ?)
1247     }
1248     start = newstart;
1249     end = newend;
1250     sequence = tmp;
1251     sequenceChanged();
1252   }
1253
1254   @Override
1255   public void insertCharAt(int i, int length, char c)
1256   {
1257     char[] tmp = new char[sequence.length + length];
1258
1259     if (i >= sequence.length)
1260     {
1261       System.arraycopy(sequence, 0, tmp, 0, sequence.length);
1262       i = sequence.length;
1263     }
1264     else
1265     {
1266       System.arraycopy(sequence, 0, tmp, 0, i);
1267     }
1268
1269     int index = i;
1270     while (length > 0)
1271     {
1272       tmp[index++] = c;
1273       length--;
1274     }
1275
1276     if (i < sequence.length)
1277     {
1278       System.arraycopy(sequence, i, tmp, index, sequence.length - i);
1279     }
1280
1281     sequence = tmp;
1282     sequenceChanged();
1283   }
1284
1285   @Override
1286   public void insertCharAt(int i, char c)
1287   {
1288     insertCharAt(i, 1, c);
1289   }
1290
1291   @Override
1292   public String getVamsasId()
1293   {
1294     return vamsasId;
1295   }
1296
1297   @Override
1298   public void setVamsasId(String id)
1299   {
1300     vamsasId = id;
1301   }
1302
1303   @Override
1304   public void setDBRefs(DBRefEntry[] dbref)
1305   {
1306     if (dbrefs == null && datasetSequence != null
1307             && this != datasetSequence)
1308     {
1309       datasetSequence.setDBRefs(dbref);
1310       return;
1311     }
1312     dbrefs = dbref;
1313     if (dbrefs != null)
1314     {
1315       DBRefUtils.ensurePrimaries(this);
1316     }
1317   }
1318
1319   @Override
1320   public DBRefEntry[] getDBRefs()
1321   {
1322     if (dbrefs == null && datasetSequence != null
1323             && this != datasetSequence)
1324     {
1325       return datasetSequence.getDBRefs();
1326     }
1327     return dbrefs;
1328   }
1329
1330   @Override
1331   public void addDBRef(DBRefEntry entry)
1332   {
1333     if (datasetSequence != null)
1334     {
1335       datasetSequence.addDBRef(entry);
1336       return;
1337     }
1338
1339     if (dbrefs == null)
1340     {
1341       dbrefs = new DBRefEntry[0];
1342     }
1343
1344     for (DBRefEntryI dbr : dbrefs)
1345     {
1346       if (dbr.updateFrom(entry))
1347       {
1348         /*
1349          * found a dbref that either matched, or could be
1350          * updated from, the new entry - no need to add it
1351          */
1352         return;
1353       }
1354     }
1355
1356     /*
1357      * extend the array to make room for one more
1358      */
1359     // TODO use an ArrayList instead
1360     int j = dbrefs.length;
1361     DBRefEntry[] temp = new DBRefEntry[j + 1];
1362     System.arraycopy(dbrefs, 0, temp, 0, j);
1363     temp[temp.length - 1] = entry;
1364
1365     dbrefs = temp;
1366
1367     DBRefUtils.ensurePrimaries(this);
1368   }
1369
1370   @Override
1371   public void setDatasetSequence(SequenceI seq)
1372   {
1373     if (seq == this)
1374     {
1375       throw new IllegalArgumentException(
1376               "Implementation Error: self reference passed to SequenceI.setDatasetSequence");
1377     }
1378     if (seq != null && seq.getDatasetSequence() != null)
1379     {
1380       throw new IllegalArgumentException(
1381               "Implementation error: cascading dataset sequences are not allowed.");
1382     }
1383     datasetSequence = seq;
1384   }
1385
1386   @Override
1387   public SequenceI getDatasetSequence()
1388   {
1389     return datasetSequence;
1390   }
1391
1392   @Override
1393   public AlignmentAnnotation[] getAnnotation()
1394   {
1395     return annotation == null ? null
1396             : annotation
1397                     .toArray(new AlignmentAnnotation[annotation.size()]);
1398   }
1399
1400   @Override
1401   public boolean hasAnnotation(AlignmentAnnotation ann)
1402   {
1403     return annotation == null ? false : annotation.contains(ann);
1404   }
1405
1406   @Override
1407   public void addAlignmentAnnotation(AlignmentAnnotation annotation)
1408   {
1409     if (this.annotation == null)
1410     {
1411       this.annotation = new Vector<AlignmentAnnotation>();
1412     }
1413     if (!this.annotation.contains(annotation))
1414     {
1415       this.annotation.addElement(annotation);
1416     }
1417     annotation.setSequenceRef(this);
1418   }
1419
1420   @Override
1421   public void removeAlignmentAnnotation(AlignmentAnnotation annotation)
1422   {
1423     if (this.annotation != null)
1424     {
1425       this.annotation.removeElement(annotation);
1426       if (this.annotation.size() == 0)
1427       {
1428         this.annotation = null;
1429       }
1430     }
1431   }
1432
1433   /**
1434    * test if this is a valid candidate for another sequence's dataset sequence.
1435    *
1436    */
1437   private boolean isValidDatasetSequence()
1438   {
1439     if (datasetSequence != null)
1440     {
1441       return false;
1442     }
1443     for (int i = 0; i < sequence.length; i++)
1444     {
1445       if (jalview.util.Comparison.isGap(sequence[i]))
1446       {
1447         return false;
1448       }
1449     }
1450     return true;
1451   }
1452
1453   @Override
1454   public SequenceI deriveSequence()
1455   {
1456     Sequence seq = null;
1457     if (datasetSequence == null)
1458     {
1459       if (isValidDatasetSequence())
1460       {
1461         // Use this as dataset sequence
1462         seq = new Sequence(getName(), "", 1, -1);
1463         seq.setDatasetSequence(this);
1464         seq.initSeqFrom(this, getAnnotation());
1465         return seq;
1466       }
1467       else
1468       {
1469         // Create a new, valid dataset sequence
1470         createDatasetSequence();
1471       }
1472     }
1473     return new Sequence(this);
1474   }
1475
1476   private boolean _isNa;
1477
1478   private int _seqhash = 0;
1479
1480   /**
1481    * Answers false if the sequence is more than 85% nucleotide (ACGTU), else
1482    * true
1483    */
1484   @Override
1485   public boolean isProtein()
1486   {
1487     if (datasetSequence != null)
1488     {
1489       return datasetSequence.isProtein();
1490     }
1491     if (_seqhash != sequence.hashCode())
1492     {
1493       _seqhash = sequence.hashCode();
1494       _isNa = Comparison.isNucleotide(this);
1495     }
1496     return !_isNa;
1497   };
1498
1499   /*
1500    * (non-Javadoc)
1501    *
1502    * @see jalview.datamodel.SequenceI#createDatasetSequence()
1503    */
1504   @Override
1505   public SequenceI createDatasetSequence()
1506   {
1507     if (datasetSequence == null)
1508     {
1509       Sequence dsseq = new Sequence(getName(),
1510               AlignSeq.extractGaps(jalview.util.Comparison.GapChars,
1511                       getSequenceAsString()),
1512               getStart(), getEnd());
1513
1514       datasetSequence = dsseq;
1515
1516       dsseq.setDescription(description);
1517       // move features and database references onto dataset sequence
1518       dsseq.sequenceFeatureStore = sequenceFeatureStore;
1519       sequenceFeatureStore = null;
1520       dsseq.dbrefs = dbrefs;
1521       dbrefs = null;
1522       // TODO: search and replace any references to this sequence with
1523       // references to the dataset sequence in Mappings on dbref
1524       dsseq.pdbIds = pdbIds;
1525       pdbIds = null;
1526       datasetSequence.updatePDBIds();
1527       if (annotation != null)
1528       {
1529         // annotation is cloned rather than moved, to preserve what's currently
1530         // on the alignment
1531         for (AlignmentAnnotation aa : annotation)
1532         {
1533           AlignmentAnnotation _aa = new AlignmentAnnotation(aa);
1534           _aa.sequenceRef = datasetSequence;
1535           _aa.adjustForAlignment(); // uses annotation's own record of
1536                                     // sequence-column mapping
1537           datasetSequence.addAlignmentAnnotation(_aa);
1538         }
1539       }
1540     }
1541     return datasetSequence;
1542   }
1543
1544   /*
1545    * (non-Javadoc)
1546    *
1547    * @see
1548    * jalview.datamodel.SequenceI#setAlignmentAnnotation(AlignmmentAnnotation[]
1549    * annotations)
1550    */
1551   @Override
1552   public void setAlignmentAnnotation(AlignmentAnnotation[] annotations)
1553   {
1554     if (annotation != null)
1555     {
1556       annotation.removeAllElements();
1557     }
1558     if (annotations != null)
1559     {
1560       for (int i = 0; i < annotations.length; i++)
1561       {
1562         if (annotations[i] != null)
1563         {
1564           addAlignmentAnnotation(annotations[i]);
1565         }
1566       }
1567     }
1568   }
1569
1570   @Override
1571   public AlignmentAnnotation[] getAnnotation(String label)
1572   {
1573     if (annotation == null || annotation.size() == 0)
1574     {
1575       return null;
1576     }
1577
1578     Vector<AlignmentAnnotation> subset = new Vector<AlignmentAnnotation>();
1579     Enumeration<AlignmentAnnotation> e = annotation.elements();
1580     while (e.hasMoreElements())
1581     {
1582       AlignmentAnnotation ann = e.nextElement();
1583       if (ann.label != null && ann.label.equals(label))
1584       {
1585         subset.addElement(ann);
1586       }
1587     }
1588     if (subset.size() == 0)
1589     {
1590       return null;
1591     }
1592     AlignmentAnnotation[] anns = new AlignmentAnnotation[subset.size()];
1593     int i = 0;
1594     e = subset.elements();
1595     while (e.hasMoreElements())
1596     {
1597       anns[i++] = e.nextElement();
1598     }
1599     subset.removeAllElements();
1600     return anns;
1601   }
1602
1603   @Override
1604   public boolean updatePDBIds()
1605   {
1606     if (datasetSequence != null)
1607     {
1608       // TODO: could merge DBRefs
1609       return datasetSequence.updatePDBIds();
1610     }
1611     if (dbrefs == null || dbrefs.length == 0)
1612     {
1613       return false;
1614     }
1615     boolean added = false;
1616     for (DBRefEntry dbr : dbrefs)
1617     {
1618       if (DBRefSource.PDB.equals(dbr.getSource()))
1619       {
1620         /*
1621          * 'Add' any PDB dbrefs as a PDBEntry - add is only performed if the
1622          * PDB id is not already present in a 'matching' PDBEntry
1623          * Constructor parses out a chain code if appended to the accession id
1624          * (a fudge used to 'store' the chain code in the DBRef)
1625          */
1626         PDBEntry pdbe = new PDBEntry(dbr);
1627         added |= addPDBId(pdbe);
1628       }
1629     }
1630     return added;
1631   }
1632
1633   @Override
1634   public void transferAnnotation(SequenceI entry, Mapping mp)
1635   {
1636     if (datasetSequence != null)
1637     {
1638       datasetSequence.transferAnnotation(entry, mp);
1639       return;
1640     }
1641     if (entry.getDatasetSequence() != null)
1642     {
1643       transferAnnotation(entry.getDatasetSequence(), mp);
1644       return;
1645     }
1646     // transfer any new features from entry onto sequence
1647     if (entry.getSequenceFeatures() != null)
1648     {
1649
1650       List<SequenceFeature> sfs = entry.getSequenceFeatures();
1651       for (SequenceFeature feature : sfs)
1652       {
1653        SequenceFeature sf[] = (mp != null) ? mp.locateFeature(feature)
1654                 : new SequenceFeature[] { new SequenceFeature(feature) };
1655         if (sf != null)
1656         {
1657           for (int sfi = 0; sfi < sf.length; sfi++)
1658           {
1659             addSequenceFeature(sf[sfi]);
1660           }
1661         }
1662       }
1663     }
1664
1665     // transfer PDB entries
1666     if (entry.getAllPDBEntries() != null)
1667     {
1668       Enumeration<PDBEntry> e = entry.getAllPDBEntries().elements();
1669       while (e.hasMoreElements())
1670       {
1671         PDBEntry pdb = e.nextElement();
1672         addPDBId(pdb);
1673       }
1674     }
1675     // transfer database references
1676     DBRefEntry[] entryRefs = entry.getDBRefs();
1677     if (entryRefs != null)
1678     {
1679       for (int r = 0; r < entryRefs.length; r++)
1680       {
1681         DBRefEntry newref = new DBRefEntry(entryRefs[r]);
1682         if (newref.getMap() != null && mp != null)
1683         {
1684           // remap ref using our local mapping
1685         }
1686         // we also assume all version string setting is done by dbSourceProxy
1687         /*
1688          * if (!newref.getSource().equalsIgnoreCase(dbSource)) {
1689          * newref.setSource(dbSource); }
1690          */
1691         addDBRef(newref);
1692       }
1693     }
1694   }
1695
1696   /**
1697    * @return The index (zero-based) of this sequence in the MSA. It returns
1698    *         {@code -1} if this information is not available.
1699    */
1700   @Override
1701   public int getIndex()
1702   {
1703     return index;
1704   }
1705
1706   /**
1707    * Defines the (zero-based) position of this sequence in the MSA. Use the
1708    * value {@code -1} if this information is undefined.
1709    *
1710    * @param value
1711    */
1712   @Override
1713   public void setIndex(int value)
1714   {
1715     index = value;
1716   }
1717
1718   @Override
1719   public void setRNA(RNA r)
1720   {
1721     rna = r;
1722   }
1723
1724   @Override
1725   public RNA getRNA()
1726   {
1727     return rna;
1728   }
1729
1730   @Override
1731   public List<AlignmentAnnotation> getAlignmentAnnotations(String calcId,
1732           String label)
1733   {
1734     List<AlignmentAnnotation> result = new ArrayList<AlignmentAnnotation>();
1735     if (this.annotation != null)
1736     {
1737       for (AlignmentAnnotation ann : annotation)
1738       {
1739         if (ann.calcId != null && ann.calcId.equals(calcId)
1740                 && ann.label != null && ann.label.equals(label))
1741         {
1742           result.add(ann);
1743         }
1744       }
1745     }
1746     return result;
1747   }
1748
1749   @Override
1750   public String toString()
1751   {
1752     return getDisplayId(false);
1753   }
1754
1755   @Override
1756   public PDBEntry getPDBEntry(String pdbIdStr)
1757   {
1758     if (getDatasetSequence() != null)
1759     {
1760       return getDatasetSequence().getPDBEntry(pdbIdStr);
1761     }
1762     if (pdbIds == null)
1763     {
1764       return null;
1765     }
1766     List<PDBEntry> entries = getAllPDBEntries();
1767     for (PDBEntry entry : entries)
1768     {
1769       if (entry.getId().equalsIgnoreCase(pdbIdStr))
1770       {
1771         return entry;
1772       }
1773     }
1774     return null;
1775   }
1776
1777   @Override
1778   public List<DBRefEntry> getPrimaryDBRefs()
1779   {
1780     if (datasetSequence != null)
1781     {
1782       return datasetSequence.getPrimaryDBRefs();
1783     }
1784     if (dbrefs == null || dbrefs.length == 0)
1785     {
1786       return Collections.emptyList();
1787     }
1788     synchronized (dbrefs)
1789     {
1790       List<DBRefEntry> primaries = new ArrayList<DBRefEntry>();
1791       DBRefEntry[] tmp = new DBRefEntry[1];
1792       for (DBRefEntry ref : dbrefs)
1793       {
1794         if (!ref.isPrimaryCandidate())
1795         {
1796           continue;
1797         }
1798         if (ref.hasMap())
1799         {
1800           MapList mp = ref.getMap().getMap();
1801           if (mp.getFromLowest() > start || mp.getFromHighest() < end)
1802           {
1803             // map only involves a subsequence, so cannot be primary
1804             continue;
1805           }
1806         }
1807         // whilst it looks like it is a primary ref, we also sanity check type
1808         if (DBRefUtils.getCanonicalName(DBRefSource.PDB)
1809                 .equals(DBRefUtils.getCanonicalName(ref.getSource())))
1810         {
1811           // PDB dbrefs imply there should be a PDBEntry associated
1812           // TODO: tighten PDB dbrefs
1813           // formally imply Jalview has actually downloaded and
1814           // parsed the pdb file. That means there should be a cached file
1815           // handle on the PDBEntry, and a real mapping between sequence and
1816           // extracted sequence from PDB file
1817           PDBEntry pdbentry = getPDBEntry(ref.getAccessionId());
1818           if (pdbentry != null && pdbentry.getFile() != null)
1819           {
1820             primaries.add(ref);
1821           }
1822           continue;
1823         }
1824         // check standard protein or dna sources
1825         tmp[0] = ref;
1826         DBRefEntry[] res = DBRefUtils.selectDbRefs(!isProtein(), tmp);
1827         if (res != null && res[0] == tmp[0])
1828         {
1829           primaries.add(ref);
1830           continue;
1831         }
1832       }
1833       return primaries;
1834     }
1835   }
1836
1837   /**
1838    * {@inheritDoc}
1839    */
1840   @Override
1841   public List<SequenceFeature> findFeatures(int fromColumn, int toColumn,
1842           String... types)
1843   {
1844     int startPos = findPosition(fromColumn - 1); // convert base 1 to base 0
1845     int endPos = fromColumn == toColumn ? startPos
1846             : findPosition(toColumn - 1);
1847
1848     List<SequenceFeature> result = getFeatures().findFeatures(startPos,
1849             endPos, types);
1850     if (datasetSequence != null)
1851     {
1852       result = datasetSequence.getFeatures().findFeatures(startPos, endPos,
1853               types);
1854     }
1855     else
1856     {
1857       result = sequenceFeatureStore.findFeatures(startPos, endPos, types);
1858     }
1859
1860     /*
1861      * if end column is gapped, endPos may be to the right,
1862      * and we may have included adjacent or enclosing features;
1863      * remove any that are not enclosing, non-contact features
1864      */
1865     boolean endColumnIsGapped = toColumn > 0 && toColumn <= sequence.length
1866             && Comparison.isGap(sequence[toColumn - 1]);
1867     if (endPos > this.end || endColumnIsGapped)
1868     {
1869       ListIterator<SequenceFeature> it = result.listIterator();
1870       while (it.hasNext())
1871       {
1872         SequenceFeature sf = it.next();
1873         int sfBegin = sf.getBegin();
1874         int sfEnd = sf.getEnd();
1875         int featureStartColumn = findIndex(sfBegin);
1876         if (featureStartColumn > toColumn)
1877         {
1878           it.remove();
1879         }
1880         else if (featureStartColumn < fromColumn)
1881         {
1882           int featureEndColumn = sfEnd == sfBegin ? featureStartColumn
1883                   : findIndex(sfEnd);
1884           if (featureEndColumn < fromColumn)
1885           {
1886             it.remove();
1887           }
1888           else if (featureEndColumn > toColumn && sf.isContactFeature())
1889           {
1890             /*
1891              * remove an enclosing feature if it is a contact feature
1892              */
1893             it.remove();
1894           }
1895         }
1896       }
1897     }
1898
1899     return result;
1900   }
1901
1902   /**
1903    * Invalidates any stale cursors (forcing recalculation) by incrementing the
1904    * token that has to match the one presented by the cursor
1905    */
1906   @Override
1907   public void sequenceChanged()
1908   {
1909     changeCount++;
1910   }
1911
1912   /**
1913    * {@inheritDoc}
1914    */
1915   @Override
1916   public int replace(char c1, char c2)
1917   {
1918     if (c1 == c2)
1919     {
1920       return 0;
1921     }
1922     int count = 0;
1923     synchronized (sequence)
1924     {
1925       for (int c = 0; c < sequence.length; c++)
1926       {
1927         if (sequence[c] == c1)
1928         {
1929           sequence[c] = c2;
1930           count++;
1931         }
1932       }
1933     }
1934     if (count > 0)
1935     {
1936       sequenceChanged();
1937     }
1938
1939     return count;
1940   }
1941 }