src/jalview/datamodel/Sequence.java

   1 /*
   2  * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
   3  * Copyright (C) $$Year-Rel$$ The Jalview Authors
   4  *
   5  * This file is part of Jalview.
   6  *
   7  * Jalview is free software: you can redistribute it and/or
   8  * modify it under the terms of the GNU General Public License
   9  * as published by the Free Software Foundation, either version 3
  10  * of the License, or (at your option) any later version.
  11  *
  12  * Jalview is distributed in the hope that it will be useful, but
  13  * WITHOUT ANY WARRANTY; without even the implied warranty
  14  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
  15  * PURPOSE.  See the GNU General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU General Public License
  18  * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
  19  * The Jalview Authors are detailed in the 'AUTHORS' file.
  20  */
  21 package jalview.datamodel;
  22
  23 import jalview.analysis.AlignSeq;
  24 import jalview.api.DBRefEntryI;
  25 import jalview.datamodel.features.SequenceFeatures;
  26 import jalview.datamodel.features.SequenceFeaturesI;
  27 import jalview.util.Comparison;
  28 import jalview.util.DBRefUtils;
  29 import jalview.util.MapList;
  30 import jalview.util.StringUtils;
  31
  32 import java.util.ArrayList;
  33 import java.util.Arrays;
  34 import java.util.BitSet;
  35 import java.util.Collections;
  36 import java.util.Enumeration;
  37 import java.util.List;
  38 import java.util.ListIterator;
  39 import java.util.Vector;
  40
  41 import fr.orsay.lri.varna.models.rna.RNA;
  42
  43 /**
  44  *
  45  * Implements the SequenceI interface for a char[] based sequence object
  46  */
  47 public class Sequence extends ASequence implements SequenceI
  48 {
  /*
   * The dataset sequence this sequence is associated with, or null; when
   * not null, sequence feature operations are delegated to it
   */
  SequenceI datasetSequence;

  /*
   * The sequence name; any valid /start-end suffix is removed by parseId()
   */
  String name;

  /*
   * The (possibly gapped) sequence characters
   */
  private char[] sequence;

  String description;

  /*
   * Position of the first residue of the sequence
   */
  int start;

  /*
   * Position of the last residue; checkValidRange() permits this to lie
   * beyond the end of the sequence data held
   */
  int end;

  /*
   * PDB entries for this sequence; null until one is added or set
   */
  Vector<PDBEntry> pdbIds;

  String vamsasId;

  DBRefEntry[] dbrefs;

  RNA rna;

  /**
   * This annotation is displayed below the alignment but the positions are tied
   * to the residues of this sequence
   *
   * TODO: change to List<>
   */
  Vector<AlignmentAnnotation> annotation;

  /**
   * The index of the sequence in a MSA
   */
  int index = -1;

  /*
   * The store of sequence features; only used when there is no dataset
   * sequence to delegate feature operations to
   */
  private SequenceFeaturesI sequenceFeatureStore;

  /*
   * A cursor holding the approximate current view position to the sequence,
   * as determined by findIndex or findPosition or findPositions.
   * Using a cursor as a hint allows these methods to be more performant for
   * large sequences.
   */
  private SequenceCursor cursor;

  /*
   * A number that should be incremented whenever the sequence is edited.
   * If the value matches the cursor token, then we can trust the cursor,
   * if not then it should be recomputed.
   */
  private int changeCount;
  98
  99   /**
 100    * Creates a new Sequence object.
 101    *
 102    * @param name
 103    *          display name string
 104    * @param sequence
 105    *          string to form a possibly gapped sequence out of
 106    * @param start
 107    *          first position of non-gap residue in the sequence
 108    * @param end
 109    *          last position of ungapped residues (nearly always only used for
 110    *          display purposes)
 111    */
 112   public Sequence(String name, String sequence, int start, int end)
 113   {
 114     this();
 115     initSeqAndName(name, sequence.toCharArray(), start, end);
 116   }
 117
  /**
   * Constructs a sequence from a name and an array of (possibly gapped)
   * sequence characters. The array is stored as supplied (it is not copied).
   * If the name ends with a valid /start-end suffix, the suffix is removed and
   * its values replace the supplied start and end.
   *
   * @param name
   *          display name string
   * @param sequence
   *          the possibly gapped sequence characters
   * @param start
   *          position of the first non-gap residue in the sequence
   * @param end
   *          position of the last residue; increased if it is too small for
   *          the number of residues in the sequence
   */
  public Sequence(String name, char[] sequence, int start, int end)
  {
    this();
    initSeqAndName(name, sequence, start, end);
  }
 123
 124   /**
 125    * Stage 1 constructor - assign name, sequence, and set start and end fields.
 126    * start and end are updated values from name2 if it ends with /start-end
 127    *
 128    * @param name2
 129    * @param sequence2
 130    * @param start2
 131    * @param end2
 132    */
 133   protected void initSeqAndName(String name2, char[] sequence2, int start2,
 134           int end2)
 135   {
 136     this.name = name2;
 137     this.sequence = sequence2;
 138     this.start = start2;
 139     this.end = end2;
 140     parseId();
 141     checkValidRange();
 142   }
 143
 144   /**
 145    * If 'name' ends in /i-j, where i >= j > 0 are integers, extracts i and j as
 146    * start and end respectively and removes the suffix from the name
 147    */
 148   void parseId()
 149   {
 150     if (name == null)
 151     {
 152       System.err.println(
 153               "POSSIBLE IMPLEMENTATION ERROR: null sequence name passed to constructor.");
 154       name = "";
 155     }
 156     int slashPos = name.lastIndexOf('/');
 157     if (slashPos > -1 && slashPos < name.length() - 1)
 158     {
 159       String suffix = name.substring(slashPos + 1);
 160       String[] range = suffix.split("-");
 161       if (range.length == 2)
 162       {
 163         try
 164         {
 165           int from = Integer.valueOf(range[0]);
 166           int to = Integer.valueOf(range[1]);
 167           if (from > 0 && to >= from)
 168           {
 169             name = name.substring(0, slashPos);
 170             setStart(from);
 171             setEnd(to);
 172             checkValidRange();
 173           }
 174         } catch (NumberFormatException e)
 175         {
 176           // leave name unchanged if suffix is invalid
 177         }
 178       }
 179     }
 180   }
 181
 182   /**
 183    * Ensures that 'end' is not before the end of the sequence, that is,
 184    * (end-start+1) is at least as long as the count of ungapped positions. Note
 185    * that end is permitted to be beyond the end of the sequence data.
 186    */
 187   void checkValidRange()
 188   {
 189     // Note: JAL-774 :
 190     // http://issues.jalview.org/browse/JAL-774?focusedCommentId=11239&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-11239
 191     {
 192       int endRes = 0;
 193       for (int j = 0; j < sequence.length; j++)
 194       {
 195         if (!Comparison.isGap(sequence[j]))
 196         {
 197           endRes++;
 198         }
 199       }
 200       if (endRes > 0)
 201       {
 202         endRes += start - 1;
 203       }
 204
 205       if (end < endRes)
 206       {
 207         end = endRes;
 208       }
 209     }
 210
 211   }
 212
 213   /**
 214    * default constructor
 215    */
 216   private Sequence()
 217   {
 218     sequenceFeatureStore = new SequenceFeatures();
 219   }
 220
  /**
   * Creates a new Sequence object with an initial start of 1 and end of -1.
   * The end is then raised by range validation to fit the number of residues
   * in the sequence. If the name ends with a valid /start-end suffix, the
   * suffix is removed and supplies start and end instead.
   *
   * @param name
   *          display name string
   * @param sequence
   *          string holding the possibly gapped sequence characters
   */
  public Sequence(String name, String sequence)
  {
    this(name, sequence, 1, -1);
  }
 233
  /**
   * Creates a new Sequence object with new AlignmentAnnotations but inherits
   * any existing dataset sequence reference. If none exists, everything is
   * copied. All of the annotation held on seq is copied.
   *
   * @param seq
   *          if seq is a dataset sequence, behaves like a plain old copy
   *          constructor
   */
  public Sequence(SequenceI seq)
  {
    this(seq, seq.getAnnotation());
  }
 247
  /**
   * Creates a new sequence object as a copy of seq, inheriting any existing
   * dataset sequence reference. New copies are made of the PDB entries, and of
   * any annotation on seq that is also present in the given annotation array.
   * DBRefEntries and sequence features are only copied if there is no dataset
   * sequence.
   *
   * @param seq
   *          the sequence to be copied
   * @param alAnnotation
   *          an array of annotation including some associated with seq; if
   *          null, all annotation on seq is copied
   */
  public Sequence(SequenceI seq, AlignmentAnnotation[] alAnnotation)
  {
    this();
    initSeqFrom(seq, alAnnotation);
  }
 263
 264   /**
 265    * does the heavy lifting when cloning a dataset sequence, or coping data from
 266    * dataset to a new derived sequence.
 267    *
 268    * @param seq
 269    *          - source of attributes.
 270    * @param alAnnotation
 271    *          - alignment annotation present on seq that should be copied onto
 272    *          this sequence
 273    */
 274   protected void initSeqFrom(SequenceI seq,
 275           AlignmentAnnotation[] alAnnotation)
 276   {
 277     char[] oseq = seq.getSequence(); // returns a copy of the array
 278     initSeqAndName(seq.getName(), oseq, seq.getStart(), seq.getEnd());
 279
 280     description = seq.getDescription();
 281     if (seq != datasetSequence)
 282     {
 283       setDatasetSequence(seq.getDatasetSequence());
 284     }
 285
 286     /*
 287      * only copy DBRefs and seqfeatures if we really are a dataset sequence
 288      */
 289     if (datasetSequence == null)
 290     {
 291       if (seq.getDBRefs() != null)
 292       {
 293         DBRefEntry[] dbr = seq.getDBRefs();
 294         for (int i = 0; i < dbr.length; i++)
 295         {
 296           addDBRef(new DBRefEntry(dbr[i]));
 297         }
 298       }
 299
 300       /*
 301        * make copies of any sequence features
 302        */
 303       for (SequenceFeature sf : seq.getSequenceFeatures())
 304       {
 305         addSequenceFeature(new SequenceFeature(sf));
 306       }
 307     }
 308
 309     if (seq.getAnnotation() != null)
 310     {
 311       AlignmentAnnotation[] sqann = seq.getAnnotation();
 312       for (int i = 0; i < sqann.length; i++)
 313       {
 314         if (sqann[i] == null)
 315         {
 316           continue;
 317         }
 318         boolean found = (alAnnotation == null);
 319         if (!found)
 320         {
 321           for (int apos = 0; !found && apos < alAnnotation.length; apos++)
 322           {
 323             found = (alAnnotation[apos] == sqann[i]);
 324           }
 325         }
 326         if (found)
 327         {
 328           // only copy the given annotation
 329           AlignmentAnnotation newann = new AlignmentAnnotation(sqann[i]);
 330           addAlignmentAnnotation(newann);
 331         }
 332       }
 333     }
 334     if (seq.getAllPDBEntries() != null)
 335     {
 336       Vector<PDBEntry> ids = seq.getAllPDBEntries();
 337       for (PDBEntry pdb : ids)
 338       {
 339         this.addPDBId(new PDBEntry(pdb));
 340       }
 341     }
 342   }
 343
 344   @Override
 345   public void setSequenceFeatures(List<SequenceFeature> features)
 346   {
 347     if (datasetSequence != null)
 348     {
 349       datasetSequence.setSequenceFeatures(features);
 350       return;
 351     }
 352     sequenceFeatureStore = new SequenceFeatures(features);
 353   }
 354
 355   @Override
 356   public synchronized boolean addSequenceFeature(SequenceFeature sf)
 357   {
 358     if (sf.getType() == null)
 359     {
 360       System.err.println("SequenceFeature type may not be null: "
 361               + sf.toString());
 362       return false;
 363     }
 364
 365     if (datasetSequence != null)
 366     {
 367       return datasetSequence.addSequenceFeature(sf);
 368     }
 369
 370     return sequenceFeatureStore.add(sf);
 371   }
 372
 373   @Override
 374   public void deleteFeature(SequenceFeature sf)
 375   {
 376     if (datasetSequence != null)
 377     {
 378       datasetSequence.deleteFeature(sf);
 379     }
 380     else
 381     {
 382       sequenceFeatureStore.delete(sf);
 383     }
 384   }
 385
 386   /**
 387    * {@inheritDoc}
 388    *
 389    * @return
 390    */
 391   @Override
 392   public List<SequenceFeature> getSequenceFeatures()
 393   {
 394     if (datasetSequence != null)
 395     {
 396       return datasetSequence.getSequenceFeatures();
 397     }
 398     return sequenceFeatureStore.getAllFeatures();
 399   }
 400
 401   @Override
 402   public SequenceFeaturesI getFeatures()
 403   {
 404     return datasetSequence != null ? datasetSequence.getFeatures()
 405             : sequenceFeatureStore;
 406   }
 407
 408   @Override
 409   public boolean addPDBId(PDBEntry entry)
 410   {
 411     if (pdbIds == null)
 412     {
 413       pdbIds = new Vector<PDBEntry>();
 414       pdbIds.add(entry);
 415       return true;
 416     }
 417
 418     for (PDBEntry pdbe : pdbIds)
 419     {
 420       if (pdbe.updateFrom(entry))
 421       {
 422         return false;
 423       }
 424     }
 425     pdbIds.addElement(entry);
 426     return true;
 427   }
 428
  /**
   * Replaces the PDB entries held for this sequence. The given Vector is
   * stored as supplied (it is not copied), and may be null.
   *
   * @param id
   *          the new PDB entries
   */
  @Override
  public void setPDBId(Vector<PDBEntry> id)
  {
    pdbIds = id;
  }
 440
 441   /**
 442    * DOCUMENT ME!
 443    *
 444    * @return DOCUMENT ME!
 445    */
 446   @Override
 447   public Vector<PDBEntry> getAllPDBEntries()
 448   {
 449     return pdbIds == null ? new Vector<PDBEntry>() : pdbIds;
 450   }
 451
 452   /**
 453    * DOCUMENT ME!
 454    *
 455    * @return DOCUMENT ME!
 456    */
 457   @Override
 458   public String getDisplayId(boolean jvsuffix)
 459   {
 460     StringBuffer result = new StringBuffer(name);
 461     if (jvsuffix)
 462     {
 463       result.append("/" + start + "-" + end);
 464     }
 465
 466     return result.toString();
 467   }
 468
 469   /**
 470    * Sets the sequence name. If the name ends in /start-end, then the start-end
 471    * values are parsed out and set, and the suffix is removed from the name.
 472    *
 473    * @param theName
 474    */
 475   @Override
 476   public void setName(String theName)
 477   {
 478     this.name = theName;
 479     this.parseId();
 480   }
 481
 482   /**
 483    * DOCUMENT ME!
 484    *
 485    * @return DOCUMENT ME!
 486    */
 487   @Override
 488   public String getName()
 489   {
 490     return this.name;
 491   }
 492
  /**
   * Sets the position of the first residue of the sequence. No validation of
   * the value is performed here.
   *
   * @param start
   *          the new start position
   */
  @Override
  public void setStart(int start)
  {
    this.start = start;
  }
 504
 505   /**
 506    * DOCUMENT ME!
 507    *
 508    * @return DOCUMENT ME!
 509    */
 510   @Override
 511   public int getStart()
 512   {
 513     return this.start;
 514   }
 515
  /**
   * Sets the position of the last residue of the sequence. No validation of
   * the value is performed here.
   *
   * @param end
   *          the new end position
   */
  @Override
  public void setEnd(int end)
  {
    this.end = end;
  }
 527
 528   /**
 529    * DOCUMENT ME!
 530    *
 531    * @return DOCUMENT ME!
 532    */
 533   @Override
 534   public int getEnd()
 535   {
 536     return this.end;
 537   }
 538
 539   /**
 540    * DOCUMENT ME!
 541    *
 542    * @return DOCUMENT ME!
 543    */
 544   @Override
 545   public int getLength()
 546   {
 547     return this.sequence.length;
 548   }
 549
 550   /**
 551    * DOCUMENT ME!
 552    *
 553    * @param seq
 554    *          DOCUMENT ME!
 555    */
 556   @Override
 557   public void setSequence(String seq)
 558   {
 559     this.sequence = seq.toCharArray();
 560     checkValidRange();
 561     sequenceChanged();
 562   }
 563
 564   @Override
 565   public String getSequenceAsString()
 566   {
 567     return new String(sequence);
 568   }
 569
 570   @Override
 571   public String getSequenceAsString(int start, int end)
 572   {
 573     return new String(getSequence(start, end));
 574   }
 575
 576   @Override
 577   public char[] getSequence()
 578   {
 579     // return sequence;
 580     return sequence == null ? null : Arrays.copyOf(sequence,
 581             sequence.length);
 582   }
 583
 584   /*
 585    * (non-Javadoc)
 586    *
 587    * @see jalview.datamodel.SequenceI#getSequence(int, int)
 588    */
 589   @Override
 590   public char[] getSequence(int start, int end)
 591   {
 592     if (start < 0)
 593     {
 594       start = 0;
 595     }
 596     // JBPNote - left to user to pad the result here (TODO:Decide on this
 597     // policy)
 598     if (start >= sequence.length)
 599     {
 600       return new char[0];
 601     }
 602
 603     if (end >= sequence.length)
 604     {
 605       end = sequence.length;
 606     }
 607
 608     char[] reply = new char[end - start];
 609     System.arraycopy(sequence, start, reply, 0, end - start);
 610
 611     return reply;
 612   }
 613
 614   @Override
 615   public SequenceI getSubSequence(int start, int end)
 616   {
 617     if (start < 0)
 618     {
 619       start = 0;
 620     }
 621     char[] seq = getSequence(start, end);
 622     if (seq.length == 0)
 623     {
 624       return null;
 625     }
 626     int nstart = findPosition(start);
 627     int nend = findPosition(end) - 1;
 628     // JBPNote - this is an incomplete copy.
 629     SequenceI nseq = new Sequence(this.getName(), seq, nstart, nend);
 630     nseq.setDescription(description);
 631     if (datasetSequence != null)
 632     {
 633       nseq.setDatasetSequence(datasetSequence);
 634     }
 635     else
 636     {
 637       nseq.setDatasetSequence(this);
 638     }
 639     return nseq;
 640   }
 641
 642   /**
 643    * Returns the character of the aligned sequence at the given position (base
 644    * zero), or space if the position is not within the sequence's bounds
 645    *
 646    * @return
 647    */
 648   @Override
 649   public char getCharAt(int i)
 650   {
 651     if (i >= 0 && i < sequence.length)
 652     {
 653       return sequence[i];
 654     }
 655     else
 656     {
 657       return ' ';
 658     }
 659   }
 660
  /**
   * Sets the sequence description
   *
   * @param desc
   *          the new description (may be null)
   */
  @Override
  public void setDescription(String desc)
  {
    this.description = desc;
  }
 672
 673   /**
 674    * DOCUMENT ME!
 675    *
 676    * @return DOCUMENT ME!
 677    */
 678   @Override
 679   public String getDescription()
 680   {
 681     return this.description;
 682   }
 683
  /**
   * {@inheritDoc}
   * <p>
   * Implementation note: if a valid cursor is held, it is used as the
   * starting point of the search; otherwise the sequence is scanned from its
   * first column, and the cursor is updated from the result.
   */
  @Override
  public int findIndex(int pos)
  {
    /*
     * use a valid, hopefully nearby, cursor if available
     */
    if (isValidCursor(cursor))
    {
      return findIndex(pos, cursor);
    }

    int j = start; // position of the next residue to be found
    int i = 0; // count of columns traversed so far
    int startColumn = 0;

    /*
     * traverse sequence from the start counting gaps; make a note of
     * the column of the first residue to save in the cursor
     */
    while ((i < sequence.length) && (j <= end) && (j <= pos))
    {
      if (!Comparison.isGap(sequence[i]))
      {
        if (j == start)
        {
          // NOTE(review): i is a base 0 index here, whereas findPosition
          // passes a base 1 first residue column to updateCursor - confirm
          startColumn = i;
        }
        j++;
      }
      i++;
    }

    // NOTE(review): this answers a residue position (end + 1) rather than
    // a column count like the normal return below - confirm it is intended
    if (j == end && j < pos)
    {
      return end + 1;
    }

    updateCursor(pos, i, startColumn);
    return i;
  }
 727
 728   /**
 729    * Updates the cursor to the latest found residue and column position
 730    *
 731    * @param residuePos
 732    *          (start..)
 733    * @param column
 734    *          (1..)
 735    * @param startColumn
 736    *          column position of the first sequence residue
 737    */
 738   protected void updateCursor(int residuePos, int column, int startColumn)
 739   {
 740     /*
 741      * preserve end residue column provided cursor was valid
 742      */
 743     int endColumn = isValidCursor(cursor) ? cursor.lastColumnPosition : 0;
 744
 745     if (residuePos == this.end)
 746     {
 747       endColumn = column;
 748     }
 749
 750     cursor = new SequenceCursor(this, residuePos, column, startColumn,
 751             endColumn, this.changeCount);
 752   }
 753
  /**
   * Answers the aligned column position (1..) for the given residue position
   * (start..) given a 'hint' of a residue/column location in the neighbourhood.
   * The hint may be left of, at, or to the right of the required position. If
   * the hint is not a valid cursor for this sequence, the position is computed
   * without it. Unless the hint is already at the required position, the
   * cursor is updated with the result.
   *
   * @param pos
   *          the residue position (start..) to locate
   * @param curs
   *          the cursor to use as the starting point of the search
   * @return the column position (base 1) found
   */
  protected int findIndex(int pos, SequenceCursor curs)
  {
    if (!isValidCursor(curs))
    {
      /*
       * wrong or invalidated cursor, compute de novo
       */
      return findIndex(pos);
    }

    if (curs.residuePosition == pos)
    {
      return curs.columnPosition;
    }

    /*
     * move left or right to find pos from hint.position
     */
    int col = curs.columnPosition - 1; // convert from base 1 to base 0
    int newPos = curs.residuePosition;
    int delta = newPos > pos ? -1 : 1;

    while (newPos != pos)
    {
      col += delta; // shift one column left or right
      if (col < 0 || col == sequence.length)
      {
        // ran off either end of the sequence without finding pos
        break;
      }
      if (!Comparison.isGap(sequence[col]))
      {
        newPos += delta;
      }
    }

    col++; // convert back to base 1
    updateCursor(pos, col, curs.firstColumnPosition);

    return col;
  }
 803
  /**
   * {@inheritDoc}
   * <p>
   * Implementation note: the column argument is base 0. If a valid cursor is
   * held, the search is delegated to findPosition(column + 1, cursor);
   * otherwise the sequence is scanned from its first column, and the cursor is
   * updated to the last residue found (if any).
   */
  @Override
  public int findPosition(final int column)
  {
    /*
     * use a valid, hopefully nearby, cursor if available
     */
    if (isValidCursor(cursor))
    {
      return findPosition(column + 1, cursor);
    }

    // TODO recode this more naturally i.e. count residues only
    // as they are found, not 'in anticipation'

    /*
     * traverse the sequence counting gaps; note the column position
     * of the first residue, to save in the cursor
     */
    int firstResidueColumn = 0;
    int lastPosFound = 0;
    int lastPosFoundColumn = 0;
    int seqlen = sequence.length;

    if (seqlen > 0 && !Comparison.isGap(sequence[0]))
    {
      lastPosFound = start;
      lastPosFoundColumn = 0;
    }

    int j = 0; // column index (base 0)
    int pos = start; // position of the next residue to be found

    while (j < column && j < seqlen)
    {
      if (!Comparison.isGap(sequence[j]))
      {
        lastPosFound = pos;
        lastPosFoundColumn = j;
        if (pos == this.start)
        {
          firstResidueColumn = j;
        }
        pos++;
      }
      j++;
    }

    /*
     * if the target column itself holds a residue, that is the last
     * residue found (pos is not incremented past it)
     */
    if (j < seqlen && !Comparison.isGap(sequence[j]))
    {
      lastPosFound = pos;
      lastPosFoundColumn = j;
      if (pos == this.start)
      {
        firstResidueColumn = j;
      }
    }

    /*
     * update the cursor to the last residue position found (if any)
     * (converting column position to base 1)
     */
    if (lastPosFound != 0)
    {
      updateCursor(lastPosFound, lastPosFoundColumn + 1,
              firstResidueColumn + 1);
    }

    return pos;
  }
 875
 876   /**
 877    * Answers true if the given cursor is not null, is for this sequence object,
 878    * and has a token value that matches this object's changeCount, else false.
 879    * This allows us to ignore a cursor as 'stale' if the sequence has been
 880    * modified since the cursor was created.
 881    *
 882    * @param curs
 883    * @return
 884    */
 885   protected boolean isValidCursor(SequenceCursor curs)
 886   {
 887     if (curs == null || curs.sequence != this || curs.token != changeCount)
 888     {
 889       return false;
 890     }
 891     /*
 892      * sanity check against range
 893      */
 894     if (curs.columnPosition < 0 || curs.columnPosition > sequence.length)
 895     {
 896       return false;
 897     }
 898     if (curs.residuePosition < start || curs.residuePosition > end)
 899     {
 900       return false;
 901     }
 902     return true;
 903   }
 904
  /**
   * Answers the sequence position (start..) for the given aligned column
   * position (1..), given a hint of a cursor in the neighbourhood. The cursor
   * may lie left of, at, or to the right of the column position. If the hint
   * is not a valid cursor for this sequence, the position is computed without
   * it. When searching rightwards, if the column is gapped or beyond the end
   * of the sequence, the position of the next residue to the right is
   * answered.
   *
   * @param col
   *          the column position (base 1)
   * @param curs
   *          the cursor to use as the starting point of the search
   * @return the residue position found
   */
  protected int findPosition(final int col, SequenceCursor curs)
  {
    if (!isValidCursor(curs))
    {
      /*
       * wrong or invalidated cursor, compute de novo
       */
      return findPosition(col - 1);// ugh back to base 0
    }

    if (curs.columnPosition == col)
    {
      cursor = curs; // in case this method becomes public
      return curs.residuePosition; // easy case :-)
    }

    /*
     * a last column position of zero means that it is not yet known
     */
    if (curs.lastColumnPosition > 0 && curs.lastColumnPosition < col)
    {
      /*
       * sequence lies entirely to the left of col
       * - return last residue + 1
       */
      return end + 1;
    }

    /*
     * likewise a first column position of zero means not yet known
     */
    if (curs.firstColumnPosition > 0 && curs.firstColumnPosition > col)
    {
      /*
       * sequence lies entirely to the right of col
       * - return first residue
       */
      return start;
    }

    // todo could choose closest to col out of column,
    // firstColumnPosition, lastColumnPosition as a start point

    /*
     * move left or right to find pos from cursor position
     */
    int firstResidueColumn = curs.firstColumnPosition;
    int column = curs.columnPosition - 1; // to base 0
    int newPos = curs.residuePosition;
    int delta = curs.columnPosition > col ? -1 : 1;
    boolean gapped = false;
    int lastFoundPosition = curs.residuePosition;
    int lastFoundPositionColumn = curs.columnPosition;

    while (column != col - 1)
    {
      column += delta; // shift one column left or right
      if (column < 0 || column == sequence.length)
      {
        // ran off either end of the sequence before reaching col
        break;
      }
      gapped = Comparison.isGap(sequence[column]);
      if (!gapped)
      {
        newPos += delta;
        lastFoundPosition = newPos;
        lastFoundPositionColumn = column + 1;
        if (lastFoundPosition == this.start)
        {
          firstResidueColumn = column + 1;
        }
      }
    }

    /*
     * save the last residue found in the cursor, unless the cursor held
     * is already at that residue
     */
    if (cursor == null || lastFoundPosition != cursor.residuePosition)
    {
      updateCursor(lastFoundPosition, lastFoundPositionColumn,
              firstResidueColumn);
    }

    /*
     * hack to give position to the right if on a gap
     * or beyond the length of the sequence (see JAL-2562)
     */
    if (delta > 0 && (gapped || column >= sequence.length))
    {
      newPos++;
    }

    return newPos;
  }
 999
1000   /**
1001    * {@inheritDoc}
1002    */
1003   @Override
1004   public Range findPositions(int fromColumn, int toColumn)
1005   {
1006     if (toColumn < fromColumn || fromColumn < 1)
1007     {
1008       return null;
1009     }
1010
1011     /*
1012      * find the first non-gapped position, if any
1013      */
1014     int firstPosition = 0;
1015     int col = fromColumn - 1;
1016     int length = sequence.length;
1017     while (col < length && col < toColumn)
1018     {
1019       if (!Comparison.isGap(sequence[col]))
1020       {
1021         firstPosition = findPosition(col++);
1022         break;
1023       }
1024       col++;
1025     }
1026
1027     if (firstPosition == 0)
1028     {
1029       return null;
1030     }
1031
1032     /*
1033      * find the last non-gapped position
1034      */
1035     int lastPosition = firstPosition;
1036     while (col < length && col < toColumn)
1037     {
1038       if (!Comparison.isGap(sequence[col++]))
1039       {
1040         lastPosition++;
1041       }
1042     }
1043
1044     return new Range(firstPosition, lastPosition);
1045   }
1046
1047   /**
1048    * Returns an int array where indices correspond to each residue in the
1049    * sequence and the element value gives its position in the alignment
1050    *
1051    * @return int[SequenceI.getEnd()-SequenceI.getStart()+1] or null if no
1052    *         residues in SequenceI object
1053    */
1054   @Override
1055   public int[] gapMap()
1056   {
1057     String seq = jalview.analysis.AlignSeq.extractGaps(
1058             jalview.util.Comparison.GapChars, new String(sequence));
1059     int[] map = new int[seq.length()];
1060     int j = 0;
1061     int p = 0;
1062
1063     while (j < sequence.length)
1064     {
1065       if (!jalview.util.Comparison.isGap(sequence[j]))
1066       {
1067         map[p++] = j;
1068       }
1069
1070       j++;
1071     }
1072
1073     return map;
1074   }
1075
1076   @Override
1077   public int[] findPositionMap()
1078   {
1079     int map[] = new int[sequence.length];
1080     int j = 0;
1081     int pos = start;
1082     int seqlen = sequence.length;
1083     while ((j < seqlen))
1084     {
1085       map[j] = pos;
1086       if (!jalview.util.Comparison.isGap(sequence[j]))
1087       {
1088         pos++;
1089       }
1090
1091       j++;
1092     }
1093     return map;
1094   }
1095
1096   @Override
1097   public List<int[]> getInsertions()
1098   {
1099     ArrayList<int[]> map = new ArrayList<int[]>();
1100     int lastj = -1, j = 0;
1101     int pos = start;
1102     int seqlen = sequence.length;
1103     while ((j < seqlen))
1104     {
1105       if (jalview.util.Comparison.isGap(sequence[j]))
1106       {
1107         if (lastj == -1)
1108         {
1109           lastj = j;
1110         }
1111       }
1112       else
1113       {
1114         if (lastj != -1)
1115         {
1116           map.add(new int[] { lastj, j - 1 });
1117           lastj = -1;
1118         }
1119       }
1120       j++;
1121     }
1122     if (lastj != -1)
1123     {
1124       map.add(new int[] { lastj, j - 1 });
1125       lastj = -1;
1126     }
1127     return map;
1128   }
1129
1130   @Override
1131   public BitSet getInsertionsAsBits()
1132   {
1133     BitSet map = new BitSet();
1134     int lastj = -1, j = 0;
1135     int pos = start;
1136     int seqlen = sequence.length;
1137     while ((j < seqlen))
1138     {
1139       if (jalview.util.Comparison.isGap(sequence[j]))
1140       {
1141         if (lastj == -1)
1142         {
1143           lastj = j;
1144         }
1145       }
1146       else
1147       {
1148         if (lastj != -1)
1149         {
1150           map.set(lastj, j);
1151           lastj = -1;
1152         }
1153       }
1154       j++;
1155     }
1156     if (lastj != -1)
1157     {
1158       map.set(lastj, j);
1159       lastj = -1;
1160     }
1161     return map;
1162   }
1163
  /**
   * Deletes characters from the sequence using StringUtils.deleteChars(
   * sequence, i, j), and adjusts start and end according to any residues found
   * in columns i to j-1 (base 0). Does nothing if i is not a valid index into
   * the sequence. If residues are deleted from within the sequence (that is,
   * not including its first residue, and not extending to its last), and
   * there is a dataset sequence, then the dataset sequence is replaced with a
   * copy that has had deleteChars(i, j) applied to it.
   *
   * @param i
   *          first column (base 0) to delete
   * @param j
   *          column after the last to delete - presumably, going by the loop
   *          bounds here; confirm against StringUtils.deleteChars
   */
  @Override
  public void deleteChars(int i, int j)
  {
    int newstart = start, newend = end;
    if (i >= sequence.length || i < 0)
    {
      return;
    }

    char[] tmp = StringUtils.deleteChars(sequence, i, j);
    boolean createNewDs = false;
    // TODO: take a (second look) at the dataset creation validation method for
    // the very large sequence case
    int eindex = -1, sindex = -1;
    boolean ecalc = false, scalc = false;
    for (int s = i; s < j && s < sequence.length; s++)
    {
      if (!Comparison.isGap(sequence[s]))
      {
        if (createNewDs)
        {
          // already known to be a deletion within the sequence:
          // just count off each further deleted residue
          newend--;
        }
        else
        {
          if (!scalc)
          {
            // column index (base 0) of the first residue, computed once
            sindex = findIndex(start) - 1;
            scalc = true;
          }
          if (sindex == s)
          {
            // delete characters including start of sequence
            newstart = findPosition(j);
            break; // don't need to search for any more residue characters.
          }
          else
          {
            // delete characters after start.
            if (!ecalc)
            {
              // column index (base 0) of the last residue, computed once
              eindex = findIndex(end) - 1;
              ecalc = true;
            }
            if (eindex < j)
            {
              // delete characters at end of sequence
              newend = findPosition(i - 1);
              break; // don't need to search for any more residue characters.
            }
            else
            {
              createNewDs = true;
              newend--; // decrease end position by one for the deleted residue
              // and search further
            }
          }
        }
      }
    }
    // deletion occurred in the middle of the sequence
    if (createNewDs && this.datasetSequence != null)
    {
      // construct a new sequence
      Sequence ds = new Sequence(datasetSequence);
      // TODO: remove any non-inheritable properties ?
      // TODO: create a sequence mapping (since there is a relation here ?)
      // NOTE(review): i and j are columns of this sequence, but are applied
      // unchanged to the copy of the dataset sequence - confirm intended
      ds.deleteChars(i, j);
      datasetSequence = ds;
    }
    start = newstart;
    end = newend;
    sequence = tmp;
    sequenceChanged();
  }
1239
1240   @Override
1241   public void insertCharAt(int i, int length, char c)
1242   {
1243     char[] tmp = new char[sequence.length + length];
1244
1245     if (i >= sequence.length)
1246     {
1247       System.arraycopy(sequence, 0, tmp, 0, sequence.length);
1248       i = sequence.length;
1249     }
1250     else
1251     {
1252       System.arraycopy(sequence, 0, tmp, 0, i);
1253     }
1254
1255     int index = i;
1256     while (length > 0)
1257     {
1258       tmp[index++] = c;
1259       length--;
1260     }
1261
1262     if (i < sequence.length)
1263     {
1264       System.arraycopy(sequence, i, tmp, index, sequence.length - i);
1265     }
1266
1267     sequence = tmp;
1268     sequenceChanged();
1269   }
1270
1271   @Override
1272   public void insertCharAt(int i, char c)
1273   {
1274     insertCharAt(i, 1, c);
1275   }
1276
1277   @Override
1278   public String getVamsasId()
1279   {
1280     return vamsasId;
1281   }
1282
1283   @Override
1284   public void setVamsasId(String id)
1285   {
1286     vamsasId = id;
1287   }
1288
1289   @Override
1290   public void setDBRefs(DBRefEntry[] dbref)
1291   {
1292     if (dbrefs == null && datasetSequence != null
1293             && this != datasetSequence)
1294     {
1295       datasetSequence.setDBRefs(dbref);
1296       return;
1297     }
1298     dbrefs = dbref;
1299     if (dbrefs != null)
1300     {
1301       DBRefUtils.ensurePrimaries(this);
1302     }
1303   }
1304
1305   @Override
1306   public DBRefEntry[] getDBRefs()
1307   {
1308     if (dbrefs == null && datasetSequence != null
1309             && this != datasetSequence)
1310     {
1311       return datasetSequence.getDBRefs();
1312     }
1313     return dbrefs;
1314   }
1315
1316   @Override
1317   public void addDBRef(DBRefEntry entry)
1318   {
1319     if (datasetSequence != null)
1320     {
1321       datasetSequence.addDBRef(entry);
1322       return;
1323     }
1324
1325     if (dbrefs == null)
1326     {
1327       dbrefs = new DBRefEntry[0];
1328     }
1329
1330     for (DBRefEntryI dbr : dbrefs)
1331     {
1332       if (dbr.updateFrom(entry))
1333       {
1334         /*
1335          * found a dbref that either matched, or could be
1336          * updated from, the new entry - no need to add it
1337          */
1338         return;
1339       }
1340     }
1341
1342     /*
1343      * extend the array to make room for one more
1344      */
1345     // TODO use an ArrayList instead
1346     int j = dbrefs.length;
1347     DBRefEntry[] temp = new DBRefEntry[j + 1];
1348     System.arraycopy(dbrefs, 0, temp, 0, j);
1349     temp[temp.length - 1] = entry;
1350
1351     dbrefs = temp;
1352
1353     DBRefUtils.ensurePrimaries(this);
1354   }
1355
1356   @Override
1357   public void setDatasetSequence(SequenceI seq)
1358   {
1359     if (seq == this)
1360     {
1361       throw new IllegalArgumentException(
1362               "Implementation Error: self reference passed to SequenceI.setDatasetSequence");
1363     }
1364     if (seq != null && seq.getDatasetSequence() != null)
1365     {
1366       throw new IllegalArgumentException(
1367               "Implementation error: cascading dataset sequences are not allowed.");
1368     }
1369     datasetSequence = seq;
1370   }
1371
1372   @Override
1373   public SequenceI getDatasetSequence()
1374   {
1375     return datasetSequence;
1376   }
1377
1378   @Override
1379   public AlignmentAnnotation[] getAnnotation()
1380   {
1381     return annotation == null ? null
1382             : annotation
1383                     .toArray(new AlignmentAnnotation[annotation.size()]);
1384   }
1385
1386   @Override
1387   public boolean hasAnnotation(AlignmentAnnotation ann)
1388   {
1389     return annotation == null ? false : annotation.contains(ann);
1390   }
1391
1392   @Override
1393   public void addAlignmentAnnotation(AlignmentAnnotation annotation)
1394   {
1395     if (this.annotation == null)
1396     {
1397       this.annotation = new Vector<AlignmentAnnotation>();
1398     }
1399     if (!this.annotation.contains(annotation))
1400     {
1401       this.annotation.addElement(annotation);
1402     }
1403     annotation.setSequenceRef(this);
1404   }
1405
1406   @Override
1407   public void removeAlignmentAnnotation(AlignmentAnnotation annotation)
1408   {
1409     if (this.annotation != null)
1410     {
1411       this.annotation.removeElement(annotation);
1412       if (this.annotation.size() == 0)
1413       {
1414         this.annotation = null;
1415       }
1416     }
1417   }
1418
1419   /**
1420    * test if this is a valid candidate for another sequence's dataset sequence.
1421    *
1422    */
1423   private boolean isValidDatasetSequence()
1424   {
1425     if (datasetSequence != null)
1426     {
1427       return false;
1428     }
1429     for (int i = 0; i < sequence.length; i++)
1430     {
1431       if (jalview.util.Comparison.isGap(sequence[i]))
1432       {
1433         return false;
1434       }
1435     }
1436     return true;
1437   }
1438
1439   @Override
1440   public SequenceI deriveSequence()
1441   {
1442     Sequence seq = null;
1443     if (datasetSequence == null)
1444     {
1445       if (isValidDatasetSequence())
1446       {
1447         // Use this as dataset sequence
1448         seq = new Sequence(getName(), "", 1, -1);
1449         seq.setDatasetSequence(this);
1450         seq.initSeqFrom(this, getAnnotation());
1451         return seq;
1452       }
1453       else
1454       {
1455         // Create a new, valid dataset sequence
1456         createDatasetSequence();
1457       }
1458     }
1459     return new Sequence(this);
1460   }
1461
1462   private boolean _isNa;
1463
1464   private int _seqhash = 0;
1465
1466   /**
1467    * Answers false if the sequence is more than 85% nucleotide (ACGTU), else
1468    * true
1469    */
1470   @Override
1471   public boolean isProtein()
1472   {
1473     if (datasetSequence != null)
1474     {
1475       return datasetSequence.isProtein();
1476     }
1477     if (_seqhash != sequence.hashCode())
1478     {
1479       _seqhash = sequence.hashCode();
1480       _isNa = Comparison.isNucleotide(this);
1481     }
1482     return !_isNa;
1483   };
1484
1485   /*
1486    * (non-Javadoc)
1487    *
1488    * @see jalview.datamodel.SequenceI#createDatasetSequence()
1489    */
1490   @Override
1491   public SequenceI createDatasetSequence()
1492   {
1493     if (datasetSequence == null)
1494     {
1495       Sequence dsseq = new Sequence(getName(),
1496               AlignSeq.extractGaps(jalview.util.Comparison.GapChars,
1497                       getSequenceAsString()),
1498               getStart(), getEnd());
1499
1500       datasetSequence = dsseq;
1501
1502       dsseq.setDescription(description);
1503       // move features and database references onto dataset sequence
1504       dsseq.sequenceFeatureStore = sequenceFeatureStore;
1505       sequenceFeatureStore = null;
1506       dsseq.dbrefs = dbrefs;
1507       dbrefs = null;
1508       // TODO: search and replace any references to this sequence with
1509       // references to the dataset sequence in Mappings on dbref
1510       dsseq.pdbIds = pdbIds;
1511       pdbIds = null;
1512       datasetSequence.updatePDBIds();
1513       if (annotation != null)
1514       {
1515         // annotation is cloned rather than moved, to preserve what's currently
1516         // on the alignment
1517         for (AlignmentAnnotation aa : annotation)
1518         {
1519           AlignmentAnnotation _aa = new AlignmentAnnotation(aa);
1520           _aa.sequenceRef = datasetSequence;
1521           _aa.adjustForAlignment(); // uses annotation's own record of
1522                                     // sequence-column mapping
1523           datasetSequence.addAlignmentAnnotation(_aa);
1524         }
1525       }
1526     }
1527     return datasetSequence;
1528   }
1529
1530   /*
1531    * (non-Javadoc)
1532    *
1533    * @see
1534    * jalview.datamodel.SequenceI#setAlignmentAnnotation(AlignmmentAnnotation[]
1535    * annotations)
1536    */
1537   @Override
1538   public void setAlignmentAnnotation(AlignmentAnnotation[] annotations)
1539   {
1540     if (annotation != null)
1541     {
1542       annotation.removeAllElements();
1543     }
1544     if (annotations != null)
1545     {
1546       for (int i = 0; i < annotations.length; i++)
1547       {
1548         if (annotations[i] != null)
1549         {
1550           addAlignmentAnnotation(annotations[i]);
1551         }
1552       }
1553     }
1554   }
1555
1556   @Override
1557   public AlignmentAnnotation[] getAnnotation(String label)
1558   {
1559     if (annotation == null || annotation.size() == 0)
1560     {
1561       return null;
1562     }
1563
1564     Vector<AlignmentAnnotation> subset = new Vector<AlignmentAnnotation>();
1565     Enumeration<AlignmentAnnotation> e = annotation.elements();
1566     while (e.hasMoreElements())
1567     {
1568       AlignmentAnnotation ann = e.nextElement();
1569       if (ann.label != null && ann.label.equals(label))
1570       {
1571         subset.addElement(ann);
1572       }
1573     }
1574     if (subset.size() == 0)
1575     {
1576       return null;
1577     }
1578     AlignmentAnnotation[] anns = new AlignmentAnnotation[subset.size()];
1579     int i = 0;
1580     e = subset.elements();
1581     while (e.hasMoreElements())
1582     {
1583       anns[i++] = e.nextElement();
1584     }
1585     subset.removeAllElements();
1586     return anns;
1587   }
1588
1589   @Override
1590   public boolean updatePDBIds()
1591   {
1592     if (datasetSequence != null)
1593     {
1594       // TODO: could merge DBRefs
1595       return datasetSequence.updatePDBIds();
1596     }
1597     if (dbrefs == null || dbrefs.length == 0)
1598     {
1599       return false;
1600     }
1601     boolean added = false;
1602     for (DBRefEntry dbr : dbrefs)
1603     {
1604       if (DBRefSource.PDB.equals(dbr.getSource()))
1605       {
1606         /*
1607          * 'Add' any PDB dbrefs as a PDBEntry - add is only performed if the
1608          * PDB id is not already present in a 'matching' PDBEntry
1609          * Constructor parses out a chain code if appended to the accession id
1610          * (a fudge used to 'store' the chain code in the DBRef)
1611          */
1612         PDBEntry pdbe = new PDBEntry(dbr);
1613         added |= addPDBId(pdbe);
1614       }
1615     }
1616     return added;
1617   }
1618
1619   @Override
1620   public void transferAnnotation(SequenceI entry, Mapping mp)
1621   {
1622     if (datasetSequence != null)
1623     {
1624       datasetSequence.transferAnnotation(entry, mp);
1625       return;
1626     }
1627     if (entry.getDatasetSequence() != null)
1628     {
1629       transferAnnotation(entry.getDatasetSequence(), mp);
1630       return;
1631     }
1632     // transfer any new features from entry onto sequence
1633     if (entry.getSequenceFeatures() != null)
1634     {
1635
1636       List<SequenceFeature> sfs = entry.getSequenceFeatures();
1637       for (SequenceFeature feature : sfs)
1638       {
1639        SequenceFeature sf[] = (mp != null) ? mp.locateFeature(feature)
1640                 : new SequenceFeature[] { new SequenceFeature(feature) };
1641         if (sf != null)
1642         {
1643           for (int sfi = 0; sfi < sf.length; sfi++)
1644           {
1645             addSequenceFeature(sf[sfi]);
1646           }
1647         }
1648       }
1649     }
1650
1651     // transfer PDB entries
1652     if (entry.getAllPDBEntries() != null)
1653     {
1654       Enumeration<PDBEntry> e = entry.getAllPDBEntries().elements();
1655       while (e.hasMoreElements())
1656       {
1657         PDBEntry pdb = e.nextElement();
1658         addPDBId(pdb);
1659       }
1660     }
1661     // transfer database references
1662     DBRefEntry[] entryRefs = entry.getDBRefs();
1663     if (entryRefs != null)
1664     {
1665       for (int r = 0; r < entryRefs.length; r++)
1666       {
1667         DBRefEntry newref = new DBRefEntry(entryRefs[r]);
1668         if (newref.getMap() != null && mp != null)
1669         {
1670           // remap ref using our local mapping
1671         }
1672         // we also assume all version string setting is done by dbSourceProxy
1673         /*
1674          * if (!newref.getSource().equalsIgnoreCase(dbSource)) {
1675          * newref.setSource(dbSource); }
1676          */
1677         addDBRef(newref);
1678       }
1679     }
1680   }
1681
1682   /**
1683    * @return The index (zero-based) on this sequence in the MSA. It returns
1684    *         {@code -1} if this information is not available.
1685    */
1686   @Override
1687   public int getIndex()
1688   {
1689     return index;
1690   }
1691
1692   /**
1693    * Defines the position of this sequence in the MSA. Use the value {@code -1}
1694    * if this information is undefined.
1695    *
1696    * @param The
1697    *          position for this sequence. This value is zero-based (zero for
1698    *          this first sequence)
1699    */
1700   @Override
1701   public void setIndex(int value)
1702   {
1703     index = value;
1704   }
1705
1706   @Override
1707   public void setRNA(RNA r)
1708   {
1709     rna = r;
1710   }
1711
1712   @Override
1713   public RNA getRNA()
1714   {
1715     return rna;
1716   }
1717
1718   @Override
1719   public List<AlignmentAnnotation> getAlignmentAnnotations(String calcId,
1720           String label)
1721   {
1722     List<AlignmentAnnotation> result = new ArrayList<AlignmentAnnotation>();
1723     if (this.annotation != null)
1724     {
1725       for (AlignmentAnnotation ann : annotation)
1726       {
1727         if (ann.calcId != null && ann.calcId.equals(calcId)
1728                 && ann.label != null && ann.label.equals(label))
1729         {
1730           result.add(ann);
1731         }
1732       }
1733     }
1734     return result;
1735   }
1736
1737   @Override
1738   public String toString()
1739   {
1740     return getDisplayId(false);
1741   }
1742
1743   @Override
1744   public PDBEntry getPDBEntry(String pdbIdStr)
1745   {
1746     if (getDatasetSequence() != null)
1747     {
1748       return getDatasetSequence().getPDBEntry(pdbIdStr);
1749     }
1750     if (pdbIds == null)
1751     {
1752       return null;
1753     }
1754     List<PDBEntry> entries = getAllPDBEntries();
1755     for (PDBEntry entry : entries)
1756     {
1757       if (entry.getId().equalsIgnoreCase(pdbIdStr))
1758       {
1759         return entry;
1760       }
1761     }
1762     return null;
1763   }
1764
1765   @Override
1766   public List<DBRefEntry> getPrimaryDBRefs()
1767   {
1768     if (datasetSequence != null)
1769     {
1770       return datasetSequence.getPrimaryDBRefs();
1771     }
1772     if (dbrefs == null || dbrefs.length == 0)
1773     {
1774       return Collections.emptyList();
1775     }
1776     synchronized (dbrefs)
1777     {
1778       List<DBRefEntry> primaries = new ArrayList<DBRefEntry>();
1779       DBRefEntry[] tmp = new DBRefEntry[1];
1780       for (DBRefEntry ref : dbrefs)
1781       {
1782         if (!ref.isPrimaryCandidate())
1783         {
1784           continue;
1785         }
1786         if (ref.hasMap())
1787         {
1788           MapList mp = ref.getMap().getMap();
1789           if (mp.getFromLowest() > start || mp.getFromHighest() < end)
1790           {
1791             // map only involves a subsequence, so cannot be primary
1792             continue;
1793           }
1794         }
1795         // whilst it looks like it is a primary ref, we also sanity check type
1796         if (DBRefUtils.getCanonicalName(DBRefSource.PDB)
1797                 .equals(DBRefUtils.getCanonicalName(ref.getSource())))
1798         {
1799           // PDB dbrefs imply there should be a PDBEntry associated
1800           // TODO: tighten PDB dbrefs
1801           // formally imply Jalview has actually downloaded and
1802           // parsed the pdb file. That means there should be a cached file
1803           // handle on the PDBEntry, and a real mapping between sequence and
1804           // extracted sequence from PDB file
1805           PDBEntry pdbentry = getPDBEntry(ref.getAccessionId());
1806           if (pdbentry != null && pdbentry.getFile() != null)
1807           {
1808             primaries.add(ref);
1809           }
1810           continue;
1811         }
1812         // check standard protein or dna sources
1813         tmp[0] = ref;
1814         DBRefEntry[] res = DBRefUtils.selectDbRefs(!isProtein(), tmp);
1815         if (res != null && res[0] == tmp[0])
1816         {
1817           primaries.add(ref);
1818           continue;
1819         }
1820       }
1821       return primaries;
1822     }
1823   }
1824
1825   /**
1826    * {@inheritDoc}
1827    */
1828   @Override
1829   public List<SequenceFeature> findFeatures(int fromColumn, int toColumn,
1830           String... types)
1831   {
1832     int startPos = findPosition(fromColumn - 1); // convert base 1 to base 0
1833     int endPos = fromColumn == toColumn ? startPos
1834             : findPosition(toColumn - 1);
1835
1836     List<SequenceFeature> result = getFeatures().findFeatures(startPos,
1837             endPos, types);
1838     if (datasetSequence != null)
1839     {
1840       result = datasetSequence.getFeatures().findFeatures(startPos, endPos,
1841               types);
1842     }
1843     else
1844     {
1845       result = sequenceFeatureStore.findFeatures(startPos, endPos, types);
1846     }
1847
1848     /*
1849      * if end column is gapped, endPos may be to the right,
1850      * and we may have included adjacent or enclosing features;
1851      * remove any that are not enclosing, non-contact features
1852      */
1853     boolean endColumnIsGapped = toColumn > 0 && toColumn <= sequence.length
1854             && Comparison.isGap(sequence[toColumn - 1]);
1855     if (endPos > this.end || endColumnIsGapped)
1856     {
1857       ListIterator<SequenceFeature> it = result.listIterator();
1858       while (it.hasNext())
1859       {
1860         SequenceFeature sf = it.next();
1861         int sfBegin = sf.getBegin();
1862         int sfEnd = sf.getEnd();
1863         int featureStartColumn = findIndex(sfBegin);
1864         if (featureStartColumn > toColumn)
1865         {
1866           it.remove();
1867         }
1868         else if (featureStartColumn < fromColumn)
1869         {
1870           int featureEndColumn = sfEnd == sfBegin ? featureStartColumn
1871                   : findIndex(sfEnd);
1872           if (featureEndColumn < fromColumn)
1873           {
1874             it.remove();
1875           }
1876           else if (featureEndColumn > toColumn && sf.isContactFeature())
1877           {
1878             /*
1879              * remove an enclosing feature if it is a contact feature
1880              */
1881             it.remove();
1882           }
1883         }
1884       }
1885     }
1886
1887     return result;
1888   }
1889
1890   /**
1891    * Invalidates any stale cursors (forcing recalculation) by incrementing the
1892    * token that has to match the one presented by the cursor
1893    */
1894   @Override
1895   public void sequenceChanged()
1896   {
1897     changeCount++;
1898   }
1899
1900   /**
1901    * {@inheritDoc}
1902    */
1903   @Override
1904   public int replace(char c1, char c2)
1905   {
1906     if (c1 == c2)
1907     {
1908       return 0;
1909     }
1910     int count = 0;
1911     synchronized (sequence)
1912     {
1913       for (int c = 0; c < sequence.length; c++)
1914       {
1915         if (sequence[c] == c1)
1916         {
1917           sequence[c] = c2;
1918           count++;
1919         }
1920       }
1921     }
1922     if (count > 0)
1923     {
1924       sequenceChanged();
1925     }
1926
1927     return count;
1928   }
1929 }