src/jalview/datamodel/Sequence.java

   1 /*
   2  * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
   3  * Copyright (C) $$Year-Rel$$ The Jalview Authors
   4  *
   5  * This file is part of Jalview.
   6  *
   7  * Jalview is free software: you can redistribute it and/or
   8  * modify it under the terms of the GNU General Public License
   9  * as published by the Free Software Foundation, either version 3
  10  * of the License, or (at your option) any later version.
  11  *
  12  * Jalview is distributed in the hope that it will be useful, but
  13  * WITHOUT ANY WARRANTY; without even the implied warranty
  14  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
  15  * PURPOSE.  See the GNU General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU General Public License
  18  * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
  19  * The Jalview Authors are detailed in the 'AUTHORS' file.
  20  */
  21 package jalview.datamodel;
  22
  23 import jalview.analysis.AlignSeq;
  24 import jalview.api.DBRefEntryI;
  25 import jalview.datamodel.features.SequenceFeatures;
  26 import jalview.datamodel.features.SequenceFeaturesI;
  27 import jalview.util.Comparison;
  28 import jalview.util.DBRefUtils;
  29 import jalview.util.MapList;
  30 import jalview.util.StringUtils;
  31
  32 import java.util.ArrayList;
  33 import java.util.Arrays;
  34 import java.util.BitSet;
  35 import java.util.Collections;
  36 import java.util.Enumeration;
  37 import java.util.Iterator;
  38 import java.util.List;
  39 import java.util.ListIterator;
  40 import java.util.Vector;
  41
  42 import fr.orsay.lri.varna.models.rna.RNA;
  43
  44 /**
  45  *
  46  * Implements the SequenceI interface for a char[] based sequence object.
  47  *
  48  * @author $author$
  49  * @version $Revision$
  50  */
  51 public class Sequence extends ASequence implements SequenceI
  52 {
  /**
   * The dataset sequence this sequence refers to, or null. When it is set,
   * the sequence feature methods of this class delegate to it.
   */
  SequenceI datasetSequence;

  /**
   * Display name; parseId() removes a valid trailing /start-end suffix and
   * replaces a null name with the empty string.
   */
  String name;

  /** The (possibly gapped) sequence characters. */
  private char[] sequence;

  /** Free-text description (see setDescription / getDescription). */
  String description;

  /** Residue position of the first non-gap character. */
  int start;

  /**
   * Residue position of the last residue; checkValidRange() raises it when it
   * is too small for the number of non-gap characters held.
   */
  int end;

  /** PDB entries; null until addPDBId or setPDBId supplies a vector. */
  Vector<PDBEntry> pdbIds;

  // NOTE(review): not referenced in the part of the file visible here
  String vamsasId;

  // presumably maintained by addDBRef / getDBRefs, which are defined outside
  // the part of the file visible here - TODO confirm
  DBRefEntry[] dbrefs;

  // NOTE(review): not referenced in the part of the file visible here
  RNA rna;

  /**
   * This annotation is displayed below the alignment but the positions are tied
   * to the residues of this sequence
   *
   * TODO: change to List<>
   */
  Vector<AlignmentAnnotation> annotation;

  /**
   * Features held directly by this sequence; created by the default
   * constructor and only consulted when datasetSequence is null.
   */
  private SequenceFeaturesI sequenceFeatureStore;

  /*
   * A cursor holding the approximate current view position to the sequence,
   * as determined by findIndex or findPosition or findPositions.
   * Using a cursor as a hint allows these methods to be more performant for
   * large sequences.
   */
  private SequenceCursor cursor;

  /*
   * A number that should be incremented whenever the sequence is edited.
   * If the value matches the cursor token, then we can trust the cursor,
   * if not then it should be recomputed.
   */
  private int changeCount;
  97
  98   /**
  99    * Creates a new Sequence object.
 100    *
 101    * @param name
 102    *          display name string
 103    * @param sequence
 104    *          string to form a possibly gapped sequence out of
 105    * @param start
 106    *          first position of non-gap residue in the sequence
 107    * @param end
 108    *          last position of ungapped residues (nearly always only used for
 109    *          display purposes)
 110    */
 111   public Sequence(String name, String sequence, int start, int end)
 112   {
 113     this();
 114     initSeqAndName(name, sequence.toCharArray(), start, end);
 115   }
 116
 117   public Sequence(String name, char[] sequence, int start, int end)
 118   {
 119     this();
 120     initSeqAndName(name, sequence, start, end);
 121   }
 122
 123   /**
 124    * Stage 1 constructor - assign name, sequence, and set start and end fields.
 125    * start and end are updated values from name2 if it ends with /start-end
 126    *
 127    * @param name2
 128    * @param sequence2
 129    * @param start2
 130    * @param end2
 131    */
 132   protected void initSeqAndName(String name2, char[] sequence2, int start2,
 133           int end2)
 134   {
 135     this.name = name2;
 136     this.sequence = sequence2;
 137     this.start = start2;
 138     this.end = end2;
 139     parseId();
 140     checkValidRange();
 141   }
 142
 143   /**
 144    * If 'name' ends in /i-j, where i >= j > 0 are integers, extracts i and j as
 145    * start and end respectively and removes the suffix from the name
 146    */
 147   void parseId()
 148   {
 149     if (name == null)
 150     {
 151       System.err.println(
 152               "POSSIBLE IMPLEMENTATION ERROR: null sequence name passed to constructor.");
 153       name = "";
 154     }
 155     int slashPos = name.lastIndexOf('/');
 156     if (slashPos > -1 && slashPos < name.length() - 1)
 157     {
 158       String suffix = name.substring(slashPos + 1);
 159       String[] range = suffix.split("-");
 160       if (range.length == 2)
 161       {
 162         try
 163         {
 164           int from = Integer.valueOf(range[0]);
 165           int to = Integer.valueOf(range[1]);
 166           if (from > 0 && to >= from)
 167           {
 168             name = name.substring(0, slashPos);
 169             setStart(from);
 170             setEnd(to);
 171             checkValidRange();
 172           }
 173         } catch (NumberFormatException e)
 174         {
 175           // leave name unchanged if suffix is invalid
 176         }
 177       }
 178     }
 179   }
 180
 181   /**
 182    * Ensures that 'end' is not before the end of the sequence, that is,
 183    * (end-start+1) is at least as long as the count of ungapped positions. Note
 184    * that end is permitted to be beyond the end of the sequence data.
 185    */
 186   void checkValidRange()
 187   {
 188     // Note: JAL-774 :
 189     // http://issues.jalview.org/browse/JAL-774?focusedCommentId=11239&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-11239
 190     {
 191       int endRes = 0;
 192       for (int j = 0; j < sequence.length; j++)
 193       {
 194         if (!Comparison.isGap(sequence[j]))
 195         {
 196           endRes++;
 197         }
 198       }
 199       if (endRes > 0)
 200       {
 201         endRes += start - 1;
 202       }
 203
 204       if (end < endRes)
 205       {
 206         end = endRes;
 207       }
 208     }
 209
 210   }
 211
 212   /**
 213    * default constructor
 214    */
 215   private Sequence()
 216   {
 217     sequenceFeatureStore = new SequenceFeatures();
 218   }
 219
 220   /**
 221    * Creates a new Sequence object.
 222    *
 223    * @param name
 224    *          DOCUMENT ME!
 225    * @param sequence
 226    *          DOCUMENT ME!
 227    */
 228   public Sequence(String name, String sequence)
 229   {
 230     this(name, sequence, 1, -1);
 231   }
 232
 233   /**
 234    * Creates a new Sequence object with new AlignmentAnnotations but inherits
 235    * any existing dataset sequence reference. If non exists, everything is
 236    * copied.
 237    *
 238    * @param seq
 239    *          if seq is a dataset sequence, behaves like a plain old copy
 240    *          constructor
 241    */
 242   public Sequence(SequenceI seq)
 243   {
 244     this(seq, seq.getAnnotation());
 245   }
 246
 247   /**
 248    * Create a new sequence object with new features, DBRefEntries, and PDBIds
 249    * but inherits any existing dataset sequence reference, and duplicate of any
 250    * annotation that is present in the given annotation array.
 251    *
 252    * @param seq
 253    *          the sequence to be copied
 254    * @param alAnnotation
 255    *          an array of annotation including some associated with seq
 256    */
 257   public Sequence(SequenceI seq, AlignmentAnnotation[] alAnnotation)
 258   {
 259     this();
 260     initSeqFrom(seq, alAnnotation);
 261   }
 262
 263   /**
 264    * does the heavy lifting when cloning a dataset sequence, or coping data from
 265    * dataset to a new derived sequence.
 266    *
 267    * @param seq
 268    *          - source of attributes.
 269    * @param alAnnotation
 270    *          - alignment annotation present on seq that should be copied onto
 271    *          this sequence
 272    */
 273   protected void initSeqFrom(SequenceI seq,
 274           AlignmentAnnotation[] alAnnotation)
 275   {
 276     char[] oseq = seq.getSequence(); // returns a copy of the array
 277     initSeqAndName(seq.getName(), oseq, seq.getStart(), seq.getEnd());
 278
 279     description = seq.getDescription();
 280     if (seq != datasetSequence)
 281     {
 282       setDatasetSequence(seq.getDatasetSequence());
 283     }
 284
 285     /*
 286      * only copy DBRefs and seqfeatures if we really are a dataset sequence
 287      */
 288     if (datasetSequence == null)
 289     {
 290       if (seq.getDBRefs() != null)
 291       {
 292         DBRefEntry[] dbr = seq.getDBRefs();
 293         for (int i = 0; i < dbr.length; i++)
 294         {
 295           addDBRef(new DBRefEntry(dbr[i]));
 296         }
 297       }
 298
 299       /*
 300        * make copies of any sequence features
 301        */
 302       for (SequenceFeature sf : seq.getSequenceFeatures())
 303       {
 304         addSequenceFeature(new SequenceFeature(sf));
 305       }
 306     }
 307
 308     if (seq.getAnnotation() != null)
 309     {
 310       AlignmentAnnotation[] sqann = seq.getAnnotation();
 311       for (int i = 0; i < sqann.length; i++)
 312       {
 313         if (sqann[i] == null)
 314         {
 315           continue;
 316         }
 317         boolean found = (alAnnotation == null);
 318         if (!found)
 319         {
 320           for (int apos = 0; !found && apos < alAnnotation.length; apos++)
 321           {
 322             found = (alAnnotation[apos] == sqann[i]);
 323           }
 324         }
 325         if (found)
 326         {
 327           // only copy the given annotation
 328           AlignmentAnnotation newann = new AlignmentAnnotation(sqann[i]);
 329           addAlignmentAnnotation(newann);
 330         }
 331       }
 332     }
 333     if (seq.getAllPDBEntries() != null)
 334     {
 335       Vector<PDBEntry> ids = seq.getAllPDBEntries();
 336       for (PDBEntry pdb : ids)
 337       {
 338         this.addPDBId(new PDBEntry(pdb));
 339       }
 340     }
 341   }
 342
 343   @Override
 344   public void setSequenceFeatures(List<SequenceFeature> features)
 345   {
 346     if (datasetSequence != null)
 347     {
 348       datasetSequence.setSequenceFeatures(features);
 349       return;
 350     }
 351     sequenceFeatureStore = new SequenceFeatures(features);
 352   }
 353
 354   @Override
 355   public synchronized boolean addSequenceFeature(SequenceFeature sf)
 356   {
 357     if (sf.getType() == null)
 358     {
 359       System.err.println("SequenceFeature type may not be null: "
 360               + sf.toString());
 361       return false;
 362     }
 363
 364     if (datasetSequence != null)
 365     {
 366       return datasetSequence.addSequenceFeature(sf);
 367     }
 368
 369     return sequenceFeatureStore.add(sf);
 370   }
 371
 372   @Override
 373   public void deleteFeature(SequenceFeature sf)
 374   {
 375     if (datasetSequence != null)
 376     {
 377       datasetSequence.deleteFeature(sf);
 378     }
 379     else
 380     {
 381       sequenceFeatureStore.delete(sf);
 382     }
 383   }
 384
 385   /**
 386    * {@inheritDoc}
 387    *
 388    * @return
 389    */
 390   @Override
 391   public List<SequenceFeature> getSequenceFeatures()
 392   {
 393     if (datasetSequence != null)
 394     {
 395       return datasetSequence.getSequenceFeatures();
 396     }
 397     return sequenceFeatureStore.getAllFeatures();
 398   }
 399
 400   @Override
 401   public SequenceFeaturesI getFeatures()
 402   {
 403     return datasetSequence != null ? datasetSequence.getFeatures()
 404             : sequenceFeatureStore;
 405   }
 406
 407   @Override
 408   public boolean addPDBId(PDBEntry entry)
 409   {
 410     if (pdbIds == null)
 411     {
 412       pdbIds = new Vector<>();
 413       pdbIds.add(entry);
 414       return true;
 415     }
 416
 417     for (PDBEntry pdbe : pdbIds)
 418     {
 419       if (pdbe.updateFrom(entry))
 420       {
 421         return false;
 422       }
 423     }
 424     pdbIds.addElement(entry);
 425     return true;
 426   }
 427
 428   /**
 429    * DOCUMENT ME!
 430    *
 431    * @param id
 432    *          DOCUMENT ME!
 433    */
 434   @Override
 435   public void setPDBId(Vector<PDBEntry> id)
 436   {
 437     pdbIds = id;
 438   }
 439
 440   /**
 441    * DOCUMENT ME!
 442    *
 443    * @return DOCUMENT ME!
 444    */
 445   @Override
 446   public Vector<PDBEntry> getAllPDBEntries()
 447   {
 448     return pdbIds == null ? new Vector<>() : pdbIds;
 449   }
 450
 451   /**
 452    * DOCUMENT ME!
 453    *
 454    * @return DOCUMENT ME!
 455    */
 456   @Override
 457   public String getDisplayId(boolean jvsuffix)
 458   {
 459     StringBuffer result = new StringBuffer(name);
 460     if (jvsuffix)
 461     {
 462       result.append("/" + start + "-" + end);
 463     }
 464
 465     return result.toString();
 466   }
 467
 468   /**
 469    * Sets the sequence name. If the name ends in /start-end, then the start-end
 470    * values are parsed out and set, and the suffix is removed from the name.
 471    *
 472    * @param theName
 473    */
 474   @Override
 475   public void setName(String theName)
 476   {
 477     this.name = theName;
 478     this.parseId();
 479   }
 480
 481   /**
 482    * DOCUMENT ME!
 483    *
 484    * @return DOCUMENT ME!
 485    */
 486   @Override
 487   public String getName()
 488   {
 489     return this.name;
 490   }
 491
 492   /**
 493    * DOCUMENT ME!
 494    *
 495    * @param start
 496    *          DOCUMENT ME!
 497    */
 498   @Override
 499   public void setStart(int start)
 500   {
 501     this.start = start;
 502   }
 503
 504   /**
 505    * DOCUMENT ME!
 506    *
 507    * @return DOCUMENT ME!
 508    */
 509   @Override
 510   public int getStart()
 511   {
 512     return this.start;
 513   }
 514
 515   /**
 516    * DOCUMENT ME!
 517    *
 518    * @param end
 519    *          DOCUMENT ME!
 520    */
 521   @Override
 522   public void setEnd(int end)
 523   {
 524     this.end = end;
 525   }
 526
 527   /**
 528    * DOCUMENT ME!
 529    *
 530    * @return DOCUMENT ME!
 531    */
 532   @Override
 533   public int getEnd()
 534   {
 535     return this.end;
 536   }
 537
 538   /**
 539    * DOCUMENT ME!
 540    *
 541    * @return DOCUMENT ME!
 542    */
 543   @Override
 544   public int getLength()
 545   {
 546     return this.sequence.length;
 547   }
 548
 549   /**
 550    * DOCUMENT ME!
 551    *
 552    * @param seq
 553    *          DOCUMENT ME!
 554    */
 555   @Override
 556   public void setSequence(String seq)
 557   {
 558     this.sequence = seq.toCharArray();
 559     checkValidRange();
 560     sequenceChanged();
 561   }
 562
 563   @Override
 564   public String getSequenceAsString()
 565   {
 566     return new String(sequence);
 567   }
 568
 569   @Override
 570   public String getSequenceAsString(int start, int end)
 571   {
 572     return new String(getSequence(start, end));
 573   }
 574
 575   @Override
 576   public char[] getSequence()
 577   {
 578     // return sequence;
 579     return sequence == null ? null : Arrays.copyOf(sequence,
 580             sequence.length);
 581   }
 582
 583   /*
 584    * (non-Javadoc)
 585    *
 586    * @see jalview.datamodel.SequenceI#getSequence(int, int)
 587    */
 588   @Override
 589   public char[] getSequence(int start, int end)
 590   {
 591     if (start < 0)
 592     {
 593       start = 0;
 594     }
 595     // JBPNote - left to user to pad the result here (TODO:Decide on this
 596     // policy)
 597     if (start >= sequence.length)
 598     {
 599       return new char[0];
 600     }
 601
 602     if (end >= sequence.length)
 603     {
 604       end = sequence.length;
 605     }
 606
 607     char[] reply = new char[end - start];
 608     System.arraycopy(sequence, start, reply, 0, end - start);
 609
 610     return reply;
 611   }
 612
 613   @Override
 614   public SequenceI getSubSequence(int start, int end)
 615   {
 616     if (start < 0)
 617     {
 618       start = 0;
 619     }
 620     char[] seq = getSequence(start, end);
 621     if (seq.length == 0)
 622     {
 623       return null;
 624     }
 625     int nstart = findPosition(start);
 626     int nend = findPosition(end) - 1;
 627     // JBPNote - this is an incomplete copy.
 628     SequenceI nseq = new Sequence(this.getName(), seq, nstart, nend);
 629     nseq.setDescription(description);
 630     if (datasetSequence != null)
 631     {
 632       nseq.setDatasetSequence(datasetSequence);
 633     }
 634     else
 635     {
 636       nseq.setDatasetSequence(this);
 637     }
 638     return nseq;
 639   }
 640
 641   /**
 642    * Returns the character of the aligned sequence at the given position (base
 643    * zero), or space if the position is not within the sequence's bounds
 644    *
 645    * @return
 646    */
 647   @Override
 648   public char getCharAt(int i)
 649   {
 650     if (i >= 0 && i < sequence.length)
 651     {
 652       return sequence[i];
 653     }
 654     else
 655     {
 656       return ' ';
 657     }
 658   }
 659
 660   /**
 661    * Sets the sequence description, and also parses out any special formats of
 662    * interest
 663    *
 664    * @param desc
 665    */
 666   @Override
 667   public void setDescription(String desc)
 668   {
 669     this.description = desc;
 670   }
 671
 672   @Override
 673   public void setGeneLoci(String speciesId, String assemblyId,
 674           String chromosomeId, MapList map)
 675   {
 676     addDBRef(new DBRefEntry(speciesId, assemblyId, DBRefEntry.CHROMOSOME
 677             + ":" + chromosomeId, new Mapping(map)));
 678   }
 679
 680   /**
 681    * Returns the gene loci mapping for the sequence (may be null)
 682    *
 683    * @return
 684    */
 685   @Override
 686   public GeneLociI getGeneLoci()
 687   {
 688     DBRefEntry[] refs = getDBRefs();
 689     if (refs != null)
 690     {
 691       for (final DBRefEntry ref : refs)
 692       {
 693         if (ref.isChromosome())
 694         {
 695           return new GeneLociI()
 696           {
 697             @Override
 698             public String getSpeciesId()
 699             {
 700               return ref.getSource();
 701             }
 702
 703             @Override
 704             public String getAssemblyId()
 705             {
 706               return ref.getVersion();
 707             }
 708
 709             @Override
 710             public String getChromosomeId()
 711             {
 712               // strip off "chromosome:" prefix to chrId
 713               return ref.getAccessionId().substring(
 714                       DBRefEntry.CHROMOSOME.length() + 1);
 715             }
 716
 717             @Override
 718             public MapList getMap()
 719             {
 720               return ref.getMap().getMap();
 721             }
 722           };
 723         }
 724       }
 725     }
 726     return null;
 727   }
 728
 729   /**
 730    * Answers the description
 731    *
 732    * @return
 733    */
 734   @Override
 735   public String getDescription()
 736   {
 737     return this.description;
 738   }
 739
  /**
   * {@inheritDoc}
   * <p>
   * Implementation: if a valid cursor is held, the search starts from it
   * (see findIndex(int, SequenceCursor)); otherwise the sequence is scanned
   * from its first column, counting residues, and the cursor is then updated
   * with the result.
   *
   * @param pos
   *          residue position (start..)
   */
  @Override
  public int findIndex(int pos)
  {
    /*
     * use a valid, hopefully nearby, cursor if available
     */
    if (isValidCursor(cursor))
    {
      return findIndex(pos, cursor);
    }

    // j: position of the next residue to be met; i: number of columns consumed
    int j = start;
    int i = 0;
    int startColumn = 0;

    /*
     * traverse sequence from the start counting gaps; make a note of
     * the column of the first residue to save in the cursor
     */
    while ((i < sequence.length) && (j <= end) && (j <= pos))
    {
      if (!Comparison.isGap(sequence[i]))
      {
        if (j == start)
        {
          // NOTE(review): i is a base 0 index here, whereas findPosition
          // passes a base 1 first residue column to updateCursor - confirm
          startColumn = i;
        }
        j++;
      }
      i++;
    }

    // NOTE(review): this returns a value derived from the residue position
    // 'end' rather than a column count - confirm this is intended
    if (j == end && j < pos)
    {
      return end + 1;
    }

    // i is now the base 1 column of the last column consumed by the scan
    updateCursor(pos, i, startColumn);
    return i;
  }
 783
 784   /**
 785    * Updates the cursor to the latest found residue and column position
 786    *
 787    * @param residuePos
 788    *          (start..)
 789    * @param column
 790    *          (1..)
 791    * @param startColumn
 792    *          column position of the first sequence residue
 793    */
 794   protected void updateCursor(int residuePos, int column, int startColumn)
 795   {
 796     /*
 797      * preserve end residue column provided cursor was valid
 798      */
 799     int endColumn = isValidCursor(cursor) ? cursor.lastColumnPosition : 0;
 800     if (residuePos == this.end)
 801     {
 802       endColumn = column;
 803     }
 804
 805     cursor = new SequenceCursor(this, residuePos, column, startColumn,
 806             endColumn, this.changeCount);
 807   }
 808
  /**
   * Answers the aligned column position (1..) for the given residue position
   * (start..) given a 'hint' of a residue/column location in the neighbourhood.
   * The hint may be left of, at, or to the right of the required position.
   *
   * @param pos
   *          the residue position to locate
   * @param curs
   *          the hint; if it is not valid for this sequence the result is
   *          computed by findIndex(pos) instead
   * @return the column (base 1) reached by walking from the hint towards pos;
   *         if the walk runs off either end of the sequence before reaching
   *         pos, the column at which it stopped
   */
  protected int findIndex(final int pos, SequenceCursor curs)
  {
    if (!isValidCursor(curs))
    {
      /*
       * wrong or invalidated cursor, compute de novo
       */
      return findIndex(pos);
    }

    // the hint is exactly the position wanted
    if (curs.residuePosition == pos)
    {
      return curs.columnPosition;
    }

    /*
     * move left or right to find pos from hint.position
     */
    int col = curs.columnPosition - 1; // convert from base 1 to 0-based array
                                       // index
    int newPos = curs.residuePosition;
    int delta = newPos > pos ? -1 : 1; // direction of travel

    while (newPos != pos)
    {
      col += delta; // shift one column left or right
      if (col < 0)
      {
        // ran off the left end; col++ below turns this into 0
        break;
      }
      if (col == sequence.length)
      {
        col--; // return last column if we failed to reach pos
        break;
      }
      // only residues change the position count; gaps are stepped over
      if (!Comparison.isGap(sequence[col]))
      {
        newPos += delta;
      }
    }

    col++; // convert back to base 1

    /*
     * only update cursor if we found the target position
     */
    if (newPos == pos)
    {
      updateCursor(pos, col, curs.firstColumnPosition);
    }

    return col;
  }
 871
  /**
   * {@inheritDoc}
   * <p>
   * Implementation: if a valid cursor is held, the search starts from it
   * (see findPosition(int, SequenceCursor), which takes a base 1 column);
   * otherwise residues are counted from the first column. Without a cursor
   * the value returned is start plus the number of residues in the columns
   * before the given one.
   *
   * @param column
   *          column index (base 0)
   */
  @Override
  public int findPosition(final int column)
  {
    /*
     * use a valid, hopefully nearby, cursor if available
     */
    if (isValidCursor(cursor))
    {
      return findPosition(column + 1, cursor);
    }

    // TODO recode this more naturally i.e. count residues only
    // as they are found, not 'in anticipation'

    /*
     * traverse the sequence counting gaps; note the column position
     * of the first residue, to save in the cursor
     */
    int firstResidueColumn = 0;
    int lastPosFound = 0; // 0 is used below to mean 'no residue met'
    int lastPosFoundColumn = 0;
    int seqlen = sequence.length;

    if (seqlen > 0 && !Comparison.isGap(sequence[0]))
    {
      lastPosFound = start;
      lastPosFoundColumn = 0;
    }

    int j = 0;
    int pos = start; // position the next residue met will have

    // count the residues in the columns strictly before 'column'
    while (j < column && j < seqlen)
    {
      if (!Comparison.isGap(sequence[j]))
      {
        lastPosFound = pos;
        lastPosFoundColumn = j;
        if (pos == this.start)
        {
          firstResidueColumn = j;
        }
        pos++;
      }
      j++;
    }
    // if the target column itself holds a residue, that is the one to
    // remember in the cursor (pos is not advanced past it)
    if (j < seqlen && !Comparison.isGap(sequence[j]))
    {
      lastPosFound = pos;
      lastPosFoundColumn = j;
      if (pos == this.start)
      {
        firstResidueColumn = j;
      }
    }

    /*
     * update the cursor to the last residue position found (if any)
     * (converting column position to base 1)
     */
    if (lastPosFound != 0)
    {
      updateCursor(lastPosFound, lastPosFoundColumn + 1,
              firstResidueColumn + 1);
    }

    return pos;
  }
 943
 944   /**
 945    * Answers true if the given cursor is not null, is for this sequence object,
 946    * and has a token value that matches this object's changeCount, else false.
 947    * This allows us to ignore a cursor as 'stale' if the sequence has been
 948    * modified since the cursor was created.
 949    *
 950    * @param curs
 951    * @return
 952    */
 953   protected boolean isValidCursor(SequenceCursor curs)
 954   {
 955     if (curs == null || curs.sequence != this || curs.token != changeCount)
 956     {
 957       return false;
 958     }
 959     /*
 960      * sanity check against range
 961      */
 962     if (curs.columnPosition < 0 || curs.columnPosition > sequence.length)
 963     {
 964       return false;
 965     }
 966     if (curs.residuePosition < start || curs.residuePosition > end)
 967     {
 968       return false;
 969     }
 970     return true;
 971   }
 972
  /**
   * Answers the sequence position (start..) for the given aligned column
   * position (1..), given a hint of a cursor in the neighbourhood. The cursor
   * may lie left of, at, or to the right of the column position.
   *
   * @param col
   *          the column to convert (base 1)
   * @param curs
   *          the hint; if it is not valid for this sequence the result is
   *          computed by findPosition(col - 1) instead
   * @return the residue position for the column; when the walk moves right
   *         and ends on a gap or past the end of the sequence, the position
   *         of the next residue to the right is returned
   */
  protected int findPosition(final int col, SequenceCursor curs)
  {
    if (!isValidCursor(curs))
    {
      /*
       * wrong or invalidated cursor, compute de novo
       */
      return findPosition(col - 1);// ugh back to base 0
    }

    if (curs.columnPosition == col)
    {
      cursor = curs; // in case this method becomes public
      return curs.residuePosition; // easy case :-)
    }

    // a lastColumnPosition of 0 means the end residue column is not known
    if (curs.lastColumnPosition > 0 && curs.lastColumnPosition < col)
    {
      /*
       * sequence lies entirely to the left of col
       * - return last residue + 1
       */
      return end + 1;
    }

    // a firstColumnPosition of 0 means the first residue column is not known
    if (curs.firstColumnPosition > 0 && curs.firstColumnPosition > col)
    {
      /*
       * sequence lies entirely to the right of col
       * - return first residue
       */
      return start;
    }

    // todo could choose closest to col out of column,
    // firstColumnPosition, lastColumnPosition as a start point

    /*
     * move left or right to find pos from cursor position
     */
    int firstResidueColumn = curs.firstColumnPosition;
    int column = curs.columnPosition - 1; // to base 0
    int newPos = curs.residuePosition;
    int delta = curs.columnPosition > col ? -1 : 1; // direction of travel
    boolean gapped = false; // whether the last column visited was a gap
    int lastFoundPosition = curs.residuePosition;
    int lastFoundPositionColumn = curs.columnPosition;

    while (column != col - 1)
    {
      column += delta; // shift one column left or right
      if (column < 0 || column == sequence.length)
      {
        // walked off either end of the sequence
        break;
      }
      gapped = Comparison.isGap(sequence[column]);
      if (!gapped)
      {
        newPos += delta;
        lastFoundPosition = newPos;
        lastFoundPositionColumn = column + 1; // base 1
        if (lastFoundPosition == this.start)
        {
          firstResidueColumn = column + 1;
        }
      }
    }

    // remember the last residue met, unless the cursor is already on it
    if (cursor == null || lastFoundPosition != cursor.residuePosition)
    {
      updateCursor(lastFoundPosition, lastFoundPositionColumn,
              firstResidueColumn);
    }

    /*
     * hack to give position to the right if on a gap
     * or beyond the length of the sequence (see JAL-2562)
     */
    if (delta > 0 && (gapped || column >= sequence.length))
    {
      newPos++;
    }

    return newPos;
  }
1067
1068   /**
1069    * {@inheritDoc}
1070    */
1071   @Override
1072   public Range findPositions(int fromColumn, int toColumn)
1073   {
1074     if (toColumn < fromColumn || fromColumn < 1)
1075     {
1076       return null;
1077     }
1078
1079     /*
1080      * find the first non-gapped position, if any
1081      */
1082     int firstPosition = 0;
1083     int col = fromColumn - 1;
1084     int length = sequence.length;
1085     while (col < length && col < toColumn)
1086     {
1087       if (!Comparison.isGap(sequence[col]))
1088       {
1089         firstPosition = findPosition(col++);
1090         break;
1091       }
1092       col++;
1093     }
1094
1095     if (firstPosition == 0)
1096     {
1097       return null;
1098     }
1099
1100     /*
1101      * find the last non-gapped position
1102      */
1103     int lastPosition = firstPosition;
1104     while (col < length && col < toColumn)
1105     {
1106       if (!Comparison.isGap(sequence[col++]))
1107       {
1108         lastPosition++;
1109       }
1110     }
1111
1112     return new Range(firstPosition, lastPosition);
1113   }
1114
1115   /**
1116    * Returns an int array where indices correspond to each residue in the
1117    * sequence and the element value gives its position in the alignment
1118    *
1119    * @return int[SequenceI.getEnd()-SequenceI.getStart()+1] or null if no
1120    *         residues in SequenceI object
1121    */
1122   @Override
1123   public int[] gapMap()
1124   {
1125     String seq = jalview.analysis.AlignSeq.extractGaps(
1126             jalview.util.Comparison.GapChars, new String(sequence));
1127     int[] map = new int[seq.length()];
1128     int j = 0;
1129     int p = 0;
1130
1131     while (j < sequence.length)
1132     {
1133       if (!jalview.util.Comparison.isGap(sequence[j]))
1134       {
1135         map[p++] = j;
1136       }
1137
1138       j++;
1139     }
1140
1141     return map;
1142   }
1143
1144   /**
1145    * Build a bitset corresponding to sequence gaps
1146    *
1147    * @return a BitSet where set values correspond to gaps in the sequence
1148    */
1149   @Override
1150   public BitSet gapBitset()
1151   {
1152     BitSet gaps = new BitSet(sequence.length);
1153     int j = 0;
1154     while (j < sequence.length)
1155     {
1156       if (jalview.util.Comparison.isGap(sequence[j]))
1157       {
1158         gaps.set(j);
1159       }
1160       j++;
1161     }
1162     return gaps;
1163   }
1164
1165   @Override
1166   public int[] findPositionMap()
1167   {
1168     int map[] = new int[sequence.length];
1169     int j = 0;
1170     int pos = start;
1171     int seqlen = sequence.length;
1172     while ((j < seqlen))
1173     {
1174       map[j] = pos;
1175       if (!jalview.util.Comparison.isGap(sequence[j]))
1176       {
1177         pos++;
1178       }
1179
1180       j++;
1181     }
1182     return map;
1183   }
1184
1185   @Override
1186   public List<int[]> getInsertions()
1187   {
1188     ArrayList<int[]> map = new ArrayList<>();
1189     int lastj = -1, j = 0;
1190     int pos = start;
1191     int seqlen = sequence.length;
1192     while ((j < seqlen))
1193     {
1194       if (jalview.util.Comparison.isGap(sequence[j]))
1195       {
1196         if (lastj == -1)
1197         {
1198           lastj = j;
1199         }
1200       }
1201       else
1202       {
1203         if (lastj != -1)
1204         {
1205           map.add(new int[] { lastj, j - 1 });
1206           lastj = -1;
1207         }
1208       }
1209       j++;
1210     }
1211     if (lastj != -1)
1212     {
1213       map.add(new int[] { lastj, j - 1 });
1214       lastj = -1;
1215     }
1216     return map;
1217   }
1218
1219   @Override
1220   public BitSet getInsertionsAsBits()
1221   {
1222     BitSet map = new BitSet();
1223     int lastj = -1, j = 0;
1224     int pos = start;
1225     int seqlen = sequence.length;
1226     while ((j < seqlen))
1227     {
1228       if (jalview.util.Comparison.isGap(sequence[j]))
1229       {
1230         if (lastj == -1)
1231         {
1232           lastj = j;
1233         }
1234       }
1235       else
1236       {
1237         if (lastj != -1)
1238         {
1239           map.set(lastj, j);
1240           lastj = -1;
1241         }
1242       }
1243       j++;
1244     }
1245     if (lastj != -1)
1246     {
1247       map.set(lastj, j);
1248       lastj = -1;
1249     }
1250     return map;
1251   }
1252
1253   @Override
1254   public void deleteChars(final int i, final int j)
1255   {
1256     int newstart = start, newend = end;
1257     if (i >= sequence.length || i < 0)
1258     {
1259       return;
1260     }
1261
1262     char[] tmp = StringUtils.deleteChars(sequence, i, j);
1263     boolean createNewDs = false;
1264     // TODO: take a (second look) at the dataset creation validation method for
1265     // the very large sequence case
1266     int startIndex = findIndex(start) - 1;
1267     int endIndex = findIndex(end) - 1;
1268     int startDeleteColumn = -1; // for dataset sequence deletions
1269     int deleteCount = 0;
1270
1271     for (int s = i; s < j; s++)
1272     {
1273       if (Comparison.isGap(sequence[s]))
1274       {
1275         continue;
1276       }
1277       deleteCount++;
1278       if (startDeleteColumn == -1)
1279       {
1280         startDeleteColumn = findPosition(s) - start;
1281       }
1282       if (createNewDs)
1283       {
1284         newend--;
1285       }
1286       else
1287       {
1288         if (startIndex == s)
1289         {
1290           /*
1291            * deleting characters from start of sequence; new start is the
1292            * sequence position of the next column (position to the right
1293            * if the column position is gapped)
1294            */
1295           newstart = findPosition(j);
1296           break;
1297         }
1298         else
1299         {
1300           if (endIndex < j)
1301           {
1302             /*
1303              * deleting characters at end of sequence; new end is the sequence
1304              * position of the column before the deletion; subtract 1 if this is
1305              * gapped since findPosition returns the next sequence position
1306              */
1307             newend = findPosition(i - 1);
1308             if (Comparison.isGap(sequence[i - 1]))
1309             {
1310               newend--;
1311             }
1312             break;
1313           }
1314           else
1315           {
1316             createNewDs = true;
1317             newend--;
1318           }
1319         }
1320       }
1321     }
1322
1323     if (createNewDs && this.datasetSequence != null)
1324     {
1325       /*
1326        * if deletion occured in the middle of the sequence,
1327        * construct a new dataset sequence and delete the residues
1328        * that were deleted from the aligned sequence
1329        */
1330       Sequence ds = new Sequence(datasetSequence);
1331       ds.deleteChars(startDeleteColumn, startDeleteColumn + deleteCount);
1332       datasetSequence = ds;
1333       // TODO: remove any non-inheritable properties ?
1334       // TODO: create a sequence mapping (since there is a relation here ?)
1335     }
1336     start = newstart;
1337     end = newend;
1338     sequence = tmp;
1339     sequenceChanged();
1340   }
1341
1342   @Override
1343   public void insertCharAt(int i, int length, char c)
1344   {
1345     char[] tmp = new char[sequence.length + length];
1346
1347     if (i >= sequence.length)
1348     {
1349       System.arraycopy(sequence, 0, tmp, 0, sequence.length);
1350       i = sequence.length;
1351     }
1352     else
1353     {
1354       System.arraycopy(sequence, 0, tmp, 0, i);
1355     }
1356
1357     int index = i;
1358     while (length > 0)
1359     {
1360       tmp[index++] = c;
1361       length--;
1362     }
1363
1364     if (i < sequence.length)
1365     {
1366       System.arraycopy(sequence, i, tmp, index, sequence.length - i);
1367     }
1368
1369     sequence = tmp;
1370     sequenceChanged();
1371   }
1372
1373   @Override
1374   public void insertCharAt(int i, char c)
1375   {
1376     insertCharAt(i, 1, c);
1377   }
1378
1379   @Override
1380   public String getVamsasId()
1381   {
1382     return vamsasId;
1383   }
1384
1385   @Override
1386   public void setVamsasId(String id)
1387   {
1388     vamsasId = id;
1389   }
1390
1391   @Override
1392   public void setDBRefs(DBRefEntry[] dbref)
1393   {
1394     if (dbrefs == null && datasetSequence != null
1395             && this != datasetSequence)
1396     {
1397       datasetSequence.setDBRefs(dbref);
1398       return;
1399     }
1400     dbrefs = dbref;
1401     if (dbrefs != null)
1402     {
1403       DBRefUtils.ensurePrimaries(this);
1404     }
1405   }
1406
1407   @Override
1408   public DBRefEntry[] getDBRefs()
1409   {
1410     if (dbrefs == null && datasetSequence != null
1411             && this != datasetSequence)
1412     {
1413       return datasetSequence.getDBRefs();
1414     }
1415     return dbrefs;
1416   }
1417
1418   @Override
1419   public void addDBRef(DBRefEntry entry)
1420   {
1421     if (datasetSequence != null)
1422     {
1423       datasetSequence.addDBRef(entry);
1424       return;
1425     }
1426
1427     if (dbrefs == null)
1428     {
1429       dbrefs = new DBRefEntry[0];
1430     }
1431
1432     for (DBRefEntryI dbr : dbrefs)
1433     {
1434       if (dbr.updateFrom(entry))
1435       {
1436         /*
1437          * found a dbref that either matched, or could be
1438          * updated from, the new entry - no need to add it
1439          */
1440         return;
1441       }
1442     }
1443
1444     /*
1445      * extend the array to make room for one more
1446      */
1447     // TODO use an ArrayList instead
1448     int j = dbrefs.length;
1449     DBRefEntry[] temp = new DBRefEntry[j + 1];
1450     System.arraycopy(dbrefs, 0, temp, 0, j);
1451     temp[temp.length - 1] = entry;
1452
1453     dbrefs = temp;
1454
1455     DBRefUtils.ensurePrimaries(this);
1456   }
1457
1458   @Override
1459   public void setDatasetSequence(SequenceI seq)
1460   {
1461     if (seq == this)
1462     {
1463       throw new IllegalArgumentException(
1464               "Implementation Error: self reference passed to SequenceI.setDatasetSequence");
1465     }
1466     if (seq != null && seq.getDatasetSequence() != null)
1467     {
1468       throw new IllegalArgumentException(
1469               "Implementation error: cascading dataset sequences are not allowed.");
1470     }
1471     datasetSequence = seq;
1472   }
1473
1474   @Override
1475   public SequenceI getDatasetSequence()
1476   {
1477     return datasetSequence;
1478   }
1479
1480   @Override
1481   public AlignmentAnnotation[] getAnnotation()
1482   {
1483     return annotation == null ? null
1484             : annotation
1485                     .toArray(new AlignmentAnnotation[annotation.size()]);
1486   }
1487
1488   @Override
1489   public boolean hasAnnotation(AlignmentAnnotation ann)
1490   {
1491     return annotation == null ? false : annotation.contains(ann);
1492   }
1493
1494   @Override
1495   public void addAlignmentAnnotation(AlignmentAnnotation annotation)
1496   {
1497     if (this.annotation == null)
1498     {
1499       this.annotation = new Vector<>();
1500     }
1501     if (!this.annotation.contains(annotation))
1502     {
1503       this.annotation.addElement(annotation);
1504     }
1505     annotation.setSequenceRef(this);
1506   }
1507
1508   @Override
1509   public void removeAlignmentAnnotation(AlignmentAnnotation annotation)
1510   {
1511     if (this.annotation != null)
1512     {
1513       this.annotation.removeElement(annotation);
1514       if (this.annotation.size() == 0)
1515       {
1516         this.annotation = null;
1517       }
1518     }
1519   }
1520
1521   /**
1522    * test if this is a valid candidate for another sequence's dataset sequence.
1523    *
1524    */
1525   private boolean isValidDatasetSequence()
1526   {
1527     if (datasetSequence != null)
1528     {
1529       return false;
1530     }
1531     for (int i = 0; i < sequence.length; i++)
1532     {
1533       if (jalview.util.Comparison.isGap(sequence[i]))
1534       {
1535         return false;
1536       }
1537     }
1538     return true;
1539   }
1540
1541   @Override
1542   public SequenceI deriveSequence()
1543   {
1544     Sequence seq = null;
1545     if (datasetSequence == null)
1546     {
1547       if (isValidDatasetSequence())
1548       {
1549         // Use this as dataset sequence
1550         seq = new Sequence(getName(), "", 1, -1);
1551         seq.setDatasetSequence(this);
1552         seq.initSeqFrom(this, getAnnotation());
1553         return seq;
1554       }
1555       else
1556       {
1557         // Create a new, valid dataset sequence
1558         createDatasetSequence();
1559       }
1560     }
1561     return new Sequence(this);
1562   }
1563
1564   private boolean _isNa;
1565
1566   private int _seqhash = 0;
1567
1568   /**
1569    * Answers false if the sequence is more than 85% nucleotide (ACGTU), else
1570    * true
1571    */
1572   @Override
1573   public boolean isProtein()
1574   {
1575     if (datasetSequence != null)
1576     {
1577       return datasetSequence.isProtein();
1578     }
1579     if (_seqhash != sequence.hashCode())
1580     {
1581       _seqhash = sequence.hashCode();
1582       _isNa = Comparison.isNucleotide(this);
1583     }
1584     return !_isNa;
1585   };
1586
1587   /*
1588    * (non-Javadoc)
1589    *
1590    * @see jalview.datamodel.SequenceI#createDatasetSequence()
1591    */
1592   @Override
1593   public SequenceI createDatasetSequence()
1594   {
1595     if (datasetSequence == null)
1596     {
1597       Sequence dsseq = new Sequence(getName(),
1598               AlignSeq.extractGaps(jalview.util.Comparison.GapChars,
1599                       getSequenceAsString()),
1600               getStart(), getEnd());
1601
1602       datasetSequence = dsseq;
1603
1604       dsseq.setDescription(description);
1605       // move features and database references onto dataset sequence
1606       dsseq.sequenceFeatureStore = sequenceFeatureStore;
1607       sequenceFeatureStore = null;
1608       dsseq.dbrefs = dbrefs;
1609       dbrefs = null;
1610       // TODO: search and replace any references to this sequence with
1611       // references to the dataset sequence in Mappings on dbref
1612       dsseq.pdbIds = pdbIds;
1613       pdbIds = null;
1614       datasetSequence.updatePDBIds();
1615       if (annotation != null)
1616       {
1617         // annotation is cloned rather than moved, to preserve what's currently
1618         // on the alignment
1619         for (AlignmentAnnotation aa : annotation)
1620         {
1621           AlignmentAnnotation _aa = new AlignmentAnnotation(aa);
1622           _aa.sequenceRef = datasetSequence;
1623           _aa.adjustForAlignment(); // uses annotation's own record of
1624                                     // sequence-column mapping
1625           datasetSequence.addAlignmentAnnotation(_aa);
1626         }
1627       }
1628     }
1629     return datasetSequence;
1630   }
1631
1632   /*
1633    * (non-Javadoc)
1634    *
1635    * @see
1636    * jalview.datamodel.SequenceI#setAlignmentAnnotation(AlignmmentAnnotation[]
1637    * annotations)
1638    */
1639   @Override
1640   public void setAlignmentAnnotation(AlignmentAnnotation[] annotations)
1641   {
1642     if (annotation != null)
1643     {
1644       annotation.removeAllElements();
1645     }
1646     if (annotations != null)
1647     {
1648       for (int i = 0; i < annotations.length; i++)
1649       {
1650         if (annotations[i] != null)
1651         {
1652           addAlignmentAnnotation(annotations[i]);
1653         }
1654       }
1655     }
1656   }
1657
1658   @Override
1659   public AlignmentAnnotation[] getAnnotation(String label)
1660   {
1661     if (annotation == null || annotation.size() == 0)
1662     {
1663       return null;
1664     }
1665
1666     Vector<AlignmentAnnotation> subset = new Vector<>();
1667     Enumeration<AlignmentAnnotation> e = annotation.elements();
1668     while (e.hasMoreElements())
1669     {
1670       AlignmentAnnotation ann = e.nextElement();
1671       if (ann.label != null && ann.label.equals(label))
1672       {
1673         subset.addElement(ann);
1674       }
1675     }
1676     if (subset.size() == 0)
1677     {
1678       return null;
1679     }
1680     AlignmentAnnotation[] anns = new AlignmentAnnotation[subset.size()];
1681     int i = 0;
1682     e = subset.elements();
1683     while (e.hasMoreElements())
1684     {
1685       anns[i++] = e.nextElement();
1686     }
1687     subset.removeAllElements();
1688     return anns;
1689   }
1690
1691   @Override
1692   public boolean updatePDBIds()
1693   {
1694     if (datasetSequence != null)
1695     {
1696       // TODO: could merge DBRefs
1697       return datasetSequence.updatePDBIds();
1698     }
1699     if (dbrefs == null || dbrefs.length == 0)
1700     {
1701       return false;
1702     }
1703     boolean added = false;
1704     for (DBRefEntry dbr : dbrefs)
1705     {
1706       if (DBRefSource.PDB.equals(dbr.getSource()))
1707       {
1708         /*
1709          * 'Add' any PDB dbrefs as a PDBEntry - add is only performed if the
1710          * PDB id is not already present in a 'matching' PDBEntry
1711          * Constructor parses out a chain code if appended to the accession id
1712          * (a fudge used to 'store' the chain code in the DBRef)
1713          */
1714         PDBEntry pdbe = new PDBEntry(dbr);
1715         added |= addPDBId(pdbe);
1716       }
1717     }
1718     return added;
1719   }
1720
1721   @Override
1722   public void transferAnnotation(SequenceI entry, Mapping mp)
1723   {
1724     if (datasetSequence != null)
1725     {
1726       datasetSequence.transferAnnotation(entry, mp);
1727       return;
1728     }
1729     if (entry.getDatasetSequence() != null)
1730     {
1731       transferAnnotation(entry.getDatasetSequence(), mp);
1732       return;
1733     }
1734     // transfer any new features from entry onto sequence
1735     if (entry.getSequenceFeatures() != null)
1736     {
1737
1738       List<SequenceFeature> sfs = entry.getSequenceFeatures();
1739       for (SequenceFeature feature : sfs)
1740       {
1741        SequenceFeature sf[] = (mp != null) ? mp.locateFeature(feature)
1742                 : new SequenceFeature[] { new SequenceFeature(feature) };
1743         if (sf != null)
1744         {
1745           for (int sfi = 0; sfi < sf.length; sfi++)
1746           {
1747             addSequenceFeature(sf[sfi]);
1748           }
1749         }
1750       }
1751     }
1752
1753     // transfer PDB entries
1754     if (entry.getAllPDBEntries() != null)
1755     {
1756       Enumeration<PDBEntry> e = entry.getAllPDBEntries().elements();
1757       while (e.hasMoreElements())
1758       {
1759         PDBEntry pdb = e.nextElement();
1760         addPDBId(pdb);
1761       }
1762     }
1763     // transfer database references
1764     DBRefEntry[] entryRefs = entry.getDBRefs();
1765     if (entryRefs != null)
1766     {
1767       for (int r = 0; r < entryRefs.length; r++)
1768       {
1769         DBRefEntry newref = new DBRefEntry(entryRefs[r]);
1770         if (newref.getMap() != null && mp != null)
1771         {
1772           // remap ref using our local mapping
1773         }
1774         // we also assume all version string setting is done by dbSourceProxy
1775         /*
1776          * if (!newref.getSource().equalsIgnoreCase(dbSource)) {
1777          * newref.setSource(dbSource); }
1778          */
1779         addDBRef(newref);
1780       }
1781     }
1782   }
1783
1784   @Override
1785   public void setRNA(RNA r)
1786   {
1787     rna = r;
1788   }
1789
1790   @Override
1791   public RNA getRNA()
1792   {
1793     return rna;
1794   }
1795
1796   @Override
1797   public List<AlignmentAnnotation> getAlignmentAnnotations(String calcId,
1798           String label)
1799   {
1800     List<AlignmentAnnotation> result = new ArrayList<>();
1801     if (this.annotation != null)
1802     {
1803       for (AlignmentAnnotation ann : annotation)
1804       {
1805         if (ann.calcId != null && ann.calcId.equals(calcId)
1806                 && ann.label != null && ann.label.equals(label))
1807         {
1808           result.add(ann);
1809         }
1810       }
1811     }
1812     return result;
1813   }
1814
1815   @Override
1816   public String toString()
1817   {
1818     return getDisplayId(false);
1819   }
1820
1821   @Override
1822   public PDBEntry getPDBEntry(String pdbIdStr)
1823   {
1824     if (getDatasetSequence() != null)
1825     {
1826       return getDatasetSequence().getPDBEntry(pdbIdStr);
1827     }
1828     if (pdbIds == null)
1829     {
1830       return null;
1831     }
1832     List<PDBEntry> entries = getAllPDBEntries();
1833     for (PDBEntry entry : entries)
1834     {
1835       if (entry.getId().equalsIgnoreCase(pdbIdStr))
1836       {
1837         return entry;
1838       }
1839     }
1840     return null;
1841   }
1842
1843   @Override
1844   public List<DBRefEntry> getPrimaryDBRefs()
1845   {
1846     if (datasetSequence != null)
1847     {
1848       return datasetSequence.getPrimaryDBRefs();
1849     }
1850     if (dbrefs == null || dbrefs.length == 0)
1851     {
1852       return Collections.emptyList();
1853     }
1854     synchronized (dbrefs)
1855     {
1856       List<DBRefEntry> primaries = new ArrayList<>();
1857       DBRefEntry[] tmp = new DBRefEntry[1];
1858       for (DBRefEntry ref : dbrefs)
1859       {
1860         if (!ref.isPrimaryCandidate())
1861         {
1862           continue;
1863         }
1864         if (ref.hasMap())
1865         {
1866           MapList mp = ref.getMap().getMap();
1867           if (mp.getFromLowest() > start || mp.getFromHighest() < end)
1868           {
1869             // map only involves a subsequence, so cannot be primary
1870             continue;
1871           }
1872         }
1873         // whilst it looks like it is a primary ref, we also sanity check type
1874         if (DBRefUtils.getCanonicalName(DBRefSource.PDB)
1875                 .equals(DBRefUtils.getCanonicalName(ref.getSource())))
1876         {
1877           // PDB dbrefs imply there should be a PDBEntry associated
1878           // TODO: tighten PDB dbrefs
1879           // formally imply Jalview has actually downloaded and
1880           // parsed the pdb file. That means there should be a cached file
1881           // handle on the PDBEntry, and a real mapping between sequence and
1882           // extracted sequence from PDB file
1883           PDBEntry pdbentry = getPDBEntry(ref.getAccessionId());
1884           if (pdbentry != null && pdbentry.getFile() != null)
1885           {
1886             primaries.add(ref);
1887           }
1888           continue;
1889         }
1890         // check standard protein or dna sources
1891         tmp[0] = ref;
1892         DBRefEntry[] res = DBRefUtils.selectDbRefs(!isProtein(), tmp);
1893         if (res != null && res[0] == tmp[0])
1894         {
1895           primaries.add(ref);
1896           continue;
1897         }
1898       }
1899       return primaries;
1900     }
1901   }
1902
1903   /**
1904    * {@inheritDoc}
1905    */
1906   @Override
1907   public List<SequenceFeature> findFeatures(int fromColumn, int toColumn,
1908           String... types)
1909   {
1910     int startPos = findPosition(fromColumn - 1); // convert base 1 to base 0
1911     int endPos = fromColumn == toColumn ? startPos
1912             : findPosition(toColumn - 1);
1913
1914     List<SequenceFeature> result = getFeatures().findFeatures(startPos,
1915             endPos, types);
1916
1917     /*
1918      * if end column is gapped, endPos may be to the right,
1919      * and we may have included adjacent or enclosing features;
1920      * remove any that are not enclosing, non-contact features
1921      */
1922     boolean endColumnIsGapped = toColumn > 0 && toColumn <= sequence.length
1923             && Comparison.isGap(sequence[toColumn - 1]);
1924     if (endPos > this.end || endColumnIsGapped)
1925     {
1926       ListIterator<SequenceFeature> it = result.listIterator();
1927       while (it.hasNext())
1928       {
1929         SequenceFeature sf = it.next();
1930         int sfBegin = sf.getBegin();
1931         int sfEnd = sf.getEnd();
1932         int featureStartColumn = findIndex(sfBegin);
1933         if (featureStartColumn > toColumn)
1934         {
1935           it.remove();
1936         }
1937         else if (featureStartColumn < fromColumn)
1938         {
1939           int featureEndColumn = sfEnd == sfBegin ? featureStartColumn
1940                   : findIndex(sfEnd);
1941           if (featureEndColumn < fromColumn)
1942           {
1943             it.remove();
1944           }
1945           else if (featureEndColumn > toColumn && sf.isContactFeature())
1946           {
1947             /*
1948              * remove an enclosing feature if it is a contact feature
1949              */
1950             it.remove();
1951           }
1952         }
1953       }
1954     }
1955
1956     return result;
1957   }
1958
1959   /**
1960    * Invalidates any stale cursors (forcing recalculation) by incrementing the
1961    * token that has to match the one presented by the cursor
1962    */
1963   @Override
1964   public void sequenceChanged()
1965   {
1966     changeCount++;
1967   }
1968
1969   /**
1970    * {@inheritDoc}
1971    */
1972   @Override
1973   public int replace(char c1, char c2)
1974   {
1975     if (c1 == c2)
1976     {
1977       return 0;
1978     }
1979     int count = 0;
1980     synchronized (sequence)
1981     {
1982       for (int c = 0; c < sequence.length; c++)
1983       {
1984         if (sequence[c] == c1)
1985         {
1986           sequence[c] = c2;
1987           count++;
1988         }
1989       }
1990     }
1991     if (count > 0)
1992     {
1993       sequenceChanged();
1994     }
1995
1996     return count;
1997   }
1998
1999   @Override
2000   public String getSequenceStringFromIterator(Iterator<int[]> it)
2001   {
2002     StringBuilder newSequence = new StringBuilder();
2003     while (it.hasNext())
2004     {
2005       int[] block = it.next();
2006       if (it.hasNext())
2007       {
2008         newSequence.append(getSequence(block[0], block[1] + 1));
2009       }
2010       else
2011       {
2012         newSequence.append(getSequence(block[0], block[1]));
2013       }
2014     }
2015
2016     return newSequence.toString();
2017   }
2018
2019   @Override
2020   public int firstResidueOutsideIterator(Iterator<int[]> regions)
2021   {
2022     int start = 0;
2023
2024     if (!regions.hasNext())
2025     {
2026       return findIndex(getStart()) - 1;
2027     }
2028
2029     // Simply walk along the sequence whilst watching for region
2030     // boundaries
2031     int hideStart = getLength();
2032     int hideEnd = -1;
2033     boolean foundStart = false;
2034
2035     // step through the non-gapped positions of the sequence
2036     for (int i = getStart(); i <= getEnd() && (!foundStart); i++)
2037     {
2038       // get alignment position of this residue in the sequence
2039       int p = findIndex(i) - 1;
2040
2041       // update region start/end
2042       while (hideEnd < p && regions.hasNext())
2043       {
2044         int[] region = regions.next();
2045         hideStart = region[0];
2046         hideEnd = region[1];
2047       }
2048       if (hideEnd < p)
2049       {
2050         hideStart = getLength();
2051       }
2052       // update boundary for sequence
2053       if (p < hideStart)
2054       {
2055         start = p;
2056         foundStart = true;
2057       }
2058     }
2059
2060     if (foundStart)
2061     {
2062       return start;
2063     }
2064     // otherwise, sequence was completely hidden
2065     return 0;
2066   }
2067 }