src/jalview/datamodel/Sequence.java

   1 /*
   2  * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
   3  * Copyright (C) $$Year-Rel$$ The Jalview Authors
   4  *
   5  * This file is part of Jalview.
   6  *
   7  * Jalview is free software: you can redistribute it and/or
   8  * modify it under the terms of the GNU General Public License
   9  * as published by the Free Software Foundation, either version 3
  10  * of the License, or (at your option) any later version.
  11  *
  12  * Jalview is distributed in the hope that it will be useful, but
  13  * WITHOUT ANY WARRANTY; without even the implied warranty
  14  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
  15  * PURPOSE.  See the GNU General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU General Public License
  18  * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
  19  * The Jalview Authors are detailed in the 'AUTHORS' file.
  20  */
  21 package jalview.datamodel;
  22
  23 import jalview.analysis.AlignSeq;
  24 import jalview.api.DBRefEntryI;
  25 import jalview.datamodel.features.SequenceFeatures;
  26 import jalview.datamodel.features.SequenceFeaturesI;
  27 import jalview.util.Comparison;
  28 import jalview.util.DBRefUtils;
  29 import jalview.util.MapList;
  30 import jalview.util.StringUtils;
  31 import jalview.workers.InformationThread;
  32
  33 import java.util.ArrayList;
  34 import java.util.Arrays;
  35 import java.util.BitSet;
  36 import java.util.Collections;
  37 import java.util.Enumeration;
  38 import java.util.Iterator;
  39 import java.util.List;
  40 import java.util.ListIterator;
  41 import java.util.Vector;
  42
  43 import fr.orsay.lri.varna.models.rna.RNA;
  44
  45 /**
  46  *
  47  * Implements the SequenceI interface for a char[] based sequence object
  48  */
  49 public class Sequence extends ASequence implements SequenceI
  50 {
  // when non-null, feature operations (set/add/delete/get) are delegated to it
  SequenceI datasetSequence;

  // display name; parseId() strips any valid trailing "/start-end" suffix
  String name;

  // the aligned characters of the sequence, which may include gaps
  private char[] sequence;

  // free-text description, stored as given by setDescription
  String description;

  // residue position assigned to the first non-gap character
  int start;

  // residue position of the last residue; checkValidRange() raises it when it
  // is too small to cover every non-gap character
  int end;

  // hidden Markov model for this sequence, if any; copied by initSeqFrom
  HiddenMarkovModel hmm;

  // NOTE(review): presumably marks a consensus sequence derived from an HMM;
  // it is not read or written in the visible part of the file - confirm
  boolean isHMMConsensusSequence = false;

  // PDB entries for this sequence; null until addPDBId or setPDBId is called
  Vector<PDBEntry> pdbIds;

  // NOTE(review): presumably the identifier of this sequence in a VAMSAS
  // document - not used in the visible part of the file, confirm
  String vamsasId;

  // database cross-references held by this sequence
  DBRefEntry[] dbrefs;

  // NOTE(review): VARNA RNA model; usage is not visible here - confirm
  RNA rna;

  /**
   * This annotation is displayed below the alignment but the positions are tied
   * to the residues of this sequence
   *
   * TODO: change to List<>
   */
  Vector<AlignmentAnnotation> annotation;

  // features held by this object; only consulted when datasetSequence is null
  private SequenceFeaturesI sequenceFeatureStore;

  /*
   * A cursor holding the approximate current view position to the sequence,
   * as determined by findIndex or findPosition or findPositions.
   * Using a cursor as a hint allows these methods to be more performant for
   * large sequences.
   */
  private SequenceCursor cursor;

  /*
   * A number that should be incremented whenever the sequence is edited.
   * If the value matches the cursor token, then we can trust the cursor,
   * if not then it should be recomputed.
   */
  private int changeCount;
  99
 100   /**
 101    * Creates a new Sequence object.
 102    *
 103    * @param name
 104    *          display name string
 105    * @param sequence
 106    *          string to form a possibly gapped sequence out of
 107    * @param start
 108    *          first position of non-gap residue in the sequence
 109    * @param end
 110    *          last position of ungapped residues (nearly always only used for
 111    *          display purposes)
 112    */
 113   public Sequence(String name, String sequence, int start, int end)
 114   {
 115     this();
 116     initSeqAndName(name, sequence.toCharArray(), start, end);
 117   }
 118
 119   public Sequence(String name, char[] sequence, int start, int end)
 120   {
 121     this();
 122     initSeqAndName(name, sequence, start, end);
 123   }
 124
 125   /**
 126    * Stage 1 constructor - assign name, sequence, and set start and end fields.
 127    * start and end are updated values from name2 if it ends with /start-end
 128    *
 129    * @param name2
 130    * @param sequence2
 131    * @param start2
 132    * @param end2
 133    */
 134   protected void initSeqAndName(String name2, char[] sequence2, int start2,
 135           int end2)
 136   {
 137     this.name = name2;
 138     this.sequence = sequence2;
 139     this.start = start2;
 140     this.end = end2;
 141     parseId();
 142     checkValidRange();
 143   }
 144
 145   /**
 146    * If 'name' ends in /i-j, where i >= j > 0 are integers, extracts i and j as
 147    * start and end respectively and removes the suffix from the name
 148    */
 149   void parseId()
 150   {
 151     if (name == null)
 152     {
 153       System.err.println(
 154               "POSSIBLE IMPLEMENTATION ERROR: null sequence name passed to constructor.");
 155       name = "";
 156     }
 157     int slashPos = name.lastIndexOf('/');
 158     if (slashPos > -1 && slashPos < name.length() - 1)
 159     {
 160       String suffix = name.substring(slashPos + 1);
 161       String[] range = suffix.split("-");
 162       if (range.length == 2)
 163       {
 164         try
 165         {
 166           int from = Integer.valueOf(range[0]);
 167           int to = Integer.valueOf(range[1]);
 168           if (from > 0 && to >= from)
 169           {
 170             name = name.substring(0, slashPos);
 171             setStart(from);
 172             setEnd(to);
 173             checkValidRange();
 174           }
 175         } catch (NumberFormatException e)
 176         {
 177           // leave name unchanged if suffix is invalid
 178         }
 179       }
 180     }
 181   }
 182
 183   /**
 184    * Ensures that 'end' is not before the end of the sequence, that is,
 185    * (end-start+1) is at least as long as the count of ungapped positions. Note
 186    * that end is permitted to be beyond the end of the sequence data.
 187    */
 188   void checkValidRange()
 189   {
 190     // Note: JAL-774 :
 191     // http://issues.jalview.org/browse/JAL-774?focusedCommentId=11239&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-11239
 192     {
 193       int endRes = 0;
 194       for (int j = 0; j < sequence.length; j++)
 195       {
 196         if (!Comparison.isGap(sequence[j]))
 197         {
 198           endRes++;
 199         }
 200       }
 201       if (endRes > 0)
 202       {
 203         endRes += start - 1;
 204       }
 205
 206       if (end < endRes)
 207       {
 208         end = endRes;
 209       }
 210     }
 211
 212   }
 213
 214   /**
 215    * default constructor
 216    */
 217   private Sequence()
 218   {
 219     sequenceFeatureStore = new SequenceFeatures();
 220   }
 221
 222   /**
 223    * Creates a new Sequence object.
 224    *
 225    * @param name
 226    *          DOCUMENT ME!
 227    * @param sequence
 228    *          DOCUMENT ME!
 229    */
 230   public Sequence(String name, String sequence)
 231   {
 232     this(name, sequence, 1, -1);
 233   }
 234
 235   /**
 236    * Creates a new Sequence object with new AlignmentAnnotations but inherits
 237    * any existing dataset sequence reference. If non exists, everything is
 238    * copied.
 239    *
 240    * @param seq
 241    *          if seq is a dataset sequence, behaves like a plain old copy
 242    *          constructor
 243    */
 244   public Sequence(SequenceI seq)
 245   {
 246     this(seq, seq.getAnnotation());
 247   }
 248
 249   /**
 250    * Create a new sequence object with new features, DBRefEntries, and PDBIds
 251    * but inherits any existing dataset sequence reference, and duplicate of any
 252    * annotation that is present in the given annotation array.
 253    *
 254    * @param seq
 255    *          the sequence to be copied
 256    * @param alAnnotation
 257    *          an array of annotation including some associated with seq
 258    */
 259   public Sequence(SequenceI seq, AlignmentAnnotation[] alAnnotation)
 260   {
 261     this();
 262     initSeqFrom(seq, alAnnotation);
 263   }
 264
 265   /**
 266    * does the heavy lifting when cloning a dataset sequence, or coping data from
 267    * dataset to a new derived sequence.
 268    *
 269    * @param seq
 270    *          - source of attributes.
 271    * @param alAnnotation
 272    *          - alignment annotation present on seq that should be copied onto
 273    *          this sequence
 274    */
 275   protected void initSeqFrom(SequenceI seq,
 276           AlignmentAnnotation[] alAnnotation)
 277   {
 278     char[] oseq = seq.getSequence(); // returns a copy of the array
 279     initSeqAndName(seq.getName(), oseq, seq.getStart(), seq.getEnd());
 280
 281     description = seq.getDescription();
 282     if (seq != datasetSequence)
 283     {
 284       setDatasetSequence(seq.getDatasetSequence());
 285     }
 286
 287     /*
 288      * only copy DBRefs and seqfeatures if we really are a dataset sequence
 289      */
 290     if (datasetSequence == null)
 291     {
 292       if (seq.getDBRefs() != null)
 293       {
 294         DBRefEntry[] dbr = seq.getDBRefs();
 295         for (int i = 0; i < dbr.length; i++)
 296         {
 297           addDBRef(new DBRefEntry(dbr[i]));
 298         }
 299       }
 300
 301       /*
 302        * make copies of any sequence features
 303        */
 304       for (SequenceFeature sf : seq.getSequenceFeatures())
 305       {
 306         addSequenceFeature(new SequenceFeature(sf));
 307       }
 308     }
 309
 310     if (seq.getAnnotation() != null)
 311     {
 312       AlignmentAnnotation[] sqann = seq.getAnnotation();
 313       for (int i = 0; i < sqann.length; i++)
 314       {
 315         if (sqann[i] == null)
 316         {
 317           continue;
 318         }
 319         boolean found = (alAnnotation == null);
 320         if (!found)
 321         {
 322           for (int apos = 0; !found && apos < alAnnotation.length; apos++)
 323           {
 324             found = (alAnnotation[apos] == sqann[i]);
 325           }
 326         }
 327         if (found)
 328         {
 329           // only copy the given annotation
 330           AlignmentAnnotation newann = new AlignmentAnnotation(sqann[i]);
 331           addAlignmentAnnotation(newann);
 332         }
 333       }
 334     }
 335     if (seq.getAllPDBEntries() != null)
 336     {
 337       Vector<PDBEntry> ids = seq.getAllPDBEntries();
 338       for (PDBEntry pdb : ids)
 339       {
 340         this.addPDBId(new PDBEntry(pdb));
 341       }
 342     }
 343     if (seq.getHMM() != null)
 344     {
 345       this.hmm = new HiddenMarkovModel(seq.getHMM(), this);
 346     }
 347
 348   }
 349
 350   @Override
 351   public void setSequenceFeatures(List<SequenceFeature> features)
 352   {
 353     if (datasetSequence != null)
 354     {
 355       datasetSequence.setSequenceFeatures(features);
 356       return;
 357     }
 358     sequenceFeatureStore = new SequenceFeatures(features);
 359   }
 360
 361   @Override
 362   public synchronized boolean addSequenceFeature(SequenceFeature sf)
 363   {
 364     if (sf.getType() == null)
 365     {
 366       System.err.println("SequenceFeature type may not be null: "
 367               + sf.toString());
 368       return false;
 369     }
 370
 371     if (datasetSequence != null)
 372     {
 373       return datasetSequence.addSequenceFeature(sf);
 374     }
 375
 376     return sequenceFeatureStore.add(sf);
 377   }
 378
 379   @Override
 380   public void deleteFeature(SequenceFeature sf)
 381   {
 382     if (datasetSequence != null)
 383     {
 384       datasetSequence.deleteFeature(sf);
 385     }
 386     else
 387     {
 388       sequenceFeatureStore.delete(sf);
 389     }
 390   }
 391
 392   /**
 393    * {@inheritDoc}
 394    *
 395    * @return
 396    */
 397   @Override
 398   public List<SequenceFeature> getSequenceFeatures()
 399   {
 400     if (datasetSequence != null)
 401     {
 402       return datasetSequence.getSequenceFeatures();
 403     }
 404     return sequenceFeatureStore.getAllFeatures();
 405   }
 406
 407   @Override
 408   public SequenceFeaturesI getFeatures()
 409   {
 410     return datasetSequence != null ? datasetSequence.getFeatures()
 411             : sequenceFeatureStore;
 412   }
 413
 414   @Override
 415   public boolean addPDBId(PDBEntry entry)
 416   {
 417     if (pdbIds == null)
 418     {
 419       pdbIds = new Vector<>();
 420       pdbIds.add(entry);
 421       return true;
 422     }
 423
 424     for (PDBEntry pdbe : pdbIds)
 425     {
 426       if (pdbe.updateFrom(entry))
 427       {
 428         return false;
 429       }
 430     }
 431     pdbIds.addElement(entry);
 432     return true;
 433   }
 434
 435   /**
 436    * DOCUMENT ME!
 437    *
 438    * @param id
 439    *          DOCUMENT ME!
 440    */
 441   @Override
 442   public void setPDBId(Vector<PDBEntry> id)
 443   {
 444     pdbIds = id;
 445   }
 446
 447   /**
 448    * DOCUMENT ME!
 449    *
 450    * @return DOCUMENT ME!
 451    */
 452   @Override
 453   public Vector<PDBEntry> getAllPDBEntries()
 454   {
 455     return pdbIds == null ? new Vector<>() : pdbIds;
 456   }
 457
 458   /**
 459    * Answers the sequence name, with '/start-end' appended if jvsuffix is true
 460    *
 461    * @return
 462    */
 463   @Override
 464   public String getDisplayId(boolean jvsuffix)
 465   {
 466     if (!jvsuffix)
 467     {
 468       return name;
 469     }
 470     StringBuilder result = new StringBuilder(name);
 471     result.append("/").append(start).append("-").append(end);
 472
 473     return result.toString();
 474   }
 475
 476   /**
 477    * Sets the sequence name. If the name ends in /start-end, then the start-end
 478    * values are parsed out and set, and the suffix is removed from the name.
 479    *
 480    * @param theName
 481    */
 482   @Override
 483   public void setName(String theName)
 484   {
 485     this.name = theName;
 486     this.parseId();
 487   }
 488
 489   /**
 490    * DOCUMENT ME!
 491    *
 492    * @return DOCUMENT ME!
 493    */
 494   @Override
 495   public String getName()
 496   {
 497     return this.name;
 498   }
 499
 500   /**
 501    * DOCUMENT ME!
 502    *
 503    * @param start
 504    *          DOCUMENT ME!
 505    */
 506   @Override
 507   public void setStart(int start)
 508   {
 509     this.start = start;
 510   }
 511
 512   /**
 513    * DOCUMENT ME!
 514    *
 515    * @return DOCUMENT ME!
 516    */
 517   @Override
 518   public int getStart()
 519   {
 520     return this.start;
 521   }
 522
 523   /**
 524    * DOCUMENT ME!
 525    *
 526    * @param end
 527    *          DOCUMENT ME!
 528    */
 529   @Override
 530   public void setEnd(int end)
 531   {
 532     this.end = end;
 533   }
 534
 535   /**
 536    * DOCUMENT ME!
 537    *
 538    * @return DOCUMENT ME!
 539    */
 540   @Override
 541   public int getEnd()
 542   {
 543     return this.end;
 544   }
 545
 546   /**
 547    * DOCUMENT ME!
 548    *
 549    * @return DOCUMENT ME!
 550    */
 551   @Override
 552   public int getLength()
 553   {
 554     return this.sequence.length;
 555   }
 556
 557   /**
 558    * DOCUMENT ME!
 559    *
 560    * @param seq
 561    *          DOCUMENT ME!
 562    */
 563   @Override
 564   public void setSequence(String seq)
 565   {
 566     this.sequence = seq.toCharArray();
 567     checkValidRange();
 568     sequenceChanged();
 569   }
 570
 571   @Override
 572   public String getSequenceAsString()
 573   {
 574     return new String(sequence);
 575   }
 576
 577   @Override
 578   public String getSequenceAsString(int start, int end)
 579   {
 580     return new String(getSequence(start, end));
 581   }
 582
 583   @Override
 584   public char[] getSequence()
 585   {
 586     // return sequence;
 587     return sequence == null ? null : Arrays.copyOf(sequence,
 588             sequence.length);
 589   }
 590
 591   /*
 592    * (non-Javadoc)
 593    *
 594    * @see jalview.datamodel.SequenceI#getSequence(int, int)
 595    */
 596   @Override
 597   public char[] getSequence(int start, int end)
 598   {
 599     if (start < 0)
 600     {
 601       start = 0;
 602     }
 603     // JBPNote - left to user to pad the result here (TODO:Decide on this
 604     // policy)
 605     if (start >= sequence.length)
 606     {
 607       return new char[0];
 608     }
 609
 610     if (end >= sequence.length)
 611     {
 612       end = sequence.length;
 613     }
 614
 615     char[] reply = new char[end - start];
 616     System.arraycopy(sequence, start, reply, 0, end - start);
 617
 618     return reply;
 619   }
 620
 621   @Override
 622   public SequenceI getSubSequence(int start, int end)
 623   {
 624     if (start < 0)
 625     {
 626       start = 0;
 627     }
 628     char[] seq = getSequence(start, end);
 629     if (seq.length == 0)
 630     {
 631       return null;
 632     }
 633     int nstart = findPosition(start);
 634     int nend = findPosition(end) - 1;
 635     // JBPNote - this is an incomplete copy.
 636     SequenceI nseq = new Sequence(this.getName(), seq, nstart, nend);
 637     nseq.setDescription(description);
 638     if (datasetSequence != null)
 639     {
 640       nseq.setDatasetSequence(datasetSequence);
 641     }
 642     else
 643     {
 644       nseq.setDatasetSequence(this);
 645     }
 646     return nseq;
 647   }
 648
 649   /**
 650    * Returns the character of the aligned sequence at the given position (base
 651    * zero), or space if the position is not within the sequence's bounds
 652    *
 653    * @return
 654    */
 655   @Override
 656   public char getCharAt(int i)
 657   {
 658     if (i >= 0 && i < sequence.length)
 659     {
 660       return sequence[i];
 661     }
 662     else
 663     {
 664       return ' ';
 665     }
 666   }
 667
 668   /**
 669    * Sets the sequence description, and also parses out any special formats of
 670    * interest
 671    *
 672    * @param desc
 673    */
 674   @Override
 675   public void setDescription(String desc)
 676   {
 677     this.description = desc;
 678   }
 679
 680   @Override
 681   public void setGeneLoci(String speciesId, String assemblyId,
 682           String chromosomeId, MapList map)
 683   {
 684     addDBRef(new GeneLocus(speciesId, assemblyId, chromosomeId,
 685             new Mapping(map)));
 686   }
 687
 688   /**
 689    * Returns the gene loci mapping for the sequence (may be null)
 690    *
 691    * @return
 692    */
 693   @Override
 694   public GeneLociI getGeneLoci()
 695   {
 696     DBRefEntry[] refs = getDBRefs();
 697     if (refs != null)
 698     {
 699       for (final DBRefEntry ref : refs)
 700       {
 701         if (ref instanceof GeneLociI)
 702         {
 703           return (GeneLociI) ref;
 704         }
 705       }
 706     }
 707     return null;
 708   }
 709
 710   /**
 711    * Answers the description
 712    *
 713    * @return
 714    */
 715   @Override
 716   public String getDescription()
 717   {
 718     return this.description;
 719   }
 720
  /**
   * {@inheritDoc}
   * <p>
   * If the stored cursor is valid, the search is delegated to
   * {@link #findIndex(int, SequenceCursor)}. Otherwise the sequence is scanned
   * from its first column, counting residues, and the result is saved in a new
   * cursor.
   *
   * @param pos
   *          the residue position (start..) to locate
   */
  @Override
  public int findIndex(int pos)
  {
    /*
     * use a valid, hopefully nearby, cursor if available
     */
    if (isValidCursor(cursor))
    {
      return findIndex(pos, cursor);
    }

    int j = start; // position that the next residue found will have
    int i = 0; // count of columns traversed so far
    int startColumn = 0;

    /*
     * traverse sequence from the start counting gaps; make a note of
     * the column of the first residue to save in the cursor
     */
    while ((i < sequence.length) && (j <= end) && (j <= pos))
    {
      if (!Comparison.isGap(sequence[i]))
      {
        if (j == start)
        {
          startColumn = i;
        }
        j++;
      }
      i++;
    }

    // NOTE(review): this is only reachable when the scan ran off the end of
    // the sequence data with j == end and pos beyond end; the value returned
    // is a residue position (end + 1) rather than a column - confirm that
    // this is intended
    if (j == end && j < pos)
    {
      return end + 1;
    }

    updateCursor(pos, i, startColumn);
    return i;
  }
 764
 765   /**
 766    * Updates the cursor to the latest found residue and column position
 767    *
 768    * @param residuePos
 769    *          (start..)
 770    * @param column
 771    *          (1..)
 772    * @param startColumn
 773    *          column position of the first sequence residue
 774    */
 775   protected void updateCursor(int residuePos, int column, int startColumn)
 776   {
 777     /*
 778      * preserve end residue column provided cursor was valid
 779      */
 780     int endColumn = isValidCursor(cursor) ? cursor.lastColumnPosition : 0;
 781
 782     if (residuePos == this.end)
 783     {
 784       endColumn = column;
 785     }
 786
 787     cursor = new SequenceCursor(this, residuePos, column, startColumn,
 788             endColumn, this.changeCount);
 789   }
 790
  /**
   * Answers the aligned column position (1..) for the given residue position
   * (start..) given a 'hint' of a residue/column location in the neighbourhood.
   * The hint may be left of, at, or to the right of the required position.
   * <p>
   * If the position cannot be reached before running off either edge of the
   * sequence, the value returned is 0 (left edge) or the sequence length
   * (right edge), and the stored cursor is left unchanged.
   *
   * @param pos
   *          the residue position to locate
   * @param curs
   *          the hint; if it is not valid for this sequence the search is done
   *          by findIndex(pos) instead
   * @return the column position (base 1), or an edge value as described above
   */
  protected int findIndex(final int pos, SequenceCursor curs)
  {
    if (!isValidCursor(curs))
    {
      /*
       * wrong or invalidated cursor, compute de novo
       */
      return findIndex(pos);
    }

    if (curs.residuePosition == pos)
    {
      return curs.columnPosition;
    }

    /*
     * move left or right to find pos from hint.position
     */
    int col = curs.columnPosition - 1; // convert from base 1 to base 0
    int newPos = curs.residuePosition;
    int delta = newPos > pos ? -1 : 1;

    while (newPos != pos)
    {
      col += delta; // shift one column left or right
      if (col < 0)
      {
        // ran off the left edge; col becomes 0 after the conversion below
        break;
      }
      if (col == sequence.length)
      {
        col--; // return last column if we failed to reach pos
        break;
      }
      if (!Comparison.isGap(sequence[col]))
      {
        newPos += delta;
      }
    }

    col++; // convert back to base 1

    /*
     * only update cursor if we found the target position
     */
    if (newPos == pos)
    {
      updateCursor(pos, col, curs.firstColumnPosition);
    }

    return col;
  }
 852
  /**
   * {@inheritDoc}
   * <p>
   * If the stored cursor is valid, the search is delegated to
   * {@link #findPosition(int, SequenceCursor)} (which takes a base 1 column).
   * Otherwise the sequence is scanned from its first column; the value
   * returned is start plus the number of residues found to the left of the
   * given column, and the cursor is set to the last residue seen, if any.
   *
   * @param column
   *          the column position (base 0)
   */
  @Override
  public int findPosition(final int column)
  {
    /*
     * use a valid, hopefully nearby, cursor if available
     */
    if (isValidCursor(cursor))
    {
      return findPosition(column + 1, cursor);
    }

    // TODO recode this more naturally i.e. count residues only
    // as they are found, not 'in anticipation'

    /*
     * traverse the sequence counting gaps; note the column position
     * of the first residue, to save in the cursor
     */
    int firstResidueColumn = 0;
    int lastPosFound = 0;
    int lastPosFoundColumn = 0;
    int seqlen = sequence.length;

    if (seqlen > 0 && !Comparison.isGap(sequence[0]))
    {
      lastPosFound = start;
      lastPosFoundColumn = 0;
    }

    int j = 0;
    int pos = start; // position that the next residue found will have

    // visit the columns to the left of the target column
    while (j < column && j < seqlen)
    {
      if (!Comparison.isGap(sequence[j]))
      {
        lastPosFound = pos;
        lastPosFoundColumn = j;
        if (pos == this.start)
        {
          firstResidueColumn = j;
        }
        pos++;
      }
      j++;
    }
    // if the scan stopped on a residue inside the sequence, note it for the
    // cursor, but do not count it in the value returned
    if (j < seqlen && !Comparison.isGap(sequence[j]))
    {
      lastPosFound = pos;
      lastPosFoundColumn = j;
      if (pos == this.start)
      {
        firstResidueColumn = j;
      }
    }

    /*
     * update the cursor to the last residue position found (if any)
     * (converting column position to base 1)
     */
    if (lastPosFound != 0)
    {
      updateCursor(lastPosFound, lastPosFoundColumn + 1,
              firstResidueColumn + 1);
    }

    return pos;
  }
 924
 925   /**
 926    * Answers true if the given cursor is not null, is for this sequence object,
 927    * and has a token value that matches this object's changeCount, else false.
 928    * This allows us to ignore a cursor as 'stale' if the sequence has been
 929    * modified since the cursor was created.
 930    *
 931    * @param curs
 932    * @return
 933    */
 934   protected boolean isValidCursor(SequenceCursor curs)
 935   {
 936     if (curs == null || curs.sequence != this || curs.token != changeCount)
 937     {
 938       return false;
 939     }
 940     /*
 941      * sanity check against range
 942      */
 943     if (curs.columnPosition < 0 || curs.columnPosition > sequence.length)
 944     {
 945       return false;
 946     }
 947     if (curs.residuePosition < start || curs.residuePosition > end)
 948     {
 949       return false;
 950     }
 951     return true;
 952   }
 953
  /**
   * Answers the sequence position (start..) for the given aligned column
   * position (1..), given a hint of a cursor in the neighbourhood. The cursor
   * may lie left of, at, or to the right of the column position.
   * <p>
   * When searching to the right, if the column is gapped or the search runs
   * past the end of the sequence, the value returned is one more than the
   * position of the last residue passed.
   *
   * @param col
   *          the column position (base 1)
   * @param curs
   *          the hint; if it is not valid for this sequence the search is done
   *          by findPosition(col - 1) instead
   * @return the residue position found
   */
  protected int findPosition(final int col, SequenceCursor curs)
  {
    if (!isValidCursor(curs))
    {
      /*
       * wrong or invalidated cursor, compute de novo
       */
      return findPosition(col - 1);// ugh back to base 0
    }

    if (curs.columnPosition == col)
    {
      cursor = curs; // in case this method becomes public
      return curs.residuePosition; // easy case :-)
    }

    // a lastColumnPosition of 0 means the end column is not known
    if (curs.lastColumnPosition > 0 && curs.lastColumnPosition < col)
    {
      /*
       * sequence lies entirely to the left of col
       * - return last residue + 1
       */
      return end + 1;
    }

    if (curs.firstColumnPosition > 0 && curs.firstColumnPosition > col)
    {
      /*
       * sequence lies entirely to the right of col
       * - return first residue
       */
      return start;
    }

    // todo could choose closest to col out of column,
    // firstColumnPosition, lastColumnPosition as a start point

    /*
     * move left or right to find pos from cursor position
     */
    int firstResidueColumn = curs.firstColumnPosition;
    int column = curs.columnPosition - 1; // to base 0
    int newPos = curs.residuePosition;
    int delta = curs.columnPosition > col ? -1 : 1;
    boolean gapped = false; // whether the last column visited held a gap
    int lastFoundPosition = curs.residuePosition;
    int lastFoundPositionColumn = curs.columnPosition;

    while (column != col - 1)
    {
      column += delta; // shift one column left or right
      if (column < 0 || column == sequence.length)
      {
        // ran off one edge of the sequence
        break;
      }
      gapped = Comparison.isGap(sequence[column]);
      if (!gapped)
      {
        newPos += delta;
        lastFoundPosition = newPos;
        lastFoundPositionColumn = column + 1;
        if (lastFoundPosition == this.start)
        {
          firstResidueColumn = column + 1;
        }
      }
    }

    // only rebuild the cursor if the search ended on a different residue
    if (cursor == null || lastFoundPosition != cursor.residuePosition)
    {
      updateCursor(lastFoundPosition, lastFoundPositionColumn,
              firstResidueColumn);
    }

    /*
     * hack to give position to the right if on a gap
     * or beyond the length of the sequence (see JAL-2562)
     */
    if (delta > 0 && (gapped || column >= sequence.length))
    {
      newPos++;
    }

    return newPos;
  }
1048
1049   /**
1050    * {@inheritDoc}
1051    */
1052   @Override
1053   public ContiguousI findPositions(int fromColumn, int toColumn)
1054   {
1055     fromColumn = Math.max(fromColumn, 1);
1056     if (toColumn < fromColumn)
1057     {
1058       return null;
1059     }
1060
1061     /*
1062      * find the first non-gapped position, if any
1063      */
1064     int firstPosition = 0;
1065     int col = fromColumn - 1;
1066     int length = sequence.length;
1067     while (col < length && col < toColumn)
1068     {
1069       if (!Comparison.isGap(sequence[col]))
1070       {
1071         firstPosition = findPosition(col++);
1072         break;
1073       }
1074       col++;
1075     }
1076
1077     if (firstPosition == 0)
1078     {
1079       return null;
1080     }
1081
1082     /*
1083      * find the last non-gapped position
1084      */
1085     int lastPosition = firstPosition;
1086     while (col < length && col < toColumn)
1087     {
1088       if (!Comparison.isGap(sequence[col++]))
1089       {
1090         lastPosition++;
1091       }
1092     }
1093
1094     return new Range(firstPosition, lastPosition);
1095   }
1096
1097   /**
1098    * Returns an int array where indices correspond to each residue in the
1099    * sequence and the element value gives its position in the alignment
1100    *
1101    * @return int[SequenceI.getEnd()-SequenceI.getStart()+1] or null if no
1102    *         residues in SequenceI object
1103    */
1104   @Override
1105   public int[] gapMap()
1106   {
1107     String seq = jalview.analysis.AlignSeq.extractGaps(
1108             jalview.util.Comparison.GapChars, new String(sequence));
1109     int[] map = new int[seq.length()];
1110     int j = 0;
1111     int p = 0;
1112
1113     while (j < sequence.length)
1114     {
1115       if (!jalview.util.Comparison.isGap(sequence[j]))
1116       {
1117         map[p++] = j;
1118       }
1119
1120       j++;
1121     }
1122
1123     return map;
1124   }
1125
1126   /**
1127    * Build a bitset corresponding to sequence gaps
1128    *
1129    * @return a BitSet where set values correspond to gaps in the sequence
1130    */
1131   @Override
1132   public BitSet gapBitset()
1133   {
1134     BitSet gaps = new BitSet(sequence.length);
1135     int j = 0;
1136     while (j < sequence.length)
1137     {
1138       if (jalview.util.Comparison.isGap(sequence[j]))
1139       {
1140         gaps.set(j);
1141       }
1142       j++;
1143     }
1144     return gaps;
1145   }
1146
1147   @Override
1148   public int[] findPositionMap()
1149   {
1150     int map[] = new int[sequence.length];
1151     int j = 0;
1152     int pos = start;
1153     int seqlen = sequence.length;
1154     while ((j < seqlen))
1155     {
1156       map[j] = pos;
1157       if (!jalview.util.Comparison.isGap(sequence[j]))
1158       {
1159         pos++;
1160       }
1161
1162       j++;
1163     }
1164     return map;
1165   }
1166
1167   @Override
1168   public List<int[]> getInsertions()
1169   {
1170     ArrayList<int[]> map = new ArrayList<>();
1171     int lastj = -1, j = 0;
1172     int pos = start;
1173     int seqlen = sequence.length;
1174     while ((j < seqlen))
1175     {
1176       if (jalview.util.Comparison.isGap(sequence[j]))
1177       {
1178         if (lastj == -1)
1179         {
1180           lastj = j;
1181         }
1182       }
1183       else
1184       {
1185         if (lastj != -1)
1186         {
1187           map.add(new int[] { lastj, j - 1 });
1188           lastj = -1;
1189         }
1190       }
1191       j++;
1192     }
1193     if (lastj != -1)
1194     {
1195       map.add(new int[] { lastj, j - 1 });
1196       lastj = -1;
1197     }
1198     return map;
1199   }
1200
1201   @Override
1202   public BitSet getInsertionsAsBits()
1203   {
1204     BitSet map = new BitSet();
1205     int lastj = -1, j = 0;
1206     int pos = start;
1207     int seqlen = sequence.length;
1208     while ((j < seqlen))
1209     {
1210       if (jalview.util.Comparison.isGap(sequence[j]))
1211       {
1212         if (lastj == -1)
1213         {
1214           lastj = j;
1215         }
1216       }
1217       else
1218       {
1219         if (lastj != -1)
1220         {
1221           map.set(lastj, j);
1222           lastj = -1;
1223         }
1224       }
1225       j++;
1226     }
1227     if (lastj != -1)
1228     {
1229       map.set(lastj, j);
1230       lastj = -1;
1231     }
1232     return map;
1233   }
1234
1235   @Override
1236   public void deleteChars(final int i, final int j)
1237   {
1238     int newstart = start, newend = end;
1239     if (i >= sequence.length || i < 0)
1240     {
1241       return;
1242     }
1243
1244     char[] tmp = StringUtils.deleteChars(sequence, i, j);
1245     boolean createNewDs = false;
1246     // TODO: take a (second look) at the dataset creation validation method for
1247     // the very large sequence case
1248
1249     int startIndex = findIndex(start) - 1;
1250     int endIndex = findIndex(end) - 1;
1251     int startDeleteColumn = -1; // for dataset sequence deletions
1252     int deleteCount = 0;
1253
1254     for (int s = i; s < j && s < sequence.length; s++)
1255     {
1256       if (Comparison.isGap(sequence[s]))
1257       {
1258         continue;
1259       }
1260       deleteCount++;
1261       if (startDeleteColumn == -1)
1262       {
1263         startDeleteColumn = findPosition(s) - start;
1264       }
1265       if (createNewDs)
1266       {
1267         newend--;
1268       }
1269       else
1270       {
1271         if (startIndex == s)
1272         {
1273           /*
1274            * deleting characters from start of sequence; new start is the
1275            * sequence position of the next column (position to the right
1276            * if the column position is gapped)
1277            */
1278           newstart = findPosition(j);
1279           break;
1280         }
1281         else
1282         {
1283           if (endIndex < j)
1284           {
1285             /*
1286              * deleting characters at end of sequence; new end is the sequence
1287              * position of the column before the deletion; subtract 1 if this is
1288              * gapped since findPosition returns the next sequence position
1289              */
1290             newend = findPosition(i - 1);
1291             if (Comparison.isGap(sequence[i - 1]))
1292             {
1293               newend--;
1294             }
1295             break;
1296           }
1297           else
1298           {
1299             createNewDs = true;
1300             newend--;
1301           }
1302         }
1303       }
1304     }
1305
1306     if (createNewDs && this.datasetSequence != null)
1307     {
1308       /*
1309        * if deletion occured in the middle of the sequence,
1310        * construct a new dataset sequence and delete the residues
1311        * that were deleted from the aligned sequence
1312        */
1313       Sequence ds = new Sequence(datasetSequence);
1314       ds.deleteChars(startDeleteColumn, startDeleteColumn + deleteCount);
1315       datasetSequence = ds;
1316       // TODO: remove any non-inheritable properties ?
1317       // TODO: create a sequence mapping (since there is a relation here ?)
1318     }
1319     start = newstart;
1320     end = newend;
1321     sequence = tmp;
1322     sequenceChanged();
1323   }
1324
1325   @Override
1326   public void insertCharAt(int i, int length, char c)
1327   {
1328     char[] tmp = new char[sequence.length + length];
1329
1330     if (i >= sequence.length)
1331     {
1332       System.arraycopy(sequence, 0, tmp, 0, sequence.length);
1333       i = sequence.length;
1334     }
1335     else
1336     {
1337       System.arraycopy(sequence, 0, tmp, 0, i);
1338     }
1339
1340     int index = i;
1341     while (length > 0)
1342     {
1343       tmp[index++] = c;
1344       length--;
1345     }
1346
1347     if (i < sequence.length)
1348     {
1349       System.arraycopy(sequence, i, tmp, index, sequence.length - i);
1350     }
1351
1352     sequence = tmp;
1353     sequenceChanged();
1354   }
1355
1356   @Override
1357   public void insertCharAt(int i, char c)
1358   {
1359     insertCharAt(i, 1, c);
1360   }
1361
1362   @Override
1363   public String getVamsasId()
1364   {
1365     return vamsasId;
1366   }
1367
1368   @Override
1369   public void setVamsasId(String id)
1370   {
1371     vamsasId = id;
1372   }
1373
1374   @Override
1375   public void setDBRefs(DBRefEntry[] dbref)
1376   {
1377     if (dbrefs == null && datasetSequence != null
1378             && this != datasetSequence)
1379     {
1380       datasetSequence.setDBRefs(dbref);
1381       return;
1382     }
1383     dbrefs = dbref;
1384     if (dbrefs != null)
1385     {
1386       DBRefUtils.ensurePrimaries(this);
1387     }
1388   }
1389
1390   @Override
1391   public DBRefEntry[] getDBRefs()
1392   {
1393     if (dbrefs == null && datasetSequence != null
1394             && this != datasetSequence)
1395     {
1396       return datasetSequence.getDBRefs();
1397     }
1398     return dbrefs;
1399   }
1400
1401   @Override
1402   public void addDBRef(DBRefEntry entry)
1403   {
1404     if (datasetSequence != null)
1405     {
1406       datasetSequence.addDBRef(entry);
1407       return;
1408     }
1409
1410     if (dbrefs == null)
1411     {
1412       dbrefs = new DBRefEntry[0];
1413     }
1414
1415     for (DBRefEntryI dbr : dbrefs)
1416     {
1417       if (dbr.updateFrom(entry))
1418       {
1419         /*
1420          * found a dbref that either matched, or could be
1421          * updated from, the new entry - no need to add it
1422          */
1423         return;
1424       }
1425     }
1426
1427     /*
1428      * extend the array to make room for one more
1429      */
1430     // TODO use an ArrayList instead
1431     int j = dbrefs.length;
1432     DBRefEntry[] temp = new DBRefEntry[j + 1];
1433     System.arraycopy(dbrefs, 0, temp, 0, j);
1434     temp[temp.length - 1] = entry;
1435
1436     dbrefs = temp;
1437
1438     DBRefUtils.ensurePrimaries(this);
1439   }
1440
1441   @Override
1442   public void setDatasetSequence(SequenceI seq)
1443   {
1444     if (seq == this)
1445     {
1446       throw new IllegalArgumentException(
1447               "Implementation Error: self reference passed to SequenceI.setDatasetSequence");
1448     }
1449     if (seq != null && seq.getDatasetSequence() != null)
1450     {
1451       throw new IllegalArgumentException(
1452               "Implementation error: cascading dataset sequences are not allowed.");
1453     }
1454     datasetSequence = seq;
1455   }
1456
1457   @Override
1458   public SequenceI getDatasetSequence()
1459   {
1460     return datasetSequence;
1461   }
1462
1463   @Override
1464   public AlignmentAnnotation[] getAnnotation()
1465   {
1466     return annotation == null ? null
1467             : annotation
1468                     .toArray(new AlignmentAnnotation[annotation.size()]);
1469   }
1470
1471   @Override
1472   public boolean hasAnnotation(AlignmentAnnotation ann)
1473   {
1474     return annotation == null ? false : annotation.contains(ann);
1475   }
1476
1477   @Override
1478   public void addAlignmentAnnotation(AlignmentAnnotation annotation)
1479   {
1480     if (this.annotation == null)
1481     {
1482       this.annotation = new Vector<>();
1483     }
1484     if (!this.annotation.contains(annotation))
1485     {
1486       this.annotation.addElement(annotation);
1487     }
1488     annotation.setSequenceRef(this);
1489   }
1490
1491   @Override
1492   public void removeAlignmentAnnotation(AlignmentAnnotation annotation)
1493   {
1494     if (this.annotation != null)
1495     {
1496       this.annotation.removeElement(annotation);
1497       if (this.annotation.size() == 0)
1498       {
1499         this.annotation = null;
1500       }
1501     }
1502   }
1503
1504   /**
1505    * test if this is a valid candidate for another sequence's dataset sequence.
1506    *
1507    */
1508   private boolean isValidDatasetSequence()
1509   {
1510     if (datasetSequence != null)
1511     {
1512       return false;
1513     }
1514     for (int i = 0; i < sequence.length; i++)
1515     {
1516       if (jalview.util.Comparison.isGap(sequence[i]))
1517       {
1518         return false;
1519       }
1520     }
1521     return true;
1522   }
1523
1524   @Override
1525   public SequenceI deriveSequence()
1526   {
1527     Sequence seq = null;
1528     if (datasetSequence == null)
1529     {
1530       if (isValidDatasetSequence())
1531       {
1532         // Use this as dataset sequence
1533         seq = new Sequence(getName(), "", 1, -1);
1534         seq.setDatasetSequence(this);
1535         seq.initSeqFrom(this, getAnnotation());
1536         return seq;
1537       }
1538       else
1539       {
1540         // Create a new, valid dataset sequence
1541         createDatasetSequence();
1542       }
1543     }
1544     return new Sequence(this);
1545   }
1546
1547   private boolean _isNa;
1548
1549   private int _seqhash = 0;
1550
1551   /**
1552    * Answers false if the sequence is more than 85% nucleotide (ACGTU), else
1553    * true
1554    */
1555   @Override
1556   public boolean isProtein()
1557   {
1558     if (datasetSequence != null)
1559     {
1560       return datasetSequence.isProtein();
1561     }
1562     if (_seqhash != sequence.hashCode())
1563     {
1564       _seqhash = sequence.hashCode();
1565       _isNa = Comparison.isNucleotide(this);
1566     }
1567     return !_isNa;
1568   };
1569
1570   /*
1571    * (non-Javadoc)
1572    *
1573    * @see jalview.datamodel.SequenceI#createDatasetSequence()
1574    */
1575   @Override
1576   public SequenceI createDatasetSequence()
1577   {
1578     if (datasetSequence == null)
1579     {
1580       Sequence dsseq = new Sequence(getName(),
1581               AlignSeq.extractGaps(jalview.util.Comparison.GapChars,
1582                       getSequenceAsString()),
1583               getStart(), getEnd());
1584
1585       datasetSequence = dsseq;
1586
1587       dsseq.setDescription(description);
1588       // move features and database references onto dataset sequence
1589       dsseq.sequenceFeatureStore = sequenceFeatureStore;
1590       sequenceFeatureStore = null;
1591       dsseq.dbrefs = dbrefs;
1592       dbrefs = null;
1593       // TODO: search and replace any references to this sequence with
1594       // references to the dataset sequence in Mappings on dbref
1595       dsseq.pdbIds = pdbIds;
1596       pdbIds = null;
1597       datasetSequence.updatePDBIds();
1598       if (annotation != null)
1599       {
1600         // annotation is cloned rather than moved, to preserve what's currently
1601         // on the alignment
1602         for (AlignmentAnnotation aa : annotation)
1603         {
1604           AlignmentAnnotation _aa = new AlignmentAnnotation(aa);
1605           _aa.sequenceRef = datasetSequence;
1606           _aa.adjustForAlignment(); // uses annotation's own record of
1607                                     // sequence-column mapping
1608           datasetSequence.addAlignmentAnnotation(_aa);
1609         }
1610       }
1611     }
1612     return datasetSequence;
1613   }
1614
1615   /*
1616    * (non-Javadoc)
1617    *
1618    * @see
1619    * jalview.datamodel.SequenceI#setAlignmentAnnotation(AlignmmentAnnotation[]
1620    * annotations)
1621    */
1622   @Override
1623   public void setAlignmentAnnotation(AlignmentAnnotation[] annotations)
1624   {
1625     if (annotation != null)
1626     {
1627       annotation.removeAllElements();
1628     }
1629     if (annotations != null)
1630     {
1631       for (int i = 0; i < annotations.length; i++)
1632       {
1633         if (annotations[i] != null)
1634         {
1635           addAlignmentAnnotation(annotations[i]);
1636         }
1637       }
1638     }
1639   }
1640
1641   @Override
1642   public AlignmentAnnotation[] getAnnotation(String label)
1643   {
1644     if (annotation == null || annotation.size() == 0)
1645     {
1646       return null;
1647     }
1648
1649     Vector<AlignmentAnnotation> subset = new Vector<>();
1650     Enumeration<AlignmentAnnotation> e = annotation.elements();
1651     while (e.hasMoreElements())
1652     {
1653       AlignmentAnnotation ann = e.nextElement();
1654       if (ann.label != null && ann.label.equals(label))
1655       {
1656         subset.addElement(ann);
1657       }
1658     }
1659     if (subset.size() == 0)
1660     {
1661       return null;
1662     }
1663     AlignmentAnnotation[] anns = new AlignmentAnnotation[subset.size()];
1664     int i = 0;
1665     e = subset.elements();
1666     while (e.hasMoreElements())
1667     {
1668       anns[i++] = e.nextElement();
1669     }
1670     subset.removeAllElements();
1671     return anns;
1672   }
1673
1674   @Override
1675   public boolean updatePDBIds()
1676   {
1677     if (datasetSequence != null)
1678     {
1679       // TODO: could merge DBRefs
1680       return datasetSequence.updatePDBIds();
1681     }
1682     if (dbrefs == null || dbrefs.length == 0)
1683     {
1684       return false;
1685     }
1686     boolean added = false;
1687     for (DBRefEntry dbr : dbrefs)
1688     {
1689       if (DBRefSource.PDB.equals(dbr.getSource()))
1690       {
1691         /*
1692          * 'Add' any PDB dbrefs as a PDBEntry - add is only performed if the
1693          * PDB id is not already present in a 'matching' PDBEntry
1694          * Constructor parses out a chain code if appended to the accession id
1695          * (a fudge used to 'store' the chain code in the DBRef)
1696          */
1697         PDBEntry pdbe = new PDBEntry(dbr);
1698         added |= addPDBId(pdbe);
1699       }
1700     }
1701     return added;
1702   }
1703
1704   @Override
1705   public void transferAnnotation(SequenceI entry, Mapping mp)
1706   {
1707     if (datasetSequence != null)
1708     {
1709       datasetSequence.transferAnnotation(entry, mp);
1710       return;
1711     }
1712     if (entry.getDatasetSequence() != null)
1713     {
1714       transferAnnotation(entry.getDatasetSequence(), mp);
1715       return;
1716     }
1717     // transfer any new features from entry onto sequence
1718     if (entry.getSequenceFeatures() != null)
1719     {
1720
1721       List<SequenceFeature> sfs = entry.getSequenceFeatures();
1722       for (SequenceFeature feature : sfs)
1723       {
1724        SequenceFeature sf[] = (mp != null) ? mp.locateFeature(feature)
1725                 : new SequenceFeature[] { new SequenceFeature(feature) };
1726         if (sf != null)
1727         {
1728           for (int sfi = 0; sfi < sf.length; sfi++)
1729           {
1730             addSequenceFeature(sf[sfi]);
1731           }
1732         }
1733       }
1734     }
1735
1736     // transfer PDB entries
1737     if (entry.getAllPDBEntries() != null)
1738     {
1739       Enumeration<PDBEntry> e = entry.getAllPDBEntries().elements();
1740       while (e.hasMoreElements())
1741       {
1742         PDBEntry pdb = e.nextElement();
1743         addPDBId(pdb);
1744       }
1745     }
1746     // transfer database references
1747     DBRefEntry[] entryRefs = entry.getDBRefs();
1748     if (entryRefs != null)
1749     {
1750       for (int r = 0; r < entryRefs.length; r++)
1751       {
1752         DBRefEntry newref = new DBRefEntry(entryRefs[r]);
1753         if (newref.getMap() != null && mp != null)
1754         {
1755           // remap ref using our local mapping
1756         }
1757         // we also assume all version string setting is done by dbSourceProxy
1758         /*
1759          * if (!newref.getSource().equalsIgnoreCase(dbSource)) {
1760          * newref.setSource(dbSource); }
1761          */
1762         addDBRef(newref);
1763       }
1764     }
1765   }
1766
1767   @Override
1768   public void setRNA(RNA r)
1769   {
1770     rna = r;
1771   }
1772
1773   @Override
1774   public RNA getRNA()
1775   {
1776     return rna;
1777   }
1778
1779   @Override
1780   public List<AlignmentAnnotation> getAlignmentAnnotations(String calcId,
1781           String label)
1782   {
1783     List<AlignmentAnnotation> result = new ArrayList<>();
1784     if (this.annotation != null)
1785     {
1786       for (AlignmentAnnotation ann : annotation)
1787       {
1788         String id = ann.getCalcId();
1789         if (id != null && id.equals(calcId)
1790                 && ann.label != null && ann.label.equals(label))
1791         {
1792           result.add(ann);
1793         }
1794       }
1795     }
1796     return result;
1797   }
1798
1799   @Override
1800   public String toString()
1801   {
1802     return getDisplayId(false);
1803   }
1804
1805   @Override
1806   public PDBEntry getPDBEntry(String pdbIdStr)
1807   {
1808     if (getDatasetSequence() != null)
1809     {
1810       return getDatasetSequence().getPDBEntry(pdbIdStr);
1811     }
1812     if (pdbIds == null)
1813     {
1814       return null;
1815     }
1816     List<PDBEntry> entries = getAllPDBEntries();
1817     for (PDBEntry entry : entries)
1818     {
1819       if (entry.getId().equalsIgnoreCase(pdbIdStr))
1820       {
1821         return entry;
1822       }
1823     }
1824     return null;
1825   }
1826
1827   @Override
1828   public List<DBRefEntry> getPrimaryDBRefs()
1829   {
1830     if (datasetSequence != null)
1831     {
1832       return datasetSequence.getPrimaryDBRefs();
1833     }
1834     if (dbrefs == null || dbrefs.length == 0)
1835     {
1836       return Collections.emptyList();
1837     }
1838     synchronized (dbrefs)
1839     {
1840       List<DBRefEntry> primaries = new ArrayList<>();
1841       DBRefEntry[] tmp = new DBRefEntry[1];
1842       for (DBRefEntry ref : dbrefs)
1843       {
1844         if (!ref.isPrimaryCandidate())
1845         {
1846           continue;
1847         }
1848         if (ref.hasMap())
1849         {
1850           MapList mp = ref.getMap().getMap();
1851           if (mp.getFromLowest() > start || mp.getFromHighest() < end)
1852           {
1853             // map only involves a subsequence, so cannot be primary
1854             continue;
1855           }
1856         }
1857         // whilst it looks like it is a primary ref, we also sanity check type
1858         if (DBRefUtils.getCanonicalName(DBRefSource.PDB)
1859                 .equals(DBRefUtils.getCanonicalName(ref.getSource())))
1860         {
1861           // PDB dbrefs imply there should be a PDBEntry associated
1862           // TODO: tighten PDB dbrefs
1863           // formally imply Jalview has actually downloaded and
1864           // parsed the pdb file. That means there should be a cached file
1865           // handle on the PDBEntry, and a real mapping between sequence and
1866           // extracted sequence from PDB file
1867           PDBEntry pdbentry = getPDBEntry(ref.getAccessionId());
1868           if (pdbentry != null && pdbentry.getFile() != null)
1869           {
1870             primaries.add(ref);
1871           }
1872           continue;
1873         }
1874         // check standard protein or dna sources
1875         tmp[0] = ref;
1876         DBRefEntry[] res = DBRefUtils.selectDbRefs(!isProtein(), tmp);
1877         if (res != null && res[0] == tmp[0])
1878         {
1879           primaries.add(ref);
1880           continue;
1881         }
1882       }
1883       return primaries;
1884     }
1885   }
1886
1887   @Override
1888   public HiddenMarkovModel getHMM()
1889   {
1890     return hmm;
1891   }
1892
1893   @Override
1894   public void setHMM(HiddenMarkovModel hmm)
1895   {
1896     this.hmm = hmm;
1897   }
1898
1899   @Override
1900   public boolean hasHMMAnnotation()
1901   {
1902     if (this.annotation == null) {
1903       return false;
1904     }
1905     for (AlignmentAnnotation ann : annotation)
1906     {
1907       if (InformationThread.HMM_CALC_ID.equals(ann.getCalcId()))
1908       {
1909         return true;
1910       }
1911     }
1912     return false;
1913   }
1914
1915   /**
1916    * {@inheritDoc}
1917    */
1918   @Override
1919   public List<SequenceFeature> findFeatures(int fromColumn, int toColumn,
1920           String... types)
1921   {
1922     int startPos = findPosition(fromColumn - 1); // convert base 1 to base 0
1923     int endPos = fromColumn == toColumn ? startPos
1924             : findPosition(toColumn - 1);
1925
1926     List<SequenceFeature> result = getFeatures().findFeatures(startPos,
1927             endPos, types);
1928     if (datasetSequence != null)
1929     {
1930       result = datasetSequence.getFeatures().findFeatures(startPos, endPos,
1931               types);
1932     }
1933     else
1934     {
1935       result = sequenceFeatureStore.findFeatures(startPos, endPos, types);
1936     }
1937
1938     /*
1939      * if end column is gapped, endPos may be to the right,
1940      * and we may have included adjacent or enclosing features;
1941      * remove any that are not enclosing, non-contact features
1942      */
1943     boolean endColumnIsGapped = toColumn > 0 && toColumn <= sequence.length
1944             && Comparison.isGap(sequence[toColumn - 1]);
1945     if (endPos > this.end || endColumnIsGapped)
1946     {
1947       ListIterator<SequenceFeature> it = result.listIterator();
1948       while (it.hasNext())
1949       {
1950         SequenceFeature sf = it.next();
1951         int sfBegin = sf.getBegin();
1952         int sfEnd = sf.getEnd();
1953         int featureStartColumn = findIndex(sfBegin);
1954         if (featureStartColumn > toColumn)
1955         {
1956           it.remove();
1957         }
1958         else if (featureStartColumn < fromColumn)
1959         {
1960           int featureEndColumn = sfEnd == sfBegin ? featureStartColumn
1961                   : findIndex(sfEnd);
1962           if (featureEndColumn < fromColumn)
1963           {
1964             it.remove();
1965           }
1966           else if (featureEndColumn > toColumn && sf.isContactFeature())
1967           {
1968             /*
1969              * remove an enclosing feature if it is a contact feature
1970              */
1971             it.remove();
1972           }
1973         }
1974       }
1975     }
1976
1977     return result;
1978   }
1979
1980   /**
1981    * Invalidates any stale cursors (forcing recalculation) by incrementing the
1982    * token that has to match the one presented by the cursor
1983    */
1984   @Override
1985   public void sequenceChanged()
1986   {
1987     changeCount++;
1988   }
1989
1990   /**
1991    * {@inheritDoc}
1992    */
1993   @Override
1994   public int replace(char c1, char c2)
1995   {
1996     if (c1 == c2)
1997     {
1998       return 0;
1999     }
2000     int count = 0;
2001     synchronized (sequence)
2002     {
2003       for (int c = 0; c < sequence.length; c++)
2004       {
2005         if (sequence[c] == c1)
2006         {
2007           sequence[c] = c2;
2008           count++;
2009         }
2010       }
2011     }
2012     if (count > 0)
2013     {
2014       sequenceChanged();
2015     }
2016
2017     return count;
2018   }
2019
2020   @Override
2021   public String getSequenceStringFromIterator(Iterator<int[]> it)
2022   {
2023     StringBuilder newSequence = new StringBuilder();
2024     while (it.hasNext())
2025     {
2026       int[] block = it.next();
2027       if (it.hasNext())
2028       {
2029         newSequence.append(getSequence(block[0], block[1] + 1));
2030       }
2031       else
2032       {
2033         newSequence.append(getSequence(block[0], block[1]));
2034       }
2035     }
2036
2037     return newSequence.toString();
2038   }
2039
2040   @Override
2041   public int firstResidueOutsideIterator(Iterator<int[]> regions)
2042   {
2043     int start = 0;
2044
2045     if (!regions.hasNext())
2046     {
2047       return findIndex(getStart()) - 1;
2048     }
2049
2050     // Simply walk along the sequence whilst watching for region
2051     // boundaries
2052     int hideStart = getLength();
2053     int hideEnd = -1;
2054     boolean foundStart = false;
2055
2056     // step through the non-gapped positions of the sequence
2057     for (int i = getStart(); i <= getEnd() && (!foundStart); i++)
2058     {
2059       // get alignment position of this residue in the sequence
2060       int p = findIndex(i) - 1;
2061
2062       // update region start/end
2063       while (hideEnd < p && regions.hasNext())
2064       {
2065         int[] region = regions.next();
2066         hideStart = region[0];
2067         hideEnd = region[1];
2068       }
2069       if (hideEnd < p)
2070       {
2071         hideStart = getLength();
2072       }
2073       // update boundary for sequence
2074       if (p < hideStart)
2075       {
2076         start = p;
2077         foundStart = true;
2078       }
2079     }
2080
2081     if (foundStart)
2082     {
2083       return start;
2084     }
2085     // otherwise, sequence was completely hidden
2086     return 0;
2087   }
2088
2089   @Override
2090   public boolean hasHMMProfile()
2091   {
2092     return hmm != null;
2093   }
2094 }