src/jalview/datamodel/Sequence.java

   1 /*
   2  * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
   3  * Copyright (C) $$Year-Rel$$ The Jalview Authors
   4  *
   5  * This file is part of Jalview.
   6  *
   7  * Jalview is free software: you can redistribute it and/or
   8  * modify it under the terms of the GNU General Public License
   9  * as published by the Free Software Foundation, either version 3
  10  * of the License, or (at your option) any later version.
  11  *
  12  * Jalview is distributed in the hope that it will be useful, but
  13  * WITHOUT ANY WARRANTY; without even the implied warranty
  14  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
  15  * PURPOSE.  See the GNU General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU General Public License
  18  * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
  19  * The Jalview Authors are detailed in the 'AUTHORS' file.
  20  */
  21 package jalview.datamodel;
  22
  23 import jalview.analysis.AlignSeq;
  24 import jalview.api.DBRefEntryI;
  25 import jalview.datamodel.features.SequenceFeatures;
  26 import jalview.datamodel.features.SequenceFeaturesI;
  27 import jalview.util.Comparison;
  28 import jalview.util.DBRefUtils;
  29 import jalview.util.MapList;
  30 import jalview.util.StringUtils;
  31 import jalview.workers.InformationThread;
  32
  33 import java.util.ArrayList;
  34 import java.util.Arrays;
  35 import java.util.BitSet;
  36 import java.util.Collections;
  37 import java.util.Enumeration;
  38 import java.util.Iterator;
  39 import java.util.List;
  40 import java.util.ListIterator;
  41 import java.util.Vector;
  42
  43 import fr.orsay.lri.varna.models.rna.RNA;
  44
  45 /**
  46  *
  47  * Implements the SequenceI interface for a char[] based sequence object
  48  */
  49 public class Sequence extends ASequence implements SequenceI
  50 {
  /*
   * when non-null, sequence feature operations on this object are
   * delegated to this sequence
   */
  SequenceI datasetSequence;

  /*
   * display name; parseId() strips a valid trailing /start-end suffix
   */
  String name;

  /*
   * the aligned characters, i.e. residues and gap characters
   */
  private char[] sequence;

  String description;

  /*
   * residue numbering of the first residue
   */
  int start;

  /*
   * residue numbering of the last residue; checkValidRange() raises it if it
   * is too small for the number of non-gap characters held
   */
  int end;

  HiddenMarkovModel hmm;

  boolean isHMMConsensusSequence = false;

  /*
   * lazily created by addPDBId(); may be null
   */
  Vector<PDBEntry> pdbIds;

  String vamsasId;

  DBRefEntry[] dbrefs;

  RNA rna;

  /**
   * This annotation is displayed below the alignment but the positions are tied
   * to the residues of this sequence
   *
   * TODO: change to List<>
   */
  Vector<AlignmentAnnotation> annotation;

  /*
   * features held locally; only consulted when datasetSequence is null
   */
  private SequenceFeaturesI sequenceFeatureStore;

  /*
   * A cursor holding the approximate current view position to the sequence,
   * as determined by findIndex or findPosition or findPositions.
   * Using a cursor as a hint allows these methods to be more performant for
   * large sequences.
   */
  private SequenceCursor cursor;

  /*
   * A number that should be incremented whenever the sequence is edited.
   * If the value matches the cursor token, then we can trust the cursor,
   * if not then it should be recomputed.
   */
  private int changeCount;
  99
 100   /**
 101    * Creates a new Sequence object.
 102    *
 103    * @param name
 104    *          display name string
 105    * @param sequence
 106    *          string to form a possibly gapped sequence out of
 107    * @param start
 108    *          first position of non-gap residue in the sequence
 109    * @param end
 110    *          last position of ungapped residues (nearly always only used for
 111    *          display purposes)
 112    */
 113   public Sequence(String name, String sequence, int start, int end)
 114   {
 115     this();
 116     initSeqAndName(name, sequence.toCharArray(), start, end);
 117   }
 118
  /**
   * Constructs a sequence from a display name and a character array. The array
   * is stored as given (it is not copied here).
   *
   * @param name
   *          display name; a valid trailing /start-end suffix is stripped and
   *          overrides the start and end arguments
   * @param sequence
   *          the aligned characters (residues and gaps)
   * @param start
   *          residue number of the first non-gap character
   * @param end
   *          residue number of the last non-gap character
   */
  public Sequence(String name, char[] sequence, int start, int end)
  {
    this();
    initSeqAndName(name, sequence, start, end);
  }
 124
 125   /**
 126    * Stage 1 constructor - assign name, sequence, and set start and end fields.
 127    * start and end are updated values from name2 if it ends with /start-end
 128    *
 129    * @param name2
 130    * @param sequence2
 131    * @param start2
 132    * @param end2
 133    */
 134   protected void initSeqAndName(String name2, char[] sequence2, int start2,
 135           int end2)
 136   {
 137     this.name = name2;
 138     this.sequence = sequence2;
 139     this.start = start2;
 140     this.end = end2;
 141     parseId();
 142     checkValidRange();
 143   }
 144
 145   /**
 146    * If 'name' ends in /i-j, where i >= j > 0 are integers, extracts i and j as
 147    * start and end respectively and removes the suffix from the name
 148    */
 149   void parseId()
 150   {
 151     if (name == null)
 152     {
 153       System.err.println(
 154               "POSSIBLE IMPLEMENTATION ERROR: null sequence name passed to constructor.");
 155       name = "";
 156     }
 157     int slashPos = name.lastIndexOf('/');
 158     if (slashPos > -1 && slashPos < name.length() - 1)
 159     {
 160       String suffix = name.substring(slashPos + 1);
 161       String[] range = suffix.split("-");
 162       if (range.length == 2)
 163       {
 164         try
 165         {
 166           int from = Integer.valueOf(range[0]);
 167           int to = Integer.valueOf(range[1]);
 168           if (from > 0 && to >= from)
 169           {
 170             name = name.substring(0, slashPos);
 171             setStart(from);
 172             setEnd(to);
 173             checkValidRange();
 174           }
 175         } catch (NumberFormatException e)
 176         {
 177           // leave name unchanged if suffix is invalid
 178         }
 179       }
 180     }
 181   }
 182
 183   /**
 184    * Ensures that 'end' is not before the end of the sequence, that is,
 185    * (end-start+1) is at least as long as the count of ungapped positions. Note
 186    * that end is permitted to be beyond the end of the sequence data.
 187    */
 188   void checkValidRange()
 189   {
 190     // Note: JAL-774 :
 191     // http://issues.jalview.org/browse/JAL-774?focusedCommentId=11239&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-11239
 192     {
 193       int endRes = 0;
 194       for (int j = 0; j < sequence.length; j++)
 195       {
 196         if (!Comparison.isGap(sequence[j]))
 197         {
 198           endRes++;
 199         }
 200       }
 201       if (endRes > 0)
 202       {
 203         endRes += start - 1;
 204       }
 205
 206       if (end < endRes)
 207       {
 208         end = endRes;
 209       }
 210     }
 211
 212   }
 213
 214   /**
 215    * default constructor
 216    */
 217   private Sequence()
 218   {
 219     sequenceFeatureStore = new SequenceFeatures();
 220   }
 221
  /**
   * Creates a new Sequence object with start 1 and an end value of -1, which
   * range checking then raises to fit the residues supplied (unless the name
   * carries a valid /start-end suffix).
   *
   * @param name
   *          display name, optionally ending in /start-end
   * @param sequence
   *          the aligned characters (residues and gaps)
   */
  public Sequence(String name, String sequence)
  {
    this(name, sequence, 1, -1);
  }
 234
  /**
   * Creates a new Sequence object with new AlignmentAnnotations but inherits
   * any existing dataset sequence reference. If none exists, everything is
   * copied.
   *
   * @param seq
   *          if seq is a dataset sequence, behaves like a plain old copy
   *          constructor
   */
  public Sequence(SequenceI seq)
  {
    this(seq, seq.getAnnotation());
  }
 248
  /**
   * Create a new sequence object with new features, DBRefEntries, and PDBIds
   * but inherits any existing dataset sequence reference, and duplicate of any
   * annotation that is present in the given annotation array.
   *
   * @param seq
   *          the sequence to be copied
   * @param alAnnotation
   *          an array of annotation including some associated with seq; if
   *          null, all of seq's annotation is copied
   */
  public Sequence(SequenceI seq, AlignmentAnnotation[] alAnnotation)
  {
    this();
    initSeqFrom(seq, alAnnotation);
  }
 264
 265   /**
 266    * does the heavy lifting when cloning a dataset sequence, or coping data from
 267    * dataset to a new derived sequence.
 268    *
 269    * @param seq
 270    *          - source of attributes.
 271    * @param alAnnotation
 272    *          - alignment annotation present on seq that should be copied onto
 273    *          this sequence
 274    */
 275   protected void initSeqFrom(SequenceI seq,
 276           AlignmentAnnotation[] alAnnotation)
 277   {
 278     char[] oseq = seq.getSequence(); // returns a copy of the array
 279     initSeqAndName(seq.getName(), oseq, seq.getStart(), seq.getEnd());
 280
 281     description = seq.getDescription();
 282     if (seq != datasetSequence)
 283     {
 284       setDatasetSequence(seq.getDatasetSequence());
 285     }
 286
 287     /*
 288      * only copy DBRefs and seqfeatures if we really are a dataset sequence
 289      */
 290     if (datasetSequence == null)
 291     {
 292       if (seq.getDBRefs() != null)
 293       {
 294         DBRefEntry[] dbr = seq.getDBRefs();
 295         for (int i = 0; i < dbr.length; i++)
 296         {
 297           addDBRef(new DBRefEntry(dbr[i]));
 298         }
 299       }
 300
 301       /*
 302        * make copies of any sequence features
 303        */
 304       for (SequenceFeature sf : seq.getSequenceFeatures())
 305       {
 306         addSequenceFeature(new SequenceFeature(sf));
 307       }
 308     }
 309
 310     if (seq.getAnnotation() != null)
 311     {
 312       AlignmentAnnotation[] sqann = seq.getAnnotation();
 313       for (int i = 0; i < sqann.length; i++)
 314       {
 315         if (sqann[i] == null)
 316         {
 317           continue;
 318         }
 319         boolean found = (alAnnotation == null);
 320         if (!found)
 321         {
 322           for (int apos = 0; !found && apos < alAnnotation.length; apos++)
 323           {
 324             found = (alAnnotation[apos] == sqann[i]);
 325           }
 326         }
 327         if (found)
 328         {
 329           // only copy the given annotation
 330           AlignmentAnnotation newann = new AlignmentAnnotation(sqann[i]);
 331           addAlignmentAnnotation(newann);
 332         }
 333       }
 334     }
 335     if (seq.getAllPDBEntries() != null)
 336     {
 337       Vector<PDBEntry> ids = seq.getAllPDBEntries();
 338       for (PDBEntry pdb : ids)
 339       {
 340         this.addPDBId(new PDBEntry(pdb));
 341       }
 342     }
 343     if (seq.getHMM() != null)
 344     {
 345       this.hmm = new HiddenMarkovModel(seq.getHMM(), this);
 346     }
 347
 348   }
 349
 350   @Override
 351   public void setSequenceFeatures(List<SequenceFeature> features)
 352   {
 353     if (datasetSequence != null)
 354     {
 355       datasetSequence.setSequenceFeatures(features);
 356       return;
 357     }
 358     sequenceFeatureStore = new SequenceFeatures(features);
 359   }
 360
 361   @Override
 362   public synchronized boolean addSequenceFeature(SequenceFeature sf)
 363   {
 364     if (sf.getType() == null)
 365     {
 366       System.err.println("SequenceFeature type may not be null: "
 367               + sf.toString());
 368       return false;
 369     }
 370
 371     if (datasetSequence != null)
 372     {
 373       return datasetSequence.addSequenceFeature(sf);
 374     }
 375
 376     return sequenceFeatureStore.add(sf);
 377   }
 378
 379   @Override
 380   public void deleteFeature(SequenceFeature sf)
 381   {
 382     if (datasetSequence != null)
 383     {
 384       datasetSequence.deleteFeature(sf);
 385     }
 386     else
 387     {
 388       sequenceFeatureStore.delete(sf);
 389     }
 390   }
 391
 392   /**
 393    * {@inheritDoc}
 394    *
 395    * @return
 396    */
 397   @Override
 398   public List<SequenceFeature> getSequenceFeatures()
 399   {
 400     if (datasetSequence != null)
 401     {
 402       return datasetSequence.getSequenceFeatures();
 403     }
 404     return sequenceFeatureStore.getAllFeatures();
 405   }
 406
 407   @Override
 408   public SequenceFeaturesI getFeatures()
 409   {
 410     return datasetSequence != null ? datasetSequence.getFeatures()
 411             : sequenceFeatureStore;
 412   }
 413
 414   @Override
 415   public boolean addPDBId(PDBEntry entry)
 416   {
 417     if (pdbIds == null)
 418     {
 419       pdbIds = new Vector<>();
 420       pdbIds.add(entry);
 421       return true;
 422     }
 423
 424     for (PDBEntry pdbe : pdbIds)
 425     {
 426       if (pdbe.updateFrom(entry))
 427       {
 428         return false;
 429       }
 430     }
 431     pdbIds.addElement(entry);
 432     return true;
 433   }
 434
  /**
   * Replaces the list of PDB entries with the given vector, which is stored by
   * reference.
   *
   * @param id
   *          the new PDB entries (may be null)
   */
  @Override
  public void setPDBId(Vector<PDBEntry> id)
  {
    pdbIds = id;
  }
 446
 447   /**
 448    * DOCUMENT ME!
 449    *
 450    * @return DOCUMENT ME!
 451    */
 452   @Override
 453   public Vector<PDBEntry> getAllPDBEntries()
 454   {
 455     return pdbIds == null ? new Vector<>() : pdbIds;
 456   }
 457
 458   /**
 459    * Answers the sequence name, with '/start-end' appended if jvsuffix is true
 460    *
 461    * @return
 462    */
 463   @Override
 464   public String getDisplayId(boolean jvsuffix)
 465   {
 466     if (!jvsuffix)
 467     {
 468       return name;
 469     }
 470     StringBuilder result = new StringBuilder(name);
 471     result.append("/").append(start).append("-").append(end);
 472
 473     return result.toString();
 474   }
 475
 476   /**
 477    * Sets the sequence name. If the name ends in /start-end, then the start-end
 478    * values are parsed out and set, and the suffix is removed from the name.
 479    *
 480    * @param theName
 481    */
 482   @Override
 483   public void setName(String theName)
 484   {
 485     this.name = theName;
 486     this.parseId();
 487   }
 488
 489   /**
 490    * DOCUMENT ME!
 491    *
 492    * @return DOCUMENT ME!
 493    */
 494   @Override
 495   public String getName()
 496   {
 497     return this.name;
 498   }
 499
  /**
   * Sets the residue number of the first residue. No range checking is done
   * here.
   *
   * @param start
   *          the new start position
   */
  @Override
  public void setStart(int start)
  {
    this.start = start;
  }
 511
 512   /**
 513    * DOCUMENT ME!
 514    *
 515    * @return DOCUMENT ME!
 516    */
 517   @Override
 518   public int getStart()
 519   {
 520     return this.start;
 521   }
 522
  /**
   * Sets the residue number of the last residue. No range checking is done
   * here.
   *
   * @param end
   *          the new end position
   */
  @Override
  public void setEnd(int end)
  {
    this.end = end;
  }
 534
 535   /**
 536    * DOCUMENT ME!
 537    *
 538    * @return DOCUMENT ME!
 539    */
 540   @Override
 541   public int getEnd()
 542   {
 543     return this.end;
 544   }
 545
 546   /**
 547    * DOCUMENT ME!
 548    *
 549    * @return DOCUMENT ME!
 550    */
 551   @Override
 552   public int getLength()
 553   {
 554     return this.sequence.length;
 555   }
 556
 557   /**
 558    * DOCUMENT ME!
 559    *
 560    * @param seq
 561    *          DOCUMENT ME!
 562    */
 563   @Override
 564   public void setSequence(String seq)
 565   {
 566     this.sequence = seq.toCharArray();
 567     checkValidRange();
 568     sequenceChanged();
 569   }
 570
 571   @Override
 572   public String getSequenceAsString()
 573   {
 574     return new String(sequence);
 575   }
 576
 577   @Override
 578   public String getSequenceAsString(int start, int end)
 579   {
 580     return new String(getSequence(start, end));
 581   }
 582
 583   @Override
 584   public char[] getSequence()
 585   {
 586     // return sequence;
 587     return sequence == null ? null : Arrays.copyOf(sequence,
 588             sequence.length);
 589   }
 590
 591   /*
 592    * (non-Javadoc)
 593    *
 594    * @see jalview.datamodel.SequenceI#getSequence(int, int)
 595    */
 596   @Override
 597   public char[] getSequence(int start, int end)
 598   {
 599     if (start < 0)
 600     {
 601       start = 0;
 602     }
 603     // JBPNote - left to user to pad the result here (TODO:Decide on this
 604     // policy)
 605     if (start >= sequence.length)
 606     {
 607       return new char[0];
 608     }
 609
 610     if (end >= sequence.length)
 611     {
 612       end = sequence.length;
 613     }
 614
 615     char[] reply = new char[end - start];
 616     System.arraycopy(sequence, start, reply, 0, end - start);
 617
 618     return reply;
 619   }
 620
 621   @Override
 622   public SequenceI getSubSequence(int start, int end)
 623   {
 624     if (start < 0)
 625     {
 626       start = 0;
 627     }
 628     char[] seq = getSequence(start, end);
 629     if (seq.length == 0)
 630     {
 631       return null;
 632     }
 633     int nstart = findPosition(start);
 634     int nend = findPosition(end) - 1;
 635     // JBPNote - this is an incomplete copy.
 636     SequenceI nseq = new Sequence(this.getName(), seq, nstart, nend);
 637     nseq.setDescription(description);
 638     if (datasetSequence != null)
 639     {
 640       nseq.setDatasetSequence(datasetSequence);
 641     }
 642     else
 643     {
 644       nseq.setDatasetSequence(this);
 645     }
 646     return nseq;
 647   }
 648
 649   /**
 650    * Returns the character of the aligned sequence at the given position (base
 651    * zero), or space if the position is not within the sequence's bounds
 652    *
 653    * @return
 654    */
 655   @Override
 656   public char getCharAt(int i)
 657   {
 658     if (i >= 0 && i < sequence.length)
 659     {
 660       return sequence[i];
 661     }
 662     else
 663     {
 664       return ' ';
 665     }
 666   }
 667
 668   /**
 669    * Sets the sequence description, and also parses out any special formats of
 670    * interest
 671    *
 672    * @param desc
 673    */
 674   @Override
 675   public void setDescription(String desc)
 676   {
 677     this.description = desc;
 678   }
 679
 680   @Override
 681   public void setGeneLoci(String speciesId, String assemblyId,
 682           String chromosomeId, MapList map)
 683   {
 684     addDBRef(new GeneLocus(speciesId, assemblyId, chromosomeId,
 685             new Mapping(map)));
 686   }
 687
 688   /**
 689    * Returns the gene loci mapping for the sequence (may be null)
 690    *
 691    * @return
 692    */
 693   @Override
 694   public GeneLociI getGeneLoci()
 695   {
 696     DBRefEntry[] refs = getDBRefs();
 697     if (refs != null)
 698     {
 699       for (final DBRefEntry ref : refs)
 700       {
 701         if (ref instanceof GeneLociI)
 702         {
 703           return (GeneLociI) ref;
 704         }
 705       }
 706     }
 707     return null;
 708   }
 709
 710   /**
 711    * Answers the description
 712    *
 713    * @return
 714    */
 715   @Override
 716   public String getDescription()
 717   {
 718     return this.description;
 719   }
 720
  /**
   * {@inheritDoc}
   * <p>
   * If a valid cursor is held, the search starts from the cursor position.
   * Otherwise the sequence is scanned from its first column, and the result is
   * saved in a new cursor.
   */
  @Override
  public int findIndex(int pos)
  {
    /*
     * use a valid, hopefully nearby, cursor if available
     */
    if (isValidCursor(cursor))
    {
      return findIndex(pos, cursor);
    }

    int j = start; // residue number of the next residue to be met
    int i = 0; // number of columns consumed so far
    int startColumn = 0;

    /*
     * traverse sequence from the start counting gaps; make a note of
     * the column of the first residue to save in the cursor
     */
    while ((i < sequence.length) && (j <= end) && (j <= pos))
    {
      if (!Comparison.isGap(sequence[i]))
      {
        if (j == start)
        {
          startColumn = i;
        }
        j++;
      }
      i++;
    }

    // NOTE(review): this answers a value derived from the residue numbering
    // (end + 1) rather than from the column count i - confirm this is intended
    if (j == end && j < pos)
    {
      return end + 1;
    }

    updateCursor(pos, i, startColumn);
    return i;
  }
 764
 765   /**
 766    * Updates the cursor to the latest found residue and column position
 767    *
 768    * @param residuePos
 769    *          (start..)
 770    * @param column
 771    *          (1..)
 772    * @param startColumn
 773    *          column position of the first sequence residue
 774    */
 775   protected void updateCursor(int residuePos, int column, int startColumn)
 776   {
 777     /*
 778      * preserve end residue column provided cursor was valid
 779      */
 780     int endColumn = isValidCursor(cursor) ? cursor.lastColumnPosition : 0;
 781
 782     if (residuePos == this.end)
 783     {
 784       endColumn = column;
 785     }
 786
 787     cursor = new SequenceCursor(this, residuePos, column, startColumn,
 788             endColumn, this.changeCount);
 789   }
 790
  /**
   * Answers the aligned column position (1..) for the given residue position
   * (start..) given a 'hint' of a residue/column location in the neighbourhood.
   * The hint may be left of, at, or to the right of the required position.
   * <p>
   * If the walk runs off the left of the sequence the result is 0, and if it
   * runs off the right the result is the last column; in both cases the cursor
   * is left unchanged.
   *
   * @param pos
   *          the residue position to locate
   * @param curs
   *          the hint; if not valid for this sequence, findIndex(pos) is used
   *          instead
   * @return the column position (base 1)
   */
  protected int findIndex(final int pos, SequenceCursor curs)
  {
    if (!isValidCursor(curs))
    {
      /*
       * wrong or invalidated cursor, compute de novo
       */
      return findIndex(pos);
    }

    if (curs.residuePosition == pos)
    {
      return curs.columnPosition;
    }

    /*
     * move left or right to find pos from hint.position
     */
    int col = curs.columnPosition - 1; // convert from base 1 to base 0
    int newPos = curs.residuePosition;
    int delta = newPos > pos ? -1 : 1; // direction of travel

    while (newPos != pos)
    {
      col += delta; // shift one column left or right
      if (col < 0)
      {
        // ran off the left hand end without reaching pos
        break;
      }
      if (col == sequence.length)
      {
        col--; // return last column if we failed to reach pos
        break;
      }
      if (!Comparison.isGap(sequence[col]))
      {
        newPos += delta;
      }
    }

    col++; // convert back to base 1

    /*
     * only update cursor if we found the target position
     */
    if (newPos == pos)
    {
      updateCursor(pos, col, curs.firstColumnPosition);
    }

    return col;
  }
 852
  /**
   * {@inheritDoc}
   * <p>
   * If a valid cursor is held, the search starts from the cursor position
   * (note the cursor-based method takes a base 1 column). Otherwise the
   * sequence is scanned from its first column, and the last residue found, if
   * any, is saved in a new cursor.
   *
   * @param column
   *          the column position (base 0)
   */
  @Override
  public int findPosition(final int column)
  {
    /*
     * use a valid, hopefully nearby, cursor if available
     */
    if (isValidCursor(cursor))
    {
      return findPosition(column + 1, cursor);
    }

    // TODO recode this more naturally i.e. count residues only
    // as they are found, not 'in anticipation'

    /*
     * traverse the sequence counting gaps; note the column position
     * of the first residue, to save in the cursor
     */
    int firstResidueColumn = 0;
    int lastPosFound = 0; // 0 means no residue has been found
    int lastPosFoundColumn = 0;
    int seqlen = sequence.length;

    if (seqlen > 0 && !Comparison.isGap(sequence[0]))
    {
      lastPosFound = start;
      lastPosFoundColumn = 0;
    }

    int j = 0;
    int pos = start; // residue number of the next residue to be met

    while (j < column && j < seqlen)
    {
      if (!Comparison.isGap(sequence[j]))
      {
        lastPosFound = pos;
        lastPosFoundColumn = j;
        if (pos == this.start)
        {
          firstResidueColumn = j;
        }
        pos++;
      }
      j++;
    }
    /*
     * the loop stops before inspecting the target column itself, so
     * check whether it holds a residue (pos is not advanced for it)
     */
    if (j < seqlen && !Comparison.isGap(sequence[j]))
    {
      lastPosFound = pos;
      lastPosFoundColumn = j;
      if (pos == this.start)
      {
        firstResidueColumn = j;
      }
    }

    /*
     * update the cursor to the last residue position found (if any)
     * (converting column position to base 1)
     */
    if (lastPosFound != 0)
    {
      updateCursor(lastPosFound, lastPosFoundColumn + 1,
              firstResidueColumn + 1);
    }

    return pos;
  }
 924
 925   /**
 926    * Answers true if the given cursor is not null, is for this sequence object,
 927    * and has a token value that matches this object's changeCount, else false.
 928    * This allows us to ignore a cursor as 'stale' if the sequence has been
 929    * modified since the cursor was created.
 930    *
 931    * @param curs
 932    * @return
 933    */
 934   protected boolean isValidCursor(SequenceCursor curs)
 935   {
 936     if (curs == null || curs.sequence != this || curs.token != changeCount)
 937     {
 938       return false;
 939     }
 940     /*
 941      * sanity check against range
 942      */
 943     if (curs.columnPosition < 0 || curs.columnPosition > sequence.length)
 944     {
 945       return false;
 946     }
 947     if (curs.residuePosition < start || curs.residuePosition > end)
 948     {
 949       return false;
 950     }
 951     return true;
 952   }
 953
  /**
   * Answers the sequence position (start..) for the given aligned column
   * position (1..), given a hint of a cursor in the neighbourhood. The cursor
   * may lie left of, at, or to the right of the column position.
   * <p>
   * If the column is a gap or lies beyond the sequence, and was approached
   * from the left, the position of the next residue to the right is returned.
   *
   * @param col
   *          the column position (base 1)
   * @param curs
   *          the hint; if not valid for this sequence, findPosition(col - 1)
   *          is used instead
   * @return the residue position
   */
  protected int findPosition(final int col, SequenceCursor curs)
  {
    if (!isValidCursor(curs))
    {
      /*
       * wrong or invalidated cursor, compute de novo
       */
      return findPosition(col - 1);// ugh back to base 0
    }

    if (curs.columnPosition == col)
    {
      cursor = curs; // in case this method becomes public
      return curs.residuePosition; // easy case :-)
    }

    if (curs.lastColumnPosition > 0 && curs.lastColumnPosition < col)
    {
      /*
       * sequence lies entirely to the left of col
       * - return last residue + 1
       */
      return end + 1;
    }

    if (curs.firstColumnPosition > 0 && curs.firstColumnPosition > col)
    {
      /*
       * sequence lies entirely to the right of col
       * - return first residue
       */
      return start;
    }

    // todo could choose closest to col out of column,
    // firstColumnPosition, lastColumnPosition as a start point

    /*
     * move left or right to find pos from cursor position
     */
    int firstResidueColumn = curs.firstColumnPosition;
    int column = curs.columnPosition - 1; // to base 0
    int newPos = curs.residuePosition;
    int delta = curs.columnPosition > col ? -1 : 1; // direction of travel
    boolean gapped = false; // whether the last column visited was a gap
    int lastFoundPosition = curs.residuePosition;
    int lastFoundPositionColumn = curs.columnPosition;

    while (column != col - 1)
    {
      column += delta; // shift one column left or right
      if (column < 0 || column == sequence.length)
      {
        // ran off either end of the sequence
        break;
      }
      gapped = Comparison.isGap(sequence[column]);
      if (!gapped)
      {
        newPos += delta;
        lastFoundPosition = newPos;
        lastFoundPositionColumn = column + 1;
        if (lastFoundPosition == this.start)
        {
          firstResidueColumn = column + 1;
        }
      }
    }

    /*
     * save the last residue actually visited, unless the held cursor is
     * already at that residue
     */
    if (cursor == null || lastFoundPosition != cursor.residuePosition)
    {
      updateCursor(lastFoundPosition, lastFoundPositionColumn,
              firstResidueColumn);
    }

    /*
     * hack to give position to the right if on a gap
     * or beyond the length of the sequence (see JAL-2562)
     */
    if (delta > 0 && (gapped || column >= sequence.length))
    {
      newPos++;
    }

    return newPos;
  }
1048
  /**
   * {@inheritDoc}
   * <p>
   * Columns beyond the end of the sequence are ignored.
   *
   * @param fromColumn
   *          first column (base 1, inclusive)
   * @param toColumn
   *          last column (base 1, inclusive)
   * @return the range of residue positions found in the columns, or null if
   *         fromColumn is less than 1, toColumn is less than fromColumn, or
   *         the columns hold no residue
   */
  @Override
  public ContiguousI findPositions(int fromColumn, int toColumn)
  {
    if (toColumn < fromColumn || fromColumn < 1)
    {
      return null;
    }

    /*
     * find the first non-gapped position, if any
     */
    int firstPosition = 0; // 0 means no residue found
    int col = fromColumn - 1; // to base 0
    int length = sequence.length;
    while (col < length && col < toColumn)
    {
      if (!Comparison.isGap(sequence[col]))
      {
        // col is stepped past the residue found, ready for the next scan
        firstPosition = findPosition(col++);
        break;
      }
      col++;
    }

    if (firstPosition == 0)
    {
      return null;
    }

    /*
     * find the last non-gapped position
     * (by counting the residues in the remaining columns)
     */
    int lastPosition = firstPosition;
    while (col < length && col < toColumn)
    {
      if (!Comparison.isGap(sequence[col++]))
      {
        lastPosition++;
      }
    }

    return new Range(firstPosition, lastPosition);
  }
1095
1096   /**
1097    * Returns an int array where indices correspond to each residue in the
1098    * sequence and the element value gives its position in the alignment
1099    *
1100    * @return int[SequenceI.getEnd()-SequenceI.getStart()+1] or null if no
1101    *         residues in SequenceI object
1102    */
1103   @Override
1104   public int[] gapMap()
1105   {
1106     String seq = jalview.analysis.AlignSeq.extractGaps(
1107             jalview.util.Comparison.GapChars, new String(sequence));
1108     int[] map = new int[seq.length()];
1109     int j = 0;
1110     int p = 0;
1111
1112     while (j < sequence.length)
1113     {
1114       if (!jalview.util.Comparison.isGap(sequence[j]))
1115       {
1116         map[p++] = j;
1117       }
1118
1119       j++;
1120     }
1121
1122     return map;
1123   }
1124
1125   /**
1126    * Build a bitset corresponding to sequence gaps
1127    *
1128    * @return a BitSet where set values correspond to gaps in the sequence
1129    */
1130   @Override
1131   public BitSet gapBitset()
1132   {
1133     BitSet gaps = new BitSet(sequence.length);
1134     int j = 0;
1135     while (j < sequence.length)
1136     {
1137       if (jalview.util.Comparison.isGap(sequence[j]))
1138       {
1139         gaps.set(j);
1140       }
1141       j++;
1142     }
1143     return gaps;
1144   }
1145
1146   @Override
1147   public int[] findPositionMap()
1148   {
1149     int map[] = new int[sequence.length];
1150     int j = 0;
1151     int pos = start;
1152     int seqlen = sequence.length;
1153     while ((j < seqlen))
1154     {
1155       map[j] = pos;
1156       if (!jalview.util.Comparison.isGap(sequence[j]))
1157       {
1158         pos++;
1159       }
1160
1161       j++;
1162     }
1163     return map;
1164   }
1165
1166   @Override
1167   public List<int[]> getInsertions()
1168   {
1169     ArrayList<int[]> map = new ArrayList<>();
1170     int lastj = -1, j = 0;
1171     int pos = start;
1172     int seqlen = sequence.length;
1173     while ((j < seqlen))
1174     {
1175       if (jalview.util.Comparison.isGap(sequence[j]))
1176       {
1177         if (lastj == -1)
1178         {
1179           lastj = j;
1180         }
1181       }
1182       else
1183       {
1184         if (lastj != -1)
1185         {
1186           map.add(new int[] { lastj, j - 1 });
1187           lastj = -1;
1188         }
1189       }
1190       j++;
1191     }
1192     if (lastj != -1)
1193     {
1194       map.add(new int[] { lastj, j - 1 });
1195       lastj = -1;
1196     }
1197     return map;
1198   }
1199
1200   @Override
1201   public BitSet getInsertionsAsBits()
1202   {
1203     BitSet map = new BitSet();
1204     int lastj = -1, j = 0;
1205     int pos = start;
1206     int seqlen = sequence.length;
1207     while ((j < seqlen))
1208     {
1209       if (jalview.util.Comparison.isGap(sequence[j]))
1210       {
1211         if (lastj == -1)
1212         {
1213           lastj = j;
1214         }
1215       }
1216       else
1217       {
1218         if (lastj != -1)
1219         {
1220           map.set(lastj, j);
1221           lastj = -1;
1222         }
1223       }
1224       j++;
1225     }
1226     if (lastj != -1)
1227     {
1228       map.set(lastj, j);
1229       lastj = -1;
1230     }
1231     return map;
1232   }
1233
1234   @Override
1235   public void deleteChars(final int i, final int j)
1236   {
1237     int newstart = start, newend = end;
1238     if (i >= sequence.length || i < 0)
1239     {
1240       return;
1241     }
1242
1243     char[] tmp = StringUtils.deleteChars(sequence, i, j);
1244     boolean createNewDs = false;
1245     // TODO: take a (second look) at the dataset creation validation method for
1246     // the very large sequence case
1247
1248     int startIndex = findIndex(start) - 1;
1249     int endIndex = findIndex(end) - 1;
1250     int startDeleteColumn = -1; // for dataset sequence deletions
1251     int deleteCount = 0;
1252
1253     for (int s = i; s < j && s < sequence.length; s++)
1254     {
1255       if (Comparison.isGap(sequence[s]))
1256       {
1257         continue;
1258       }
1259       deleteCount++;
1260       if (startDeleteColumn == -1)
1261       {
1262         startDeleteColumn = findPosition(s) - start;
1263       }
1264       if (createNewDs)
1265       {
1266         newend--;
1267       }
1268       else
1269       {
1270         if (startIndex == s)
1271         {
1272           /*
1273            * deleting characters from start of sequence; new start is the
1274            * sequence position of the next column (position to the right
1275            * if the column position is gapped)
1276            */
1277           newstart = findPosition(j);
1278           break;
1279         }
1280         else
1281         {
1282           if (endIndex < j)
1283           {
1284             /*
1285              * deleting characters at end of sequence; new end is the sequence
1286              * position of the column before the deletion; subtract 1 if this is
1287              * gapped since findPosition returns the next sequence position
1288              */
1289             newend = findPosition(i - 1);
1290             if (Comparison.isGap(sequence[i - 1]))
1291             {
1292               newend--;
1293             }
1294             break;
1295           }
1296           else
1297           {
1298             createNewDs = true;
1299             newend--;
1300           }
1301         }
1302       }
1303     }
1304
1305     if (createNewDs && this.datasetSequence != null)
1306     {
1307       /*
1308        * if deletion occured in the middle of the sequence,
1309        * construct a new dataset sequence and delete the residues
1310        * that were deleted from the aligned sequence
1311        */
1312       Sequence ds = new Sequence(datasetSequence);
1313       ds.deleteChars(startDeleteColumn, startDeleteColumn + deleteCount);
1314       datasetSequence = ds;
1315       // TODO: remove any non-inheritable properties ?
1316       // TODO: create a sequence mapping (since there is a relation here ?)
1317     }
1318     start = newstart;
1319     end = newend;
1320     sequence = tmp;
1321     sequenceChanged();
1322   }
1323
1324   @Override
1325   public void insertCharAt(int i, int length, char c)
1326   {
1327     char[] tmp = new char[sequence.length + length];
1328
1329     if (i >= sequence.length)
1330     {
1331       System.arraycopy(sequence, 0, tmp, 0, sequence.length);
1332       i = sequence.length;
1333     }
1334     else
1335     {
1336       System.arraycopy(sequence, 0, tmp, 0, i);
1337     }
1338
1339     int index = i;
1340     while (length > 0)
1341     {
1342       tmp[index++] = c;
1343       length--;
1344     }
1345
1346     if (i < sequence.length)
1347     {
1348       System.arraycopy(sequence, i, tmp, index, sequence.length - i);
1349     }
1350
1351     sequence = tmp;
1352     sequenceChanged();
1353   }
1354
1355   @Override
1356   public void insertCharAt(int i, char c)
1357   {
1358     insertCharAt(i, 1, c);
1359   }
1360
1361   @Override
1362   public String getVamsasId()
1363   {
1364     return vamsasId;
1365   }
1366
1367   @Override
1368   public void setVamsasId(String id)
1369   {
1370     vamsasId = id;
1371   }
1372
1373   @Override
1374   public void setDBRefs(DBRefEntry[] dbref)
1375   {
1376     if (dbrefs == null && datasetSequence != null
1377             && this != datasetSequence)
1378     {
1379       datasetSequence.setDBRefs(dbref);
1380       return;
1381     }
1382     dbrefs = dbref;
1383     if (dbrefs != null)
1384     {
1385       DBRefUtils.ensurePrimaries(this);
1386     }
1387   }
1388
1389   @Override
1390   public DBRefEntry[] getDBRefs()
1391   {
1392     if (dbrefs == null && datasetSequence != null
1393             && this != datasetSequence)
1394     {
1395       return datasetSequence.getDBRefs();
1396     }
1397     return dbrefs;
1398   }
1399
1400   @Override
1401   public void addDBRef(DBRefEntry entry)
1402   {
1403     if (datasetSequence != null)
1404     {
1405       datasetSequence.addDBRef(entry);
1406       return;
1407     }
1408
1409     if (dbrefs == null)
1410     {
1411       dbrefs = new DBRefEntry[0];
1412     }
1413
1414     for (DBRefEntryI dbr : dbrefs)
1415     {
1416       if (dbr.updateFrom(entry))
1417       {
1418         /*
1419          * found a dbref that either matched, or could be
1420          * updated from, the new entry - no need to add it
1421          */
1422         return;
1423       }
1424     }
1425
1426     /*
1427      * extend the array to make room for one more
1428      */
1429     // TODO use an ArrayList instead
1430     int j = dbrefs.length;
1431     DBRefEntry[] temp = new DBRefEntry[j + 1];
1432     System.arraycopy(dbrefs, 0, temp, 0, j);
1433     temp[temp.length - 1] = entry;
1434
1435     dbrefs = temp;
1436
1437     DBRefUtils.ensurePrimaries(this);
1438   }
1439
1440   @Override
1441   public void setDatasetSequence(SequenceI seq)
1442   {
1443     if (seq == this)
1444     {
1445       throw new IllegalArgumentException(
1446               "Implementation Error: self reference passed to SequenceI.setDatasetSequence");
1447     }
1448     if (seq != null && seq.getDatasetSequence() != null)
1449     {
1450       throw new IllegalArgumentException(
1451               "Implementation error: cascading dataset sequences are not allowed.");
1452     }
1453     datasetSequence = seq;
1454   }
1455
1456   @Override
1457   public SequenceI getDatasetSequence()
1458   {
1459     return datasetSequence;
1460   }
1461
1462   @Override
1463   public AlignmentAnnotation[] getAnnotation()
1464   {
1465     return annotation == null ? null
1466             : annotation
1467                     .toArray(new AlignmentAnnotation[annotation.size()]);
1468   }
1469
1470   @Override
1471   public boolean hasAnnotation(AlignmentAnnotation ann)
1472   {
1473     return annotation == null ? false : annotation.contains(ann);
1474   }
1475
1476   @Override
1477   public void addAlignmentAnnotation(AlignmentAnnotation annotation)
1478   {
1479     if (this.annotation == null)
1480     {
1481       this.annotation = new Vector<>();
1482     }
1483     if (!this.annotation.contains(annotation))
1484     {
1485       this.annotation.addElement(annotation);
1486     }
1487     annotation.setSequenceRef(this);
1488   }
1489
1490   @Override
1491   public void removeAlignmentAnnotation(AlignmentAnnotation annotation)
1492   {
1493     if (this.annotation != null)
1494     {
1495       this.annotation.removeElement(annotation);
1496       if (this.annotation.size() == 0)
1497       {
1498         this.annotation = null;
1499       }
1500     }
1501   }
1502
1503   /**
1504    * test if this is a valid candidate for another sequence's dataset sequence.
1505    *
1506    */
1507   private boolean isValidDatasetSequence()
1508   {
1509     if (datasetSequence != null)
1510     {
1511       return false;
1512     }
1513     for (int i = 0; i < sequence.length; i++)
1514     {
1515       if (jalview.util.Comparison.isGap(sequence[i]))
1516       {
1517         return false;
1518       }
1519     }
1520     return true;
1521   }
1522
1523   @Override
1524   public SequenceI deriveSequence()
1525   {
1526     Sequence seq = null;
1527     if (datasetSequence == null)
1528     {
1529       if (isValidDatasetSequence())
1530       {
1531         // Use this as dataset sequence
1532         seq = new Sequence(getName(), "", 1, -1);
1533         seq.setDatasetSequence(this);
1534         seq.initSeqFrom(this, getAnnotation());
1535         return seq;
1536       }
1537       else
1538       {
1539         // Create a new, valid dataset sequence
1540         createDatasetSequence();
1541       }
1542     }
1543     return new Sequence(this);
1544   }
1545
1546   private boolean _isNa;
1547
1548   private int _seqhash = 0;
1549
1550   /**
1551    * Answers false if the sequence is more than 85% nucleotide (ACGTU), else
1552    * true
1553    */
1554   @Override
1555   public boolean isProtein()
1556   {
1557     if (datasetSequence != null)
1558     {
1559       return datasetSequence.isProtein();
1560     }
1561     if (_seqhash != sequence.hashCode())
1562     {
1563       _seqhash = sequence.hashCode();
1564       _isNa = Comparison.isNucleotide(this);
1565     }
1566     return !_isNa;
1567   };
1568
1569   /*
1570    * (non-Javadoc)
1571    *
1572    * @see jalview.datamodel.SequenceI#createDatasetSequence()
1573    */
1574   @Override
1575   public SequenceI createDatasetSequence()
1576   {
1577     if (datasetSequence == null)
1578     {
1579       Sequence dsseq = new Sequence(getName(),
1580               AlignSeq.extractGaps(jalview.util.Comparison.GapChars,
1581                       getSequenceAsString()),
1582               getStart(), getEnd());
1583
1584       datasetSequence = dsseq;
1585
1586       dsseq.setDescription(description);
1587       // move features and database references onto dataset sequence
1588       dsseq.sequenceFeatureStore = sequenceFeatureStore;
1589       sequenceFeatureStore = null;
1590       dsseq.dbrefs = dbrefs;
1591       dbrefs = null;
1592       // TODO: search and replace any references to this sequence with
1593       // references to the dataset sequence in Mappings on dbref
1594       dsseq.pdbIds = pdbIds;
1595       pdbIds = null;
1596       datasetSequence.updatePDBIds();
1597       if (annotation != null)
1598       {
1599         // annotation is cloned rather than moved, to preserve what's currently
1600         // on the alignment
1601         for (AlignmentAnnotation aa : annotation)
1602         {
1603           AlignmentAnnotation _aa = new AlignmentAnnotation(aa);
1604           _aa.sequenceRef = datasetSequence;
1605           _aa.adjustForAlignment(); // uses annotation's own record of
1606                                     // sequence-column mapping
1607           datasetSequence.addAlignmentAnnotation(_aa);
1608         }
1609       }
1610     }
1611     return datasetSequence;
1612   }
1613
1614   /*
1615    * (non-Javadoc)
1616    *
1617    * @see
1618    * jalview.datamodel.SequenceI#setAlignmentAnnotation(AlignmmentAnnotation[]
1619    * annotations)
1620    */
1621   @Override
1622   public void setAlignmentAnnotation(AlignmentAnnotation[] annotations)
1623   {
1624     if (annotation != null)
1625     {
1626       annotation.removeAllElements();
1627     }
1628     if (annotations != null)
1629     {
1630       for (int i = 0; i < annotations.length; i++)
1631       {
1632         if (annotations[i] != null)
1633         {
1634           addAlignmentAnnotation(annotations[i]);
1635         }
1636       }
1637     }
1638   }
1639
1640   @Override
1641   public AlignmentAnnotation[] getAnnotation(String label)
1642   {
1643     if (annotation == null || annotation.size() == 0)
1644     {
1645       return null;
1646     }
1647
1648     Vector<AlignmentAnnotation> subset = new Vector<>();
1649     Enumeration<AlignmentAnnotation> e = annotation.elements();
1650     while (e.hasMoreElements())
1651     {
1652       AlignmentAnnotation ann = e.nextElement();
1653       if (ann.label != null && ann.label.equals(label))
1654       {
1655         subset.addElement(ann);
1656       }
1657     }
1658     if (subset.size() == 0)
1659     {
1660       return null;
1661     }
1662     AlignmentAnnotation[] anns = new AlignmentAnnotation[subset.size()];
1663     int i = 0;
1664     e = subset.elements();
1665     while (e.hasMoreElements())
1666     {
1667       anns[i++] = e.nextElement();
1668     }
1669     subset.removeAllElements();
1670     return anns;
1671   }
1672
1673   @Override
1674   public boolean updatePDBIds()
1675   {
1676     if (datasetSequence != null)
1677     {
1678       // TODO: could merge DBRefs
1679       return datasetSequence.updatePDBIds();
1680     }
1681     if (dbrefs == null || dbrefs.length == 0)
1682     {
1683       return false;
1684     }
1685     boolean added = false;
1686     for (DBRefEntry dbr : dbrefs)
1687     {
1688       if (DBRefSource.PDB.equals(dbr.getSource()))
1689       {
1690         /*
1691          * 'Add' any PDB dbrefs as a PDBEntry - add is only performed if the
1692          * PDB id is not already present in a 'matching' PDBEntry
1693          * Constructor parses out a chain code if appended to the accession id
1694          * (a fudge used to 'store' the chain code in the DBRef)
1695          */
1696         PDBEntry pdbe = new PDBEntry(dbr);
1697         added |= addPDBId(pdbe);
1698       }
1699     }
1700     return added;
1701   }
1702
1703   @Override
1704   public void transferAnnotation(SequenceI entry, Mapping mp)
1705   {
1706     if (datasetSequence != null)
1707     {
1708       datasetSequence.transferAnnotation(entry, mp);
1709       return;
1710     }
1711     if (entry.getDatasetSequence() != null)
1712     {
1713       transferAnnotation(entry.getDatasetSequence(), mp);
1714       return;
1715     }
1716     // transfer any new features from entry onto sequence
1717     if (entry.getSequenceFeatures() != null)
1718     {
1719
1720       List<SequenceFeature> sfs = entry.getSequenceFeatures();
1721       for (SequenceFeature feature : sfs)
1722       {
1723        SequenceFeature sf[] = (mp != null) ? mp.locateFeature(feature)
1724                 : new SequenceFeature[] { new SequenceFeature(feature) };
1725         if (sf != null)
1726         {
1727           for (int sfi = 0; sfi < sf.length; sfi++)
1728           {
1729             addSequenceFeature(sf[sfi]);
1730           }
1731         }
1732       }
1733     }
1734
1735     // transfer PDB entries
1736     if (entry.getAllPDBEntries() != null)
1737     {
1738       Enumeration<PDBEntry> e = entry.getAllPDBEntries().elements();
1739       while (e.hasMoreElements())
1740       {
1741         PDBEntry pdb = e.nextElement();
1742         addPDBId(pdb);
1743       }
1744     }
1745     // transfer database references
1746     DBRefEntry[] entryRefs = entry.getDBRefs();
1747     if (entryRefs != null)
1748     {
1749       for (int r = 0; r < entryRefs.length; r++)
1750       {
1751         DBRefEntry newref = new DBRefEntry(entryRefs[r]);
1752         if (newref.getMap() != null && mp != null)
1753         {
1754           // remap ref using our local mapping
1755         }
1756         // we also assume all version string setting is done by dbSourceProxy
1757         /*
1758          * if (!newref.getSource().equalsIgnoreCase(dbSource)) {
1759          * newref.setSource(dbSource); }
1760          */
1761         addDBRef(newref);
1762       }
1763     }
1764   }
1765
1766   @Override
1767   public void setRNA(RNA r)
1768   {
1769     rna = r;
1770   }
1771
1772   @Override
1773   public RNA getRNA()
1774   {
1775     return rna;
1776   }
1777
1778   @Override
1779   public List<AlignmentAnnotation> getAlignmentAnnotations(String calcId,
1780           String label)
1781   {
1782     List<AlignmentAnnotation> result = new ArrayList<>();
1783     if (this.annotation != null)
1784     {
1785       for (AlignmentAnnotation ann : annotation)
1786       {
1787         String id = ann.getCalcId();
1788         if (id != null && id.equals(calcId)
1789                 && ann.label != null && ann.label.equals(label))
1790         {
1791           result.add(ann);
1792         }
1793       }
1794     }
1795     return result;
1796   }
1797
1798   @Override
1799   public String toString()
1800   {
1801     return getDisplayId(false);
1802   }
1803
1804   @Override
1805   public PDBEntry getPDBEntry(String pdbIdStr)
1806   {
1807     if (getDatasetSequence() != null)
1808     {
1809       return getDatasetSequence().getPDBEntry(pdbIdStr);
1810     }
1811     if (pdbIds == null)
1812     {
1813       return null;
1814     }
1815     List<PDBEntry> entries = getAllPDBEntries();
1816     for (PDBEntry entry : entries)
1817     {
1818       if (entry.getId().equalsIgnoreCase(pdbIdStr))
1819       {
1820         return entry;
1821       }
1822     }
1823     return null;
1824   }
1825
1826   @Override
1827   public List<DBRefEntry> getPrimaryDBRefs()
1828   {
1829     if (datasetSequence != null)
1830     {
1831       return datasetSequence.getPrimaryDBRefs();
1832     }
1833     if (dbrefs == null || dbrefs.length == 0)
1834     {
1835       return Collections.emptyList();
1836     }
1837     synchronized (dbrefs)
1838     {
1839       List<DBRefEntry> primaries = new ArrayList<>();
1840       DBRefEntry[] tmp = new DBRefEntry[1];
1841       for (DBRefEntry ref : dbrefs)
1842       {
1843         if (!ref.isPrimaryCandidate())
1844         {
1845           continue;
1846         }
1847         if (ref.hasMap())
1848         {
1849           MapList mp = ref.getMap().getMap();
1850           if (mp.getFromLowest() > start || mp.getFromHighest() < end)
1851           {
1852             // map only involves a subsequence, so cannot be primary
1853             continue;
1854           }
1855         }
1856         // whilst it looks like it is a primary ref, we also sanity check type
1857         if (DBRefUtils.getCanonicalName(DBRefSource.PDB)
1858                 .equals(DBRefUtils.getCanonicalName(ref.getSource())))
1859         {
1860           // PDB dbrefs imply there should be a PDBEntry associated
1861           // TODO: tighten PDB dbrefs
1862           // formally imply Jalview has actually downloaded and
1863           // parsed the pdb file. That means there should be a cached file
1864           // handle on the PDBEntry, and a real mapping between sequence and
1865           // extracted sequence from PDB file
1866           PDBEntry pdbentry = getPDBEntry(ref.getAccessionId());
1867           if (pdbentry != null && pdbentry.getFile() != null)
1868           {
1869             primaries.add(ref);
1870           }
1871           continue;
1872         }
1873         // check standard protein or dna sources
1874         tmp[0] = ref;
1875         DBRefEntry[] res = DBRefUtils.selectDbRefs(!isProtein(), tmp);
1876         if (res != null && res[0] == tmp[0])
1877         {
1878           primaries.add(ref);
1879           continue;
1880         }
1881       }
1882       return primaries;
1883     }
1884   }
1885
1886   @Override
1887   public HiddenMarkovModel getHMM()
1888   {
1889     return hmm;
1890   }
1891
1892   @Override
1893   public void setHMM(HiddenMarkovModel hmm)
1894   {
1895     this.hmm = hmm;
1896   }
1897
1898   @Override
1899   public boolean hasHMMAnnotation()
1900   {
1901     if (this.annotation == null) {
1902       return false;
1903     }
1904     for (AlignmentAnnotation ann : annotation)
1905     {
1906       if (InformationThread.HMM_CALC_ID.equals(ann.getCalcId()))
1907       {
1908         return true;
1909       }
1910     }
1911     return false;
1912   }
1913
1914   /**
1915    * {@inheritDoc}
1916    */
1917   @Override
1918   public List<SequenceFeature> findFeatures(int fromColumn, int toColumn,
1919           String... types)
1920   {
1921     int startPos = findPosition(fromColumn - 1); // convert base 1 to base 0
1922     int endPos = fromColumn == toColumn ? startPos
1923             : findPosition(toColumn - 1);
1924
1925     List<SequenceFeature> result = getFeatures().findFeatures(startPos,
1926             endPos, types);
1927     if (datasetSequence != null)
1928     {
1929       result = datasetSequence.getFeatures().findFeatures(startPos, endPos,
1930               types);
1931     }
1932     else
1933     {
1934       result = sequenceFeatureStore.findFeatures(startPos, endPos, types);
1935     }
1936
1937     /*
1938      * if end column is gapped, endPos may be to the right,
1939      * and we may have included adjacent or enclosing features;
1940      * remove any that are not enclosing, non-contact features
1941      */
1942     boolean endColumnIsGapped = toColumn > 0 && toColumn <= sequence.length
1943             && Comparison.isGap(sequence[toColumn - 1]);
1944     if (endPos > this.end || endColumnIsGapped)
1945     {
1946       ListIterator<SequenceFeature> it = result.listIterator();
1947       while (it.hasNext())
1948       {
1949         SequenceFeature sf = it.next();
1950         int sfBegin = sf.getBegin();
1951         int sfEnd = sf.getEnd();
1952         int featureStartColumn = findIndex(sfBegin);
1953         if (featureStartColumn > toColumn)
1954         {
1955           it.remove();
1956         }
1957         else if (featureStartColumn < fromColumn)
1958         {
1959           int featureEndColumn = sfEnd == sfBegin ? featureStartColumn
1960                   : findIndex(sfEnd);
1961           if (featureEndColumn < fromColumn)
1962           {
1963             it.remove();
1964           }
1965           else if (featureEndColumn > toColumn && sf.isContactFeature())
1966           {
1967             /*
1968              * remove an enclosing feature if it is a contact feature
1969              */
1970             it.remove();
1971           }
1972         }
1973       }
1974     }
1975
1976     return result;
1977   }
1978
1979   /**
1980    * Invalidates any stale cursors (forcing recalculation) by incrementing the
1981    * token that has to match the one presented by the cursor
1982    */
1983   @Override
1984   public void sequenceChanged()
1985   {
1986     changeCount++;
1987   }
1988
1989   /**
1990    * {@inheritDoc}
1991    */
1992   @Override
1993   public int replace(char c1, char c2)
1994   {
1995     if (c1 == c2)
1996     {
1997       return 0;
1998     }
1999     int count = 0;
2000     synchronized (sequence)
2001     {
2002       for (int c = 0; c < sequence.length; c++)
2003       {
2004         if (sequence[c] == c1)
2005         {
2006           sequence[c] = c2;
2007           count++;
2008         }
2009       }
2010     }
2011     if (count > 0)
2012     {
2013       sequenceChanged();
2014     }
2015
2016     return count;
2017   }
2018
2019   @Override
2020   public String getSequenceStringFromIterator(Iterator<int[]> it)
2021   {
2022     StringBuilder newSequence = new StringBuilder();
2023     while (it.hasNext())
2024     {
2025       int[] block = it.next();
2026       if (it.hasNext())
2027       {
2028         newSequence.append(getSequence(block[0], block[1] + 1));
2029       }
2030       else
2031       {
2032         newSequence.append(getSequence(block[0], block[1]));
2033       }
2034     }
2035
2036     return newSequence.toString();
2037   }
2038
2039   @Override
2040   public int firstResidueOutsideIterator(Iterator<int[]> regions)
2041   {
2042     int start = 0;
2043
2044     if (!regions.hasNext())
2045     {
2046       return findIndex(getStart()) - 1;
2047     }
2048
2049     // Simply walk along the sequence whilst watching for region
2050     // boundaries
2051     int hideStart = getLength();
2052     int hideEnd = -1;
2053     boolean foundStart = false;
2054
2055     // step through the non-gapped positions of the sequence
2056     for (int i = getStart(); i <= getEnd() && (!foundStart); i++)
2057     {
2058       // get alignment position of this residue in the sequence
2059       int p = findIndex(i) - 1;
2060
2061       // update region start/end
2062       while (hideEnd < p && regions.hasNext())
2063       {
2064         int[] region = regions.next();
2065         hideStart = region[0];
2066         hideEnd = region[1];
2067       }
2068       if (hideEnd < p)
2069       {
2070         hideStart = getLength();
2071       }
2072       // update boundary for sequence
2073       if (p < hideStart)
2074       {
2075         start = p;
2076         foundStart = true;
2077       }
2078     }
2079
2080     if (foundStart)
2081     {
2082       return start;
2083     }
2084     // otherwise, sequence was completely hidden
2085     return 0;
2086   }
2087
2088   @Override
2089   public boolean hasHMMProfile()
2090   {
2091     return hmm != null;
2092   }
2093 }