src/jalview/datamodel/Sequence.java

   1 /*
   2  * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
   3  * Copyright (C) $$Year-Rel$$ The Jalview Authors
   4  *
   5  * This file is part of Jalview.
   6  *
   7  * Jalview is free software: you can redistribute it and/or
   8  * modify it under the terms of the GNU General Public License
   9  * as published by the Free Software Foundation, either version 3
  10  * of the License, or (at your option) any later version.
  11  *
  12  * Jalview is distributed in the hope that it will be useful, but
  13  * WITHOUT ANY WARRANTY; without even the implied warranty
  14  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
  15  * PURPOSE.  See the GNU General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU General Public License
  18  * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
  19  * The Jalview Authors are detailed in the 'AUTHORS' file.
  20  */
  21 package jalview.datamodel;
  22
  23 import jalview.analysis.AlignSeq;
  24 import jalview.api.DBRefEntryI;
  25 import jalview.util.Comparison;
  26 import jalview.util.DBRefUtils;
  27 import jalview.util.MapList;
  28 import jalview.util.StringUtils;
  29
  30 import java.util.ArrayList;
  31 import java.util.Arrays;
  32 import java.util.BitSet;
  33 import java.util.Collections;
  34 import java.util.Enumeration;
  35 import java.util.List;
  36 import java.util.Vector;
  37
  38 import fr.orsay.lri.varna.models.rna.RNA;
  39
  40 /**
  41  *
  42  * Implements the SequenceI interface for a char[] based sequence object.
  43  *
  44  * @author $author$
  45  * @version $Revision$
  46  */
  47 public class Sequence extends ASequence implements SequenceI
  48 {
  /**
   * The dataset sequence backing this sequence, or null when none has been
   * set (see setDatasetSequence and createDatasetSequence)
   */
  SequenceI datasetSequence;

  /**
   * Display name. parseId() replaces a null name with "" and strips a trailing
   * /start-end suffix from it.
   */
  String name;

  /**
   * The sequence characters, which may include gap characters
   */
  private char[] sequence;

  // NOTE(review): not referenced in the visible part of this file - confirm
  // usage elsewhere before relying on it
  int previousPosition;

  /**
   * Free-text description of the sequence; may be null
   */
  String description;

  /**
   * Position of the first residue (taken from the constructor argument, or
   * from a /start-end suffix on the name)
   */
  int start;

  /**
   * Position of the last residue; checkValidRange() raises it when it is too
   * small for the number of non-gap characters held
   */
  int end;

  // NOTE(review): purpose not evident from the visible code - TODO confirm
  boolean hasInfo;

  /**
   * Hidden Markov model attached to this sequence, if any; initSeqFrom() takes
   * a copy of the source sequence's model
   */
  HiddenMarkovModel hmm;

  /**
   * Set by initSeqFrom() when the source sequence reports
   * isHMMConsensusSequence()
   */
  boolean isHMMConsensusSequence = false;

  /**
   * PDB entries associated with this sequence; null until one is added
   */
  Vector<PDBEntry> pdbIds;

  /**
   * Identifier exposed through getVamsasId() / setVamsasId()
   */
  String vamsasId;

  /**
   * Database references held directly on this sequence; when null, getDBRefs()
   * falls back to the dataset sequence's references
   */
  DBRefEntry[] dbrefs;

  // NOTE(review): not referenced in the visible part of this file - TODO
  // confirm usage
  RNA rna;

  /**
   * This annotation is displayed below the alignment but the positions are tied
   * to the residues of this sequence
   *
   * TODO: change to List<>
   */
  Vector<AlignmentAnnotation> annotation;

  /**
   * The index of the sequence in a MSA
   */
  int index = -1;

  /**
   * Array of sequence features held directly on this sequence. It is null when
   * no features are stored here (deleteFeature and createDatasetSequence both
   * reset it to null); getSequenceFeatures() then consults the dataset
   * sequence instead.
   */
  public SequenceFeature[] sequenceFeatures;
  94
  95   /**
  96    * Creates a new Sequence object.
  97    *
  98    * @param name
  99    *          display name string
 100    * @param sequence
 101    *          string to form a possibly gapped sequence out of
 102    * @param start
 103    *          first position of non-gap residue in the sequence
 104    * @param end
 105    *          last position of ungapped residues (nearly always only used for
 106    *          display purposes)
 107    */
 108   public Sequence(String name, String sequence, int start, int end)
 109   {
 110     initSeqAndName(name, sequence.toCharArray(), start, end);
 111   }
 112
 113   public Sequence(String name, char[] sequence, int start, int end)
 114   {
 115     initSeqAndName(name, sequence, start, end);
 116   }
 117
 118   /**
 119    * Stage 1 constructor - assign name, sequence, and set start and end fields.
 120    * start and end are updated values from name2 if it ends with /start-end
 121    *
 122    * @param name2
 123    * @param sequence2
 124    * @param start2
 125    * @param end2
 126    */
 127   protected void initSeqAndName(String name2, char[] sequence2, int start2,
 128           int end2)
 129   {
 130     this.name = name2;
 131     this.sequence = sequence2;
 132     this.start = start2;
 133     this.end = end2;
 134     parseId();
 135     checkValidRange();
 136   }
 137
 138   com.stevesoft.pat.Regex limitrx = new com.stevesoft.pat.Regex(
 139           "[/][0-9]{1,}[-][0-9]{1,}$");
 140
 141   com.stevesoft.pat.Regex endrx = new com.stevesoft.pat.Regex("[0-9]{1,}$");
 142
 143   void parseId()
 144   {
 145     if (name == null)
 146     {
 147       System.err.println(
 148               "POSSIBLE IMPLEMENTATION ERROR: null sequence name passed to constructor.");
 149       name = "";
 150     }
 151     // Does sequence have the /start-end signature?
 152     if (limitrx.search(name))
 153     {
 154       name = limitrx.left();
 155       endrx.search(limitrx.stringMatched());
 156       setStart(Integer.parseInt(limitrx.stringMatched().substring(1,
 157               endrx.matchedFrom() - 1)));
 158       setEnd(Integer.parseInt(endrx.stringMatched()));
 159     }
 160   }
 161
 162   void checkValidRange()
 163   {
 164     // Note: JAL-774 :
 165     // http://issues.jalview.org/browse/JAL-774?focusedCommentId=11239&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-11239
 166     {
 167       int endRes = 0;
 168       for (int j = 0; j < sequence.length; j++)
 169       {
 170         if (!jalview.util.Comparison.isGap(sequence[j]))
 171         {
 172           endRes++;
 173         }
 174       }
 175       if (endRes > 0)
 176       {
 177         endRes += start - 1;
 178       }
 179
 180       if (end < endRes)
 181       {
 182         end = endRes;
 183       }
 184     }
 185
 186   }
 187
 188   /**
 189    * Creates a new Sequence object.
 190    *
 191    * @param name
 192    *          DOCUMENT ME!
 193    * @param sequence
 194    *          DOCUMENT ME!
 195    */
 196   public Sequence(String name, String sequence)
 197   {
 198     this(name, sequence, 1, -1);
 199   }
 200
 201   /**
 202    * Creates a new Sequence object with new AlignmentAnnotations but inherits
 203    * any existing dataset sequence reference. If non exists, everything is
 204    * copied.
 205    *
 206    * @param seq
 207    *          if seq is a dataset sequence, behaves like a plain old copy
 208    *          constructor
 209    */
 210   public Sequence(SequenceI seq)
 211   {
 212     this(seq, seq.getAnnotation());
 213   }
 214
 215   /**
 216    * Create a new sequence object with new features, DBRefEntries, and PDBIds
 217    * but inherits any existing dataset sequence reference, and duplicate of any
 218    * annotation that is present in the given annotation array.
 219    *
 220    * @param seq
 221    *          the sequence to be copied
 222    * @param alAnnotation
 223    *          an array of annotation including some associated with seq
 224    */
 225   public Sequence(SequenceI seq, AlignmentAnnotation[] alAnnotation)
 226   {
 227     initSeqFrom(seq, alAnnotation);
 228
 229   }
 230
 231   /**
 232    * does the heavy lifting when cloning a dataset sequence, or coping data from
 233    * dataset to a new derived sequence.
 234    *
 235    * @param seq
 236    *          - source of attributes.
 237    * @param alAnnotation
 238    *          - alignment annotation present on seq that should be copied onto
 239    *          this sequence
 240    */
 241   protected void initSeqFrom(SequenceI seq,
 242           AlignmentAnnotation[] alAnnotation)
 243   {
 244     {
 245       char[] oseq = seq.getSequence();
 246       initSeqAndName(seq.getName(), Arrays.copyOf(oseq, oseq.length),
 247               seq.getStart(), seq.getEnd());
 248     }
 249     description = seq.getDescription();
 250     if (seq != datasetSequence)
 251     {
 252       setDatasetSequence(seq.getDatasetSequence());
 253     }
 254     if (datasetSequence == null && seq.getDBRefs() != null)
 255     {
 256       // only copy DBRefs and seqfeatures if we really are a dataset sequence
 257       DBRefEntry[] dbr = seq.getDBRefs();
 258       for (int i = 0; i < dbr.length; i++)
 259       {
 260         addDBRef(new DBRefEntry(dbr[i]));
 261       }
 262       if (seq.getSequenceFeatures() != null)
 263       {
 264         SequenceFeature[] sf = seq.getSequenceFeatures();
 265         for (int i = 0; i < sf.length; i++)
 266         {
 267           addSequenceFeature(new SequenceFeature(sf[i]));
 268         }
 269       }
 270     }
 271     if (seq.getAnnotation() != null)
 272     {
 273       AlignmentAnnotation[] sqann = seq.getAnnotation();
 274       for (int i = 0; i < sqann.length; i++)
 275       {
 276         if (sqann[i] == null)
 277         {
 278           continue;
 279         }
 280         boolean found = (alAnnotation == null);
 281         if (!found)
 282         {
 283           for (int apos = 0; !found && apos < alAnnotation.length; apos++)
 284           {
 285             found = (alAnnotation[apos] == sqann[i]);
 286           }
 287         }
 288         if (found)
 289         {
 290           // only copy the given annotation
 291           AlignmentAnnotation newann = new AlignmentAnnotation(sqann[i]);
 292           addAlignmentAnnotation(newann);
 293         }
 294       }
 295     }
 296     if (seq.getAllPDBEntries() != null)
 297     {
 298       Vector<PDBEntry> ids = seq.getAllPDBEntries();
 299       for (PDBEntry pdb : ids)
 300       {
 301         this.addPDBId(new PDBEntry(pdb));
 302       }
 303     }
 304     if (seq.isHMMConsensusSequence())
 305     {
 306       this.isHMMConsensusSequence = true;
 307     }
 308     if (seq.getHMM() != null)
 309     {
 310       this.hmm = new HiddenMarkovModel(seq.getHMM());
 311     }
 312
 313   }
 314
 315   @Override
 316   public void setSequenceFeatures(SequenceFeature[] features)
 317   {
 318     if (datasetSequence == null)
 319     {
 320       sequenceFeatures = features;
 321     }
 322     else
 323     {
 324       if (datasetSequence.getSequenceFeatures() != features
 325               && datasetSequence.getSequenceFeatures() != null
 326               && datasetSequence.getSequenceFeatures().length > 0)
 327       {
 328         new Exception(
 329                 "Warning: JAL-2046 side effect ? Possible implementation error: overwriting dataset sequence features by setting sequence features on alignment")
 330                         .printStackTrace();
 331       }
 332       datasetSequence.setSequenceFeatures(features);
 333     }
 334   }
 335
 336   @Override
 337   public synchronized boolean addSequenceFeature(SequenceFeature sf)
 338   {
 339     if (sequenceFeatures == null && datasetSequence != null)
 340     {
 341       return datasetSequence.addSequenceFeature(sf);
 342     }
 343     if (sequenceFeatures == null)
 344     {
 345       sequenceFeatures = new SequenceFeature[0];
 346     }
 347
 348     for (int i = 0; i < sequenceFeatures.length; i++)
 349     {
 350       if (sequenceFeatures[i].equals(sf))
 351       {
 352         return false;
 353       }
 354     }
 355
 356     SequenceFeature[] temp = new SequenceFeature[sequenceFeatures.length
 357             + 1];
 358     System.arraycopy(sequenceFeatures, 0, temp, 0, sequenceFeatures.length);
 359     temp[sequenceFeatures.length] = sf;
 360
 361     sequenceFeatures = temp;
 362     return true;
 363   }
 364
 365   @Override
 366   public void deleteFeature(SequenceFeature sf)
 367   {
 368     if (sequenceFeatures == null)
 369     {
 370       if (datasetSequence != null)
 371       {
 372         datasetSequence.deleteFeature(sf);
 373       }
 374       return;
 375     }
 376
 377     int index = 0;
 378     for (index = 0; index < sequenceFeatures.length; index++)
 379     {
 380       if (sequenceFeatures[index].equals(sf))
 381       {
 382         break;
 383       }
 384     }
 385
 386     if (index == sequenceFeatures.length)
 387     {
 388       return;
 389     }
 390
 391     int sfLength = sequenceFeatures.length;
 392     if (sfLength < 2)
 393     {
 394       sequenceFeatures = null;
 395     }
 396     else
 397     {
 398       SequenceFeature[] temp = new SequenceFeature[sfLength - 1];
 399       System.arraycopy(sequenceFeatures, 0, temp, 0, index);
 400
 401       if (index < sfLength)
 402       {
 403         System.arraycopy(sequenceFeatures, index + 1, temp, index,
 404                 sequenceFeatures.length - index - 1);
 405       }
 406
 407       sequenceFeatures = temp;
 408     }
 409   }
 410
 411   /**
 412    * Returns the sequence features (if any), looking first on the sequence, then
 413    * on its dataset sequence, and so on until a non-null value is found (or
 414    * none). This supports retrieval of sequence features stored on the sequence
 415    * (as in the applet) or on the dataset sequence (as in the Desktop version).
 416    *
 417    * @return
 418    */
 419   @Override
 420   public SequenceFeature[] getSequenceFeatures()
 421   {
 422     SequenceFeature[] features = sequenceFeatures;
 423
 424     SequenceI seq = this;
 425     int count = 0; // failsafe against loop in sequence.datasetsequence...
 426     while (features == null && seq.getDatasetSequence() != null
 427             && count++ < 10)
 428     {
 429       seq = seq.getDatasetSequence();
 430       features = ((Sequence) seq).sequenceFeatures;
 431     }
 432     return features;
 433   }
 434
 435   @Override
 436   public boolean addPDBId(PDBEntry entry)
 437   {
 438     if (pdbIds == null)
 439     {
 440       pdbIds = new Vector<>();
 441       pdbIds.add(entry);
 442       return true;
 443     }
 444
 445     for (PDBEntry pdbe : pdbIds)
 446     {
 447       if (pdbe.updateFrom(entry))
 448       {
 449         return false;
 450       }
 451     }
 452     pdbIds.addElement(entry);
 453     return true;
 454   }
 455
 456   /**
 457    * DOCUMENT ME!
 458    *
 459    * @param id
 460    *          DOCUMENT ME!
 461    */
 462   @Override
 463   public void setPDBId(Vector<PDBEntry> id)
 464   {
 465     pdbIds = id;
 466   }
 467
 468   /**
 469    * DOCUMENT ME!
 470    *
 471    * @return DOCUMENT ME!
 472    */
 473   @Override
 474   public Vector<PDBEntry> getAllPDBEntries()
 475   {
 476     return pdbIds == null ? new Vector<PDBEntry>() : pdbIds;
 477   }
 478
 479   /**
 480    * DOCUMENT ME!
 481    *
 482    * @return DOCUMENT ME!
 483    */
 484   @Override
 485   public String getDisplayId(boolean jvsuffix)
 486   {
 487     StringBuffer result = new StringBuffer(name);
 488     if (jvsuffix)
 489     {
 490       result.append("/" + start + "-" + end);
 491     }
 492
 493     return result.toString();
 494   }
 495
 496   /**
 497    * DOCUMENT ME!
 498    *
 499    * @param name
 500    *          DOCUMENT ME!
 501    */
 502   @Override
 503   public void setName(String name)
 504   {
 505     this.name = name;
 506     this.parseId();
 507   }
 508
 509   /**
 510    * DOCUMENT ME!
 511    *
 512    * @return DOCUMENT ME!
 513    */
 514   @Override
 515   public String getName()
 516   {
 517     return this.name;
 518   }
 519
 520   /**
 521    * DOCUMENT ME!
 522    *
 523    * @param start
 524    *          DOCUMENT ME!
 525    */
 526   @Override
 527   public void setStart(int start)
 528   {
 529     this.start = start;
 530   }
 531
 532   /**
 533    * DOCUMENT ME!
 534    *
 535    * @return DOCUMENT ME!
 536    */
 537   @Override
 538   public int getStart()
 539   {
 540     return this.start;
 541   }
 542
 543   /**
 544    * DOCUMENT ME!
 545    *
 546    * @param end
 547    *          DOCUMENT ME!
 548    */
 549   @Override
 550   public void setEnd(int end)
 551   {
 552     this.end = end;
 553   }
 554
 555   /**
 556    * DOCUMENT ME!
 557    *
 558    * @return DOCUMENT ME!
 559    */
 560   @Override
 561   public int getEnd()
 562   {
 563     return this.end;
 564   }
 565
 566   /**
 567    * DOCUMENT ME!
 568    *
 569    * @return DOCUMENT ME!
 570    */
 571   @Override
 572   public int getLength()
 573   {
 574     return this.sequence.length;
 575   }
 576
 577   /**
 578    * DOCUMENT ME!
 579    *
 580    * @param seq
 581    *          DOCUMENT ME!
 582    */
 583   @Override
 584   public void setSequence(String seq)
 585   {
 586     this.sequence = seq.toCharArray();
 587     checkValidRange();
 588   }
 589
 590   @Override
 591   public String getSequenceAsString()
 592   {
 593     return new String(sequence);
 594   }
 595
 596   @Override
 597   public String getSequenceAsString(int start, int end)
 598   {
 599     return new String(getSequence(start, end));
 600   }
 601
 602   @Override
 603   public char[] getSequence()
 604   {
 605     return sequence;
 606   }
 607
 608   /*
 609    * (non-Javadoc)
 610    *
 611    * @see jalview.datamodel.SequenceI#getSequence(int, int)
 612    */
 613   @Override
 614   public char[] getSequence(int start, int end)
 615   {
 616     if (start < 0)
 617     {
 618       start = 0;
 619     }
 620     // JBPNote - left to user to pad the result here (TODO:Decide on this
 621     // policy)
 622     if (start >= sequence.length)
 623     {
 624       return new char[0];
 625     }
 626
 627     if (end >= sequence.length)
 628     {
 629       end = sequence.length;
 630     }
 631
 632     char[] reply = new char[end - start];
 633     System.arraycopy(sequence, start, reply, 0, end - start);
 634
 635     return reply;
 636   }
 637
 638   @Override
 639   public SequenceI getSubSequence(int start, int end)
 640   {
 641     if (start < 0)
 642     {
 643       start = 0;
 644     }
 645     char[] seq = getSequence(start, end);
 646     if (seq.length == 0)
 647     {
 648       return null;
 649     }
 650     int nstart = findPosition(start);
 651     int nend = findPosition(end) - 1;
 652     // JBPNote - this is an incomplete copy.
 653     SequenceI nseq = new Sequence(this.getName(), seq, nstart, nend);
 654     nseq.setDescription(description);
 655     if (datasetSequence != null)
 656     {
 657       nseq.setDatasetSequence(datasetSequence);
 658     }
 659     else
 660     {
 661       nseq.setDatasetSequence(this);
 662     }
 663     return nseq;
 664   }
 665
 666   /**
 667    * Returns the character of the aligned sequence at the given position (base
 668    * zero), or space if the position is not within the sequence's bounds
 669    *
 670    * @return
 671    */
 672   @Override
 673   public char getCharAt(int i)
 674   {
 675     if (i >= 0 && i < sequence.length)
 676     {
 677       return sequence[i];
 678     }
 679     else
 680     {
 681       return ' ';
 682     }
 683   }
 684
 685   /**
 686    * DOCUMENT ME!
 687    *
 688    * @param desc
 689    *          DOCUMENT ME!
 690    */
 691   @Override
 692   public void setDescription(String desc)
 693   {
 694     this.description = desc;
 695   }
 696
 697   /**
 698    * DOCUMENT ME!
 699    *
 700    * @return DOCUMENT ME!
 701    */
 702   @Override
 703   public String getDescription()
 704   {
 705     return this.description;
 706   }
 707
 708   /*
 709    * (non-Javadoc)
 710    *
 711    * @see jalview.datamodel.SequenceI#findIndex(int)
 712    */
 713   @Override
 714   public int findIndex(int pos)
 715   {
 716     // returns the alignment position for a residue
 717     int j = start;
 718     int i = 0;
 719     // Rely on end being at least as long as the length of the sequence.
 720     while ((i < sequence.length) && (j <= end) && (j <= pos))
 721     {
 722       if (!jalview.util.Comparison.isGap(sequence[i]))
 723       {
 724         j++;
 725       }
 726
 727       i++;
 728     }
 729
 730     if ((j == end) && (j < pos))
 731     {
 732       return end + 1;
 733     }
 734     else
 735     {
 736       return i;
 737     }
 738   }
 739
 740   @Override
 741   public int findPosition(int i)
 742   {
 743     int j = 0;
 744     int pos = start;
 745     int seqlen = sequence.length;
 746     while ((j < i) && (j < seqlen))
 747     {
 748       if (!jalview.util.Comparison.isGap(sequence[j]))
 749       {
 750         pos++;
 751       }
 752
 753       j++;
 754     }
 755
 756     return pos;
 757   }
 758
 759   /**
 760    * Returns an int array where indices correspond to each residue in the
 761    * sequence and the element value gives its position in the alignment
 762    *
 763    * @return int[SequenceI.getEnd()-SequenceI.getStart()+1] or null if no
 764    *         residues in SequenceI object
 765    */
 766   @Override
 767   public int[] gapMap()
 768   {
 769     String seq = jalview.analysis.AlignSeq.extractGaps(
 770             jalview.util.Comparison.GapChars, new String(sequence));
 771     int[] map = new int[seq.length()];
 772     int j = 0;
 773     int p = 0;
 774
 775     while (j < sequence.length)
 776     {
 777       if (!jalview.util.Comparison.isGap(sequence[j]))
 778       {
 779         map[p++] = j;
 780       }
 781
 782       j++;
 783     }
 784
 785     return map;
 786   }
 787
 788   @Override
 789   public int[] findPositionMap()
 790   {
 791     int map[] = new int[sequence.length];
 792     int j = 0;
 793     int pos = start;
 794     int seqlen = sequence.length;
 795     while ((j < seqlen))
 796     {
 797       map[j] = pos;
 798       if (!jalview.util.Comparison.isGap(sequence[j]))
 799       {
 800         pos++;
 801       }
 802
 803       j++;
 804     }
 805     return map;
 806   }
 807
 808   @Override
 809   public List<int[]> getInsertions()
 810   {
 811     ArrayList<int[]> map = new ArrayList<>();
 812     int lastj = -1, j = 0;
 813     int pos = start;
 814     int seqlen = sequence.length;
 815     while ((j < seqlen))
 816     {
 817       if (jalview.util.Comparison.isGap(sequence[j]))
 818       {
 819         if (lastj == -1)
 820         {
 821           lastj = j;
 822         }
 823       }
 824       else
 825       {
 826         if (lastj != -1)
 827         {
 828           map.add(new int[] { lastj, j - 1 });
 829           lastj = -1;
 830         }
 831       }
 832       j++;
 833     }
 834     if (lastj != -1)
 835     {
 836       map.add(new int[] { lastj, j - 1 });
 837       lastj = -1;
 838     }
 839     return map;
 840   }
 841
 842   @Override
 843   public BitSet getInsertionsAsBits()
 844   {
 845     BitSet map = new BitSet();
 846     int lastj = -1, j = 0;
 847     int pos = start;
 848     int seqlen = sequence.length;
 849     while ((j < seqlen))
 850     {
 851       if (jalview.util.Comparison.isGap(sequence[j]))
 852       {
 853         if (lastj == -1)
 854         {
 855           lastj = j;
 856         }
 857       }
 858       else
 859       {
 860         if (lastj != -1)
 861         {
 862           map.set(lastj, j);
 863           lastj = -1;
 864         }
 865       }
 866       j++;
 867     }
 868     if (lastj != -1)
 869     {
 870       map.set(lastj, j);
 871       lastj = -1;
 872     }
 873     return map;
 874   }
 875
 876   @Override
 877   public void deleteChars(int i, int j)
 878   {
 879     int newstart = start, newend = end;
 880     if (i >= sequence.length || i < 0)
 881     {
 882       return;
 883     }
 884
 885     char[] tmp = StringUtils.deleteChars(sequence, i, j);
 886     boolean createNewDs = false;
 887     // TODO: take a (second look) at the dataset creation validation method for
 888     // the very large sequence case
 889     int eindex = -1, sindex = -1;
 890     boolean ecalc = false, scalc = false;
 891     for (int s = i; s < j; s++)
 892     {
 893       if (jalview.schemes.ResidueProperties.aaIndex[sequence[s]] != 23)
 894       {
 895         if (createNewDs)
 896         {
 897           newend--;
 898         }
 899         else
 900         {
 901           if (!scalc)
 902           {
 903             sindex = findIndex(start) - 1;
 904             scalc = true;
 905           }
 906           if (sindex == s)
 907           {
 908             // delete characters including start of sequence
 909             newstart = findPosition(j);
 910             break; // don't need to search for any more residue characters.
 911           }
 912           else
 913           {
 914             // delete characters after start.
 915             if (!ecalc)
 916             {
 917               eindex = findIndex(end) - 1;
 918               ecalc = true;
 919             }
 920             if (eindex < j)
 921             {
 922               // delete characters at end of sequence
 923               newend = findPosition(i - 1);
 924               break; // don't need to search for any more residue characters.
 925             }
 926             else
 927             {
 928               createNewDs = true;
 929               newend--; // decrease end position by one for the deleted residue
 930               // and search further
 931             }
 932           }
 933         }
 934       }
 935     }
 936     // deletion occured in the middle of the sequence
 937     if (createNewDs && this.datasetSequence != null)
 938     {
 939       // construct a new sequence
 940       Sequence ds = new Sequence(datasetSequence);
 941       // TODO: remove any non-inheritable properties ?
 942       // TODO: create a sequence mapping (since there is a relation here ?)
 943       ds.deleteChars(i, j);
 944       datasetSequence = ds;
 945     }
 946     start = newstart;
 947     end = newend;
 948     sequence = tmp;
 949   }
 950
 951   @Override
 952   public void insertCharAt(int i, int length, char c)
 953   {
 954     char[] tmp = new char[sequence.length + length];
 955
 956     if (i >= sequence.length)
 957     {
 958       System.arraycopy(sequence, 0, tmp, 0, sequence.length);
 959       i = sequence.length;
 960     }
 961     else
 962     {
 963       System.arraycopy(sequence, 0, tmp, 0, i);
 964     }
 965
 966     int index = i;
 967     while (length > 0)
 968     {
 969       tmp[index++] = c;
 970       length--;
 971     }
 972
 973     if (i < sequence.length)
 974     {
 975       System.arraycopy(sequence, i, tmp, index, sequence.length - i);
 976     }
 977
 978     sequence = tmp;
 979   }
 980
 981   @Override
 982   public void insertCharAt(int i, char c)
 983   {
 984     insertCharAt(i, 1, c);
 985   }
 986
 987   @Override
 988   public String getVamsasId()
 989   {
 990     return vamsasId;
 991   }
 992
 993   @Override
 994   public void setVamsasId(String id)
 995   {
 996     vamsasId = id;
 997   }
 998
 999   @Override
1000   public void setDBRefs(DBRefEntry[] dbref)
1001   {
1002     if (dbrefs == null && datasetSequence != null
1003             && this != datasetSequence)
1004     {
1005       datasetSequence.setDBRefs(dbref);
1006       return;
1007     }
1008     dbrefs = dbref;
1009     if (dbrefs != null)
1010     {
1011       DBRefUtils.ensurePrimaries(this);
1012     }
1013   }
1014
1015   @Override
1016   public DBRefEntry[] getDBRefs()
1017   {
1018     if (dbrefs == null && datasetSequence != null
1019             && this != datasetSequence)
1020     {
1021       return datasetSequence.getDBRefs();
1022     }
1023     return dbrefs;
1024   }
1025
1026   @Override
1027   public void addDBRef(DBRefEntry entry)
1028   {
1029     if (datasetSequence != null)
1030     {
1031       datasetSequence.addDBRef(entry);
1032       return;
1033     }
1034
1035     if (dbrefs == null)
1036     {
1037       dbrefs = new DBRefEntry[0];
1038     }
1039
1040     for (DBRefEntryI dbr : dbrefs)
1041     {
1042       if (dbr.updateFrom(entry))
1043       {
1044         /*
1045          * found a dbref that either matched, or could be
1046          * updated from, the new entry - no need to add it
1047          */
1048         return;
1049       }
1050     }
1051
1052     /*
1053      * extend the array to make room for one more
1054      */
1055     // TODO use an ArrayList instead
1056     int j = dbrefs.length;
1057     DBRefEntry[] temp = new DBRefEntry[j + 1];
1058     System.arraycopy(dbrefs, 0, temp, 0, j);
1059     temp[temp.length - 1] = entry;
1060
1061     dbrefs = temp;
1062
1063     DBRefUtils.ensurePrimaries(this);
1064   }
1065
1066   @Override
1067   public void setDatasetSequence(SequenceI seq)
1068   {
1069     if (seq == this)
1070     {
1071       throw new IllegalArgumentException(
1072               "Implementation Error: self reference passed to SequenceI.setDatasetSequence");
1073     }
1074     if (seq != null && seq.getDatasetSequence() != null)
1075     {
1076       throw new IllegalArgumentException(
1077               "Implementation error: cascading dataset sequences are not allowed.");
1078     }
1079     datasetSequence = seq;
1080   }
1081
1082   @Override
1083   public SequenceI getDatasetSequence()
1084   {
1085     return datasetSequence;
1086   }
1087
1088   @Override
1089   public AlignmentAnnotation[] getAnnotation()
1090   {
1091     return annotation == null ? null
1092             : annotation
1093                     .toArray(new AlignmentAnnotation[annotation.size()]);
1094   }
1095
1096   @Override
1097   public boolean hasAnnotation(AlignmentAnnotation ann)
1098   {
1099     return annotation == null ? false : annotation.contains(ann);
1100   }
1101
1102   @Override
1103   public void addAlignmentAnnotation(AlignmentAnnotation annotation)
1104   {
1105     if (this.annotation == null)
1106     {
1107       this.annotation = new Vector<>();
1108     }
1109     if (!this.annotation.contains(annotation))
1110     {
1111       this.annotation.addElement(annotation);
1112     }
1113     annotation.setSequenceRef(this);
1114   }
1115
1116   @Override
1117   public void removeAlignmentAnnotation(AlignmentAnnotation annotation)
1118   {
1119     if (this.annotation != null)
1120     {
1121       this.annotation.removeElement(annotation);
1122       if (this.annotation.size() == 0)
1123       {
1124         this.annotation = null;
1125       }
1126     }
1127   }
1128
1129   /**
1130    * test if this is a valid candidate for another sequence's dataset sequence.
1131    *
1132    */
1133   private boolean isValidDatasetSequence()
1134   {
1135     if (datasetSequence != null)
1136     {
1137       return false;
1138     }
1139     for (int i = 0; i < sequence.length; i++)
1140     {
1141       if (jalview.util.Comparison.isGap(sequence[i]))
1142       {
1143         return false;
1144       }
1145     }
1146     return true;
1147   }
1148
1149   @Override
1150   public SequenceI deriveSequence()
1151   {
1152     Sequence seq = null;
1153     if (datasetSequence == null)
1154     {
1155       if (isValidDatasetSequence())
1156       {
1157         // Use this as dataset sequence
1158         seq = new Sequence(getName(), "", 1, -1);
1159         seq.setDatasetSequence(this);
1160         seq.initSeqFrom(this, getAnnotation());
1161         return seq;
1162       }
1163       else
1164       {
1165         // Create a new, valid dataset sequence
1166         createDatasetSequence();
1167       }
1168     }
1169     return new Sequence(this);
1170   }
1171
1172   private boolean _isNa;
1173
1174   private long _seqhash = 0;
1175
1176   /**
1177    * Answers false if the sequence is more than 85% nucleotide (ACGTU), else
1178    * true
1179    */
1180   @Override
1181   public boolean isProtein()
1182   {
1183     if (datasetSequence != null)
1184     {
1185       return datasetSequence.isProtein();
1186     }
1187     if (_seqhash != sequence.hashCode())
1188     {
1189       _seqhash = sequence.hashCode();
1190       _isNa = Comparison.isNucleotide(this);
1191     }
1192     return !_isNa;
1193   };
1194
1195   /*
1196    * (non-Javadoc)
1197    *
1198    * @see jalview.datamodel.SequenceI#createDatasetSequence()
1199    */
1200   @Override
1201   public SequenceI createDatasetSequence()
1202   {
1203     if (datasetSequence == null)
1204     {
1205       Sequence dsseq = new Sequence(getName(),
1206               AlignSeq.extractGaps(jalview.util.Comparison.GapChars,
1207                       getSequenceAsString()),
1208               getStart(), getEnd());
1209
1210       datasetSequence = dsseq;
1211
1212       dsseq.setDescription(description);
1213       // move features and database references onto dataset sequence
1214       dsseq.sequenceFeatures = sequenceFeatures;
1215       sequenceFeatures = null;
1216       dsseq.dbrefs = dbrefs;
1217       dbrefs = null;
1218       // TODO: search and replace any references to this sequence with
1219       // references to the dataset sequence in Mappings on dbref
1220       dsseq.pdbIds = pdbIds;
1221       pdbIds = null;
1222       datasetSequence.updatePDBIds();
1223       if (annotation != null)
1224       {
1225         // annotation is cloned rather than moved, to preserve what's currently
1226         // on the alignment
1227         for (AlignmentAnnotation aa : annotation)
1228         {
1229           AlignmentAnnotation _aa = new AlignmentAnnotation(aa);
1230           _aa.sequenceRef = datasetSequence;
1231           _aa.adjustForAlignment(); // uses annotation's own record of
1232                                     // sequence-column mapping
1233           datasetSequence.addAlignmentAnnotation(_aa);
1234         }
1235       }
1236     }
1237     return datasetSequence;
1238   }
1239
1240   /*
1241    * (non-Javadoc)
1242    *
1243    * @see
1244    * jalview.datamodel.SequenceI#setAlignmentAnnotation(AlignmmentAnnotation[]
1245    * annotations)
1246    */
1247   @Override
1248   public void setAlignmentAnnotation(AlignmentAnnotation[] annotations)
1249   {
1250     if (annotation != null)
1251     {
1252       annotation.removeAllElements();
1253     }
1254     if (annotations != null)
1255     {
1256       for (int i = 0; i < annotations.length; i++)
1257       {
1258         if (annotations[i] != null)
1259         {
1260           addAlignmentAnnotation(annotations[i]);
1261         }
1262       }
1263     }
1264   }
1265
1266   @Override
1267   public AlignmentAnnotation[] getAnnotation(String label)
1268   {
1269     if (annotation == null || annotation.size() == 0)
1270     {
1271       return null;
1272     }
1273
1274     Vector subset = new Vector();
1275     Enumeration e = annotation.elements();
1276     while (e.hasMoreElements())
1277     {
1278       AlignmentAnnotation ann = (AlignmentAnnotation) e.nextElement();
1279       if (ann.label != null && ann.label.equals(label))
1280       {
1281         subset.addElement(ann);
1282       }
1283     }
1284     if (subset.size() == 0)
1285     {
1286       return null;
1287     }
1288     AlignmentAnnotation[] anns = new AlignmentAnnotation[subset.size()];
1289     int i = 0;
1290     e = subset.elements();
1291     while (e.hasMoreElements())
1292     {
1293       anns[i++] = (AlignmentAnnotation) e.nextElement();
1294     }
1295     subset.removeAllElements();
1296     return anns;
1297   }
1298
1299   @Override
1300   public boolean updatePDBIds()
1301   {
1302     if (datasetSequence != null)
1303     {
1304       // TODO: could merge DBRefs
1305       return datasetSequence.updatePDBIds();
1306     }
1307     if (dbrefs == null || dbrefs.length == 0)
1308     {
1309       return false;
1310     }
1311     boolean added = false;
1312     for (DBRefEntry dbr : dbrefs)
1313     {
1314       if (DBRefSource.PDB.equals(dbr.getSource()))
1315       {
1316         /*
1317          * 'Add' any PDB dbrefs as a PDBEntry - add is only performed if the
1318          * PDB id is not already present in a 'matching' PDBEntry
1319          * Constructor parses out a chain code if appended to the accession id
1320          * (a fudge used to 'store' the chain code in the DBRef)
1321          */
1322         PDBEntry pdbe = new PDBEntry(dbr);
1323         added |= addPDBId(pdbe);
1324       }
1325     }
1326     return added;
1327   }
1328
1329   @Override
1330   public void transferAnnotation(SequenceI entry, Mapping mp)
1331   {
1332     if (datasetSequence != null)
1333     {
1334       datasetSequence.transferAnnotation(entry, mp);
1335       return;
1336     }
1337     if (entry.getDatasetSequence() != null)
1338     {
1339       transferAnnotation(entry.getDatasetSequence(), mp);
1340       return;
1341     }
1342     // transfer any new features from entry onto sequence
1343     if (entry.getSequenceFeatures() != null)
1344     {
1345
1346       SequenceFeature[] sfs = entry.getSequenceFeatures();
1347       for (int si = 0; si < sfs.length; si++)
1348       {
1349         SequenceFeature sf[] = (mp != null) ? mp.locateFeature(sfs[si])
1350                 : new SequenceFeature[]
1351                 { new SequenceFeature(sfs[si]) };
1352         if (sf != null && sf.length > 0)
1353         {
1354           for (int sfi = 0; sfi < sf.length; sfi++)
1355           {
1356             addSequenceFeature(sf[sfi]);
1357           }
1358         }
1359       }
1360     }
1361
1362     // transfer PDB entries
1363     if (entry.getAllPDBEntries() != null)
1364     {
1365       Enumeration e = entry.getAllPDBEntries().elements();
1366       while (e.hasMoreElements())
1367       {
1368         PDBEntry pdb = (PDBEntry) e.nextElement();
1369         addPDBId(pdb);
1370       }
1371     }
1372     // transfer database references
1373     DBRefEntry[] entryRefs = entry.getDBRefs();
1374     if (entryRefs != null)
1375     {
1376       for (int r = 0; r < entryRefs.length; r++)
1377       {
1378         DBRefEntry newref = new DBRefEntry(entryRefs[r]);
1379         if (newref.getMap() != null && mp != null)
1380         {
1381           // remap ref using our local mapping
1382         }
1383         // we also assume all version string setting is done by dbSourceProxy
1384         /*
1385          * if (!newref.getSource().equalsIgnoreCase(dbSource)) {
1386          * newref.setSource(dbSource); }
1387          */
1388         addDBRef(newref);
1389       }
1390     }
1391   }
1392
1393   /**
1394    * @return The index (zero-based) on this sequence in the MSA. It returns
1395    *         {@code -1} if this information is not available.
1396    */
1397   @Override
1398   public int getIndex()
1399   {
1400     return index;
1401   }
1402
1403   /**
1404    * Defines the position of this sequence in the MSA. Use the value {@code -1}
1405    * if this information is undefined.
1406    *
1407    * @param The
1408    *          position for this sequence. This value is zero-based (zero for
1409    *          this first sequence)
1410    */
1411   @Override
1412   public void setIndex(int value)
1413   {
1414     index = value;
1415   }
1416
1417   @Override
1418   public void setRNA(RNA r)
1419   {
1420     rna = r;
1421   }
1422
1423   @Override
1424   public RNA getRNA()
1425   {
1426     return rna;
1427   }
1428
1429   @Override
1430   public List<AlignmentAnnotation> getAlignmentAnnotations(String calcId,
1431           String label)
1432   {
1433     List<AlignmentAnnotation> result = new ArrayList<>();
1434     if (this.annotation != null)
1435     {
1436       for (AlignmentAnnotation ann : annotation)
1437       {
1438         if (ann.calcId != null && ann.calcId.equals(calcId)
1439                 && ann.label != null && ann.label.equals(label))
1440         {
1441           result.add(ann);
1442         }
1443       }
1444     }
1445     return result;
1446   }
1447
1448   @Override
1449   public String toString()
1450   {
1451     return getDisplayId(false);
1452   }
1453
1454   @Override
1455   public PDBEntry getPDBEntry(String pdbIdStr)
1456   {
1457     if (getDatasetSequence() != null)
1458     {
1459       return getDatasetSequence().getPDBEntry(pdbIdStr);
1460     }
1461     if (pdbIds == null)
1462     {
1463       return null;
1464     }
1465     List<PDBEntry> entries = getAllPDBEntries();
1466     for (PDBEntry entry : entries)
1467     {
1468       if (entry.getId().equalsIgnoreCase(pdbIdStr))
1469       {
1470         return entry;
1471       }
1472     }
1473     return null;
1474   }
1475
1476   @Override
1477   public List<DBRefEntry> getPrimaryDBRefs()
1478   {
1479     if (datasetSequence != null)
1480     {
1481       return datasetSequence.getPrimaryDBRefs();
1482     }
1483     if (dbrefs == null || dbrefs.length == 0)
1484     {
1485       return Collections.emptyList();
1486     }
1487     synchronized (dbrefs)
1488     {
1489       List<DBRefEntry> primaries = new ArrayList<>();
1490       DBRefEntry[] tmp = new DBRefEntry[1];
1491       for (DBRefEntry ref : dbrefs)
1492       {
1493         if (!ref.isPrimaryCandidate())
1494         {
1495           continue;
1496         }
1497         if (ref.hasMap())
1498         {
1499           MapList mp = ref.getMap().getMap();
1500           if (mp.getFromLowest() > start || mp.getFromHighest() < end)
1501           {
1502             // map only involves a subsequence, so cannot be primary
1503             continue;
1504           }
1505         }
1506         // whilst it looks like it is a primary ref, we also sanity check type
1507         if (DBRefUtils.getCanonicalName(DBRefSource.PDB)
1508                 .equals(DBRefUtils.getCanonicalName(ref.getSource())))
1509         {
1510           // PDB dbrefs imply there should be a PDBEntry associated
1511           // TODO: tighten PDB dbrefs
1512           // formally imply Jalview has actually downloaded and
1513           // parsed the pdb file. That means there should be a cached file
1514           // handle on the PDBEntry, and a real mapping between sequence and
1515           // extracted sequence from PDB file
1516           PDBEntry pdbentry = getPDBEntry(ref.getAccessionId());
1517           if (pdbentry != null && pdbentry.getFile() != null)
1518           {
1519             primaries.add(ref);
1520           }
1521           continue;
1522         }
1523         // check standard protein or dna sources
1524         tmp[0] = ref;
1525         DBRefEntry[] res = DBRefUtils.selectDbRefs(!isProtein(), tmp);
1526         if (res != null && res[0] == tmp[0])
1527         {
1528           primaries.add(ref);
1529           continue;
1530         }
1531       }
1532       return primaries;
1533     }
1534   }
1535
1536   @Override
1537   public HiddenMarkovModel getHMM()
1538   {
1539     return hmm;
1540   }
1541
1542   @Override
1543   public void setHMM(HiddenMarkovModel hmm)
1544   {
1545     this.hmm = hmm;
1546   }
1547
1548   @Override
1549   public void updateHMMMapping()
1550   {
1551     int node = 1;
1552     int column = 0;
1553     hmm.emptyNodeLookup();
1554     for (char residue : sequence)
1555     {
1556       if (!Comparison.isGap(residue))
1557       {
1558         hmm.setAlignmentColumn(node, column);
1559         node++;
1560       }
1561       column++;
1562     }
1563
1564   }
1565
1566   @Override
1567   public boolean isHMMConsensusSequence()
1568   {
1569     return isHMMConsensusSequence;
1570   }
1571
1572   @Override
1573   public void setIsHMMConsensusSequence(boolean isHMMConsensusSequence)
1574   {
1575     this.isHMMConsensusSequence = isHMMConsensusSequence;
1576   }
1577
1578   @Override
1579   public boolean hasHMMAnnotation()
1580   {
1581     return hasInfo;
1582   }
1583
1584   @Override
1585   public void setHasInfo(boolean status)
1586   {
1587     hasInfo = true;
1588   }
1589
1590   @Override
1591   public int getPreviousPosition()
1592   {
1593     return previousPosition;
1594   }
1595
1596   @Override
1597   public void setPreviousPosition(int previousPosition)
1598   {
1599     this.previousPosition = previousPosition;
1600   }
1601
1602 }