src/jalview/datamodel/Sequence.java

   1 /*
   2  * Jalview - A Sequence Alignment Editor and Viewer (Version 2.8.2b1)
   3  * Copyright (C) 2014 The Jalview Authors
   4  *
   5  * This file is part of Jalview.
   6  *
   7  * Jalview is free software: you can redistribute it and/or
   8  * modify it under the terms of the GNU General Public License
   9  * as published by the Free Software Foundation, either version 3
  10  * of the License, or (at your option) any later version.
  11  *
  12  * Jalview is distributed in the hope that it will be useful, but
  13  * WITHOUT ANY WARRANTY; without even the implied warranty
  14  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
  15  * PURPOSE.  See the GNU General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU General Public License
  18  * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
  19  * The Jalview Authors are detailed in the 'AUTHORS' file.
  20  */
  21 package jalview.datamodel;
  22
  23 import java.util.ArrayList;
  24 import java.util.Enumeration;
  25 import java.util.List;
  26 import java.util.Vector;
  27
  28 import fr.orsay.lri.varna.models.rna.RNA;
  29
  30 import jalview.analysis.AlignSeq;
  31
  32 /**
  33  *
  34  * Implements the SequenceI interface for a char[] based sequence object.
  35  *
  36  * @author $author$
  37  * @version $Revision$
  38  */
  39 public class Sequence implements SequenceI
  40 {
  /**
   * The dataset sequence this sequence refers to, or null if none has been
   * set (see setDatasetSequence / createDatasetSequence).
   */
  SequenceI datasetSequence;

  /**
   * Display name. parseId() strips a trailing "/start-end" suffix from it and
   * replaces a null name with the empty string.
   */
  String name;

  /** The (possibly gapped) sequence characters. */
  private char[] sequence;

  /** Free-text description; null until set. */
  String description;

  /** Position number assigned to the first non-gap character. */
  int start;

  /**
   * Position number of the last residue. checkValidRange() raises it when it
   * is smaller than the value implied by start and the residue count.
   */
  int end;

  /**
   * PDB entries attached to this sequence (raw Vector holding PDBEntry
   * objects). Null until addPDBId or setPDBId supplies one.
   */
  Vector pdbIds;

  /** Identifier read and written through getVamsasId / setVamsasId. */
  String vamsasId;

  /** Database cross-references held directly by this sequence; may be null. */
  DBRefEntry[] dbrefs;

  /** RNA model read and written through getRNA / setRNA. */
  RNA rna;

  /**
   * This annotation is displayed below the alignment but the positions are tied
   * to the residues of this sequence
   *
   * TODO: change to List<>
   */
  Vector<AlignmentAnnotation> annotation;

  /**
   * The index of the sequence in a MSA
   */
  int index = -1;

  /**
   * Array of sequence features - may not be null for a valid sequence object.
   * NOTE(review): deleteFeature() sets this field to null when the last
   * feature is removed, and it is null until a feature is added, so callers
   * should still null-check.
   */
  public SequenceFeature[] sequenceFeatures;
  76
  /**
   * Creates a new Sequence from a string of residue and gap characters.
   * <p>
   * If the name ends in a "/start-end" suffix, that suffix is removed from the
   * name and its numbers replace the start and end given here. The end is then
   * raised if it is smaller than the value implied by the start and the number
   * of non-gap characters.
   *
   * @param name
   *          display name string
   * @param sequence
   *          string to form a possibly gapped sequence out of; must not be
   *          null
   * @param start
   *          first position of non-gap residue in the sequence
   * @param end
   *          last position of ungapped residues (nearly always only used for
   *          display purposes)
   */
  public Sequence(String name, String sequence, int start, int end)
  {
    // The char[] constructor performs the same assignments and checks.
    this(name, sequence.toCharArray(), start, end);
  }
  99
 100   public Sequence(String name, char[] sequence, int start, int end)
 101   {
 102     this.name = name;
 103     this.sequence = sequence;
 104     this.start = start;
 105     this.end = end;
 106     parseId();
 107     checkValidRange();
 108   }
 109
  // Matches a trailing "/<digits>-<digits>" range suffix on a sequence name.
  // Used by parseId(), which reads the match results back from this object.
  com.stevesoft.pat.Regex limitrx = new com.stevesoft.pat.Regex(
          "[/][0-9]{1,}[-][0-9]{1,}$");

  // Matches the trailing run of digits; parseId() applies it to the text
  // matched by limitrx to locate the end value.
  com.stevesoft.pat.Regex endrx = new com.stevesoft.pat.Regex("[0-9]{1,}$");
 114
 115   void parseId()
 116   {
 117     if (name == null)
 118     {
 119       System.err
 120               .println("POSSIBLE IMPLEMENTATION ERROR: null sequence name passed to constructor.");
 121       name = "";
 122     }
 123     // Does sequence have the /start-end signiature?
 124     if (limitrx.search(name))
 125     {
 126       name = limitrx.left();
 127       endrx.search(limitrx.stringMatched());
 128       setStart(Integer.parseInt(limitrx.stringMatched().substring(1,
 129               endrx.matchedFrom() - 1)));
 130       setEnd(Integer.parseInt(endrx.stringMatched()));
 131     }
 132   }
 133
 134   void checkValidRange()
 135   {
 136     // Note: JAL-774 :
 137     // http://issues.jalview.org/browse/JAL-774?focusedCommentId=11239&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-11239
 138     {
 139       int endRes = 0;
 140       for (int j = 0; j < sequence.length; j++)
 141       {
 142         if (!jalview.util.Comparison.isGap(sequence[j]))
 143         {
 144           endRes++;
 145         }
 146       }
 147       if (endRes > 0)
 148       {
 149         endRes += start - 1;
 150       }
 151
 152       if (end < endRes)
 153       {
 154         end = endRes;
 155       }
 156     }
 157
 158   }
 159
  /**
   * Creates a new Sequence with a start of 1 and an initial end of -1. The
   * four-argument constructor then raises the end to match the number of
   * non-gap characters, or takes start and end from a "/start-end" suffix on
   * the name if one is present.
   *
   * @param name
   *          display name string
   * @param sequence
   *          string to form a possibly gapped sequence out of
   */
  public Sequence(String name, String sequence)
  {
    this(name, sequence, 1, -1);
  }
 172
  /**
   * Creates a new Sequence object with new features, DBRefEntries,
   * AlignmentAnnotations, and PDBIds but inherits any existing dataset sequence
   * reference.
   * <p>
   * Delegates to the two-argument copy constructor, passing the source
   * sequence's own annotation array as the set of annotations to copy.
   *
   * @param seq
   *          the sequence to be copied
   */
  public Sequence(SequenceI seq)
  {
    this(seq, seq.getAnnotation());
  }
 185
 186   /**
 187    * Create a new sequence object with new features, DBRefEntries, and PDBIds
 188    * but inherits any existing dataset sequence reference, and duplicate of any
 189    * annotation that is present in the given annotation array.
 190    *
 191    * @param seq
 192    *          the sequence to be copied
 193    * @param alAnnotation
 194    *          an array of annotation including some associated with seq
 195    */
 196   public Sequence(SequenceI seq, AlignmentAnnotation[] alAnnotation)
 197   {
 198     this(seq.getName(), seq.getSequence(), seq.getStart(), seq.getEnd());
 199     description = seq.getDescription();
 200     if (seq.getSequenceFeatures() != null)
 201     {
 202       SequenceFeature[] sf = seq.getSequenceFeatures();
 203       for (int i = 0; i < sf.length; i++)
 204       {
 205         addSequenceFeature(new SequenceFeature(sf[i]));
 206       }
 207     }
 208     setDatasetSequence(seq.getDatasetSequence());
 209     if (datasetSequence == null && seq.getDBRef() != null)
 210     {
 211       // only copy DBRefs if we really are a dataset sequence
 212       DBRefEntry[] dbr = seq.getDBRef();
 213       for (int i = 0; i < dbr.length; i++)
 214       {
 215         addDBRef(new DBRefEntry(dbr[i]));
 216       }
 217     }
 218     if (seq.getAnnotation() != null)
 219     {
 220       AlignmentAnnotation[] sqann = seq.getAnnotation();
 221       for (int i = 0; i < sqann.length; i++)
 222       {
 223         if (sqann[i] == null)
 224         {
 225           continue;
 226         }
 227         boolean found = (alAnnotation == null);
 228         if (!found)
 229         {
 230           for (int apos = 0; !found && apos < alAnnotation.length; apos++)
 231           {
 232             found = (alAnnotation[apos] == sqann[i]);
 233           }
 234         }
 235         if (found)
 236         {
 237           // only copy the given annotation
 238           AlignmentAnnotation newann = new AlignmentAnnotation(sqann[i]);
 239           addAlignmentAnnotation(newann);
 240         }
 241       }
 242     }
 243     if (seq.getPDBId() != null)
 244     {
 245       Vector ids = seq.getPDBId();
 246       Enumeration e = ids.elements();
 247       while (e.hasMoreElements())
 248       {
 249         this.addPDBId(new PDBEntry((PDBEntry) e.nextElement()));
 250       }
 251     }
 252   }
 253
 254   /**
 255    * DOCUMENT ME!
 256    *
 257    * @param v
 258    *          DOCUMENT ME!
 259    */
 260   public void setSequenceFeatures(SequenceFeature[] features)
 261   {
 262     sequenceFeatures = features;
 263   }
 264
 265   public synchronized void addSequenceFeature(SequenceFeature sf)
 266   {
 267     if (sequenceFeatures == null)
 268     {
 269       sequenceFeatures = new SequenceFeature[0];
 270     }
 271
 272     for (int i = 0; i < sequenceFeatures.length; i++)
 273     {
 274       if (sequenceFeatures[i].equals(sf))
 275       {
 276         return;
 277       }
 278     }
 279
 280     SequenceFeature[] temp = new SequenceFeature[sequenceFeatures.length + 1];
 281     System.arraycopy(sequenceFeatures, 0, temp, 0, sequenceFeatures.length);
 282     temp[sequenceFeatures.length] = sf;
 283
 284     sequenceFeatures = temp;
 285   }
 286
 287   public void deleteFeature(SequenceFeature sf)
 288   {
 289     if (sequenceFeatures == null)
 290     {
 291       return;
 292     }
 293
 294     int index = 0;
 295     for (index = 0; index < sequenceFeatures.length; index++)
 296     {
 297       if (sequenceFeatures[index].equals(sf))
 298       {
 299         break;
 300       }
 301     }
 302
 303     if (index == sequenceFeatures.length)
 304     {
 305       return;
 306     }
 307
 308     int sfLength = sequenceFeatures.length;
 309     if (sfLength < 2)
 310     {
 311       sequenceFeatures = null;
 312     }
 313     else
 314     {
 315       SequenceFeature[] temp = new SequenceFeature[sfLength - 1];
 316       System.arraycopy(sequenceFeatures, 0, temp, 0, index);
 317
 318       if (index < sfLength)
 319       {
 320         System.arraycopy(sequenceFeatures, index + 1, temp, index,
 321                 sequenceFeatures.length - index - 1);
 322       }
 323
 324       sequenceFeatures = temp;
 325     }
 326   }
 327
 328   /**
 329    * DOCUMENT ME!
 330    *
 331    * @return DOCUMENT ME!
 332    */
 333   public SequenceFeature[] getSequenceFeatures()
 334   {
 335     return sequenceFeatures;
 336   }
 337
 338   public void addPDBId(PDBEntry entry)
 339   {
 340     if (pdbIds == null)
 341     {
 342       pdbIds = new Vector<PDBEntry>();
 343     }
 344     if (!pdbIds.contains(entry))
 345     {
 346       pdbIds.addElement(entry);
 347     }
 348   }
 349
 350   /**
 351    * DOCUMENT ME!
 352    *
 353    * @param id
 354    *          DOCUMENT ME!
 355    */
 356   public void setPDBId(Vector id)
 357   {
 358     pdbIds = id;
 359   }
 360
 361   /**
 362    * DOCUMENT ME!
 363    *
 364    * @return DOCUMENT ME!
 365    */
 366   public Vector<PDBEntry> getPDBId()
 367   {
 368     return pdbIds;
 369   }
 370
 371   /**
 372    * DOCUMENT ME!
 373    *
 374    * @return DOCUMENT ME!
 375    */
 376   public String getDisplayId(boolean jvsuffix)
 377   {
 378     StringBuffer result = new StringBuffer(name);
 379     if (jvsuffix)
 380     {
 381       result.append("/" + start + "-" + end);
 382     }
 383
 384     return result.toString();
 385   }
 386
 387   /**
 388    * DOCUMENT ME!
 389    *
 390    * @param name
 391    *          DOCUMENT ME!
 392    */
 393   public void setName(String name)
 394   {
 395     this.name = name;
 396     this.parseId();
 397   }
 398
 399   /**
 400    * DOCUMENT ME!
 401    *
 402    * @return DOCUMENT ME!
 403    */
 404   public String getName()
 405   {
 406     return this.name;
 407   }
 408
 409   /**
 410    * DOCUMENT ME!
 411    *
 412    * @param start
 413    *          DOCUMENT ME!
 414    */
 415   public void setStart(int start)
 416   {
 417     this.start = start;
 418   }
 419
 420   /**
 421    * DOCUMENT ME!
 422    *
 423    * @return DOCUMENT ME!
 424    */
 425   public int getStart()
 426   {
 427     return this.start;
 428   }
 429
 430   /**
 431    * DOCUMENT ME!
 432    *
 433    * @param end
 434    *          DOCUMENT ME!
 435    */
 436   public void setEnd(int end)
 437   {
 438     this.end = end;
 439   }
 440
 441   /**
 442    * DOCUMENT ME!
 443    *
 444    * @return DOCUMENT ME!
 445    */
 446   public int getEnd()
 447   {
 448     return this.end;
 449   }
 450
 451   /**
 452    * DOCUMENT ME!
 453    *
 454    * @return DOCUMENT ME!
 455    */
 456   public int getLength()
 457   {
 458     return this.sequence.length;
 459   }
 460
 461   /**
 462    * DOCUMENT ME!
 463    *
 464    * @param seq
 465    *          DOCUMENT ME!
 466    */
 467   public void setSequence(String seq)
 468   {
 469     this.sequence = seq.toCharArray();
 470     checkValidRange();
 471   }
 472
 473   public String getSequenceAsString()
 474   {
 475     return new String(sequence);
 476   }
 477
 478   public String getSequenceAsString(int start, int end)
 479   {
 480     return new String(getSequence(start, end));
 481   }
 482
 483   public char[] getSequence()
 484   {
 485     return sequence;
 486   }
 487
 488   /*
 489    * (non-Javadoc)
 490    *
 491    * @see jalview.datamodel.SequenceI#getSequence(int, int)
 492    */
 493   public char[] getSequence(int start, int end)
 494   {
 495     if (start < 0)
 496     {
 497       start = 0;
 498     }
 499     // JBPNote - left to user to pad the result here (TODO:Decide on this
 500     // policy)
 501     if (start >= sequence.length)
 502     {
 503       return new char[0];
 504     }
 505
 506     if (end >= sequence.length)
 507     {
 508       end = sequence.length;
 509     }
 510
 511     char[] reply = new char[end - start];
 512     System.arraycopy(sequence, start, reply, 0, end - start);
 513
 514     return reply;
 515   }
 516
 517   @Override
 518   public SequenceI getSubSequence(int start, int end)
 519   {
 520     if (start < 0)
 521     {
 522       start = 0;
 523     }
 524     char[] seq = getSequence(start, end);
 525     if (seq.length == 0)
 526     {
 527       return null;
 528     }
 529     int nstart = findPosition(start);
 530     int nend = findPosition(end) - 1;
 531     // JBPNote - this is an incomplete copy.
 532     SequenceI nseq = new Sequence(this.getName(), seq, nstart, nend);
 533     nseq.setDescription(description);
 534     if (datasetSequence != null)
 535     {
 536       nseq.setDatasetSequence(datasetSequence);
 537     }
 538     else
 539     {
 540       nseq.setDatasetSequence(this);
 541     }
 542     return nseq;
 543   }
 544
 545   /**
 546    * DOCUMENT ME!
 547    *
 548    * @param i
 549    *          DOCUMENT ME!
 550    *
 551    * @return DOCUMENT ME!
 552    */
 553   public char getCharAt(int i)
 554   {
 555     if (i < sequence.length)
 556     {
 557       return sequence[i];
 558     }
 559     else
 560     {
 561       return ' ';
 562     }
 563   }
 564
 565   /**
 566    * DOCUMENT ME!
 567    *
 568    * @param desc
 569    *          DOCUMENT ME!
 570    */
 571   public void setDescription(String desc)
 572   {
 573     this.description = desc;
 574   }
 575
 576   /**
 577    * DOCUMENT ME!
 578    *
 579    * @return DOCUMENT ME!
 580    */
 581   public String getDescription()
 582   {
 583     return this.description;
 584   }
 585
 586   /*
 587    * (non-Javadoc)
 588    *
 589    * @see jalview.datamodel.SequenceI#findIndex(int)
 590    */
 591   public int findIndex(int pos)
 592   {
 593     // returns the alignment position for a residue
 594     int j = start;
 595     int i = 0;
 596     // Rely on end being at least as long as the length of the sequence.
 597     while ((i < sequence.length) && (j <= end) && (j <= pos))
 598     {
 599       if (!jalview.util.Comparison.isGap(sequence[i]))
 600       {
 601         j++;
 602       }
 603
 604       i++;
 605     }
 606
 607     if ((j == end) && (j < pos))
 608     {
 609       return end + 1;
 610     }
 611     else
 612     {
 613       return i;
 614     }
 615   }
 616
 617   @Override
 618   public int findPosition(int i)
 619   {
 620     int j = 0;
 621     int pos = start;
 622     int seqlen = sequence.length;
 623     while ((j < i) && (j < seqlen))
 624     {
 625       if (!jalview.util.Comparison.isGap(sequence[j]))
 626       {
 627         pos++;
 628       }
 629
 630       j++;
 631     }
 632
 633     return pos;
 634   }
 635
 636   /**
 637    * Returns an int array where indices correspond to each residue in the
 638    * sequence and the element value gives its position in the alignment
 639    *
 640    * @return int[SequenceI.getEnd()-SequenceI.getStart()+1] or null if no
 641    *         residues in SequenceI object
 642    */
 643   public int[] gapMap()
 644   {
 645     String seq = jalview.analysis.AlignSeq.extractGaps(
 646             jalview.util.Comparison.GapChars, new String(sequence));
 647     int[] map = new int[seq.length()];
 648     int j = 0;
 649     int p = 0;
 650
 651     while (j < sequence.length)
 652     {
 653       if (!jalview.util.Comparison.isGap(sequence[j]))
 654       {
 655         map[p++] = j;
 656       }
 657
 658       j++;
 659     }
 660
 661     return map;
 662   }
 663
 664   @Override
 665   public int[] findPositionMap()
 666   {
 667     int map[] = new int[sequence.length];
 668     int j = 0;
 669     int pos = start;
 670     int seqlen = sequence.length;
 671     while ((j < seqlen))
 672     {
 673       map[j] = pos;
 674       if (!jalview.util.Comparison.isGap(sequence[j]))
 675       {
 676         pos++;
 677       }
 678
 679       j++;
 680     }
 681     return map;
 682   }
 683
  /**
   * Deletes the columns from i (inclusive) to j (exclusive) and adjusts start
   * and end for any residues that were removed.
   * <p>
   * Nothing happens when i is at or beyond the end of the sequence; a j beyond
   * the end is treated as the sequence length. If residues are removed from
   * the middle of the sequence and a dataset sequence is set, the dataset
   * sequence is replaced by an edited copy.
   *
   * @param i
   *          index of the first column to delete
   * @param j
   *          index of the column after the last one to delete
   * @see jalview.datamodel.SequenceI#deleteChars(int, int)
   */
   @Override
  public void deleteChars(int i, int j)
  {
    // start/end are only updated at the very end, so findIndex/findPosition
    // below still see the sequence as it was before the deletion.
    int newstart = start, newend = end;
    if (i >= sequence.length)
    {
      return;
    }

    // tmp receives the sequence with columns [i, j) removed
    char[] tmp;

    if (j >= sequence.length)
    {
      // deletion runs to the end: keep only the first i columns
      tmp = new char[i];
      System.arraycopy(sequence, 0, tmp, 0, i);
      j = sequence.length;
    }
    else
    {
      // keep the columns before i and the columns from j onwards
      tmp = new char[sequence.length - j + i];
      System.arraycopy(sequence, 0, tmp, 0, i);
      System.arraycopy(sequence, j, tmp, i, sequence.length - j);
    }
    boolean createNewDs = false;
    // TODO: take a look at the new dataset creation validation method below -
    // this could become time consuming for large sequences - consider making it
    // more efficient
    for (int s = i; s < j; s++)
    {
      // NOTE(review): presumably 23 is the aaIndex code for gap / non-residue
      // characters, so this tests "is a residue" - confirm against
      // ResidueProperties. The char is used directly as an array index.
      if (jalview.schemes.ResidueProperties.aaIndex[sequence[s]] != 23)
      {
        if (createNewDs)
        {
          // already known to be a mid-sequence deletion: each further deleted
          // residue shortens the range by one
          newend--;
        }
        else
        {
          // zero-based column of the first residue
          int sindex = findIndex(start) - 1;
          if (sindex == s)
          {
            // delete characters including start of sequence
            newstart = findPosition(j);
            break; // don't need to search for any more residue characters.
          }
          else
          {
            // delete characters after start.
            int eindex = findIndex(end) - 1;
            if (eindex < j)
            {
              // delete characters at end of sequence
              newend = findPosition(i - 1);
              break; // don't need to search for any more residue characters.
            }
            else
            {
              createNewDs = true;
              newend--; // decrease end position by one for the deleted residue
              // and search further
            }
          }
        }
      }
    }
    // deletion occurred in the middle of the sequence
    if (createNewDs && this.datasetSequence != null)
    {
      // construct a new sequence
      Sequence ds = new Sequence(datasetSequence);
      // TODO: remove any non-inheritable properties ?
      // TODO: create a sequence mapping (since there is a relation here ?)
      // NOTE(review): i and j are column indices of this sequence and are
      // applied unchanged to the dataset copy - confirm this is intended.
      ds.deleteChars(i, j);
      datasetSequence = ds;
    }
    start = newstart;
    end = newend;
    sequence = tmp;
  }
 767
 768    @Override
 769   public void insertCharAt(int i, int length, char c)
 770   {
 771     char[] tmp = new char[sequence.length + length];
 772
 773     if (i >= sequence.length)
 774     {
 775       System.arraycopy(sequence, 0, tmp, 0, sequence.length);
 776       i = sequence.length;
 777     }
 778     else
 779     {
 780       System.arraycopy(sequence, 0, tmp, 0, i);
 781     }
 782
 783     int index = i;
 784     while (length > 0)
 785     {
 786       tmp[index++] = c;
 787       length--;
 788     }
 789
 790     if (i < sequence.length)
 791     {
 792       System.arraycopy(sequence, i, tmp, index, sequence.length - i);
 793     }
 794
 795     sequence = tmp;
 796   }
 797
  /**
   * Inserts a single character c before column i, or appends it when i is at
   * or beyond the end of the sequence.
   *
   * @param i
   *          zero-based column at which to insert
   * @param c
   *          the character to insert
   */
  @Override
  public void insertCharAt(int i, char c)
  {
    insertCharAt(i, 1, c);
  }
 803
 804   @Override
 805   public String getVamsasId()
 806   {
 807     return vamsasId;
 808   }
 809
 810   @Override
 811   public void setVamsasId(String id)
 812   {
 813     vamsasId = id;
 814   }
 815
 816   @Override
 817   public void setDBRef(DBRefEntry[] dbref)
 818   {
 819     dbrefs = dbref;
 820   }
 821
 822   @Override
 823   public DBRefEntry[] getDBRef()
 824   {
 825     if (dbrefs == null && datasetSequence != null
 826             && this != datasetSequence)
 827     {
 828       return datasetSequence.getDBRef();
 829     }
 830     return dbrefs;
 831   }
 832
 833   @Override
 834   public void addDBRef(DBRefEntry entry)
 835   {
 836     if (dbrefs == null)
 837     {
 838       dbrefs = new DBRefEntry[0];
 839     }
 840
 841     int i, iSize = dbrefs.length;
 842
 843     for (i = 0; i < iSize; i++)
 844     {
 845       if (dbrefs[i].equalRef(entry))
 846       {
 847         if (entry.getMap() != null)
 848         {
 849           if (dbrefs[i].getMap() == null)
 850           {
 851             // overwrite with 'superior' entry that contains a mapping.
 852             dbrefs[i] = entry;
 853           }
 854         }
 855         return;
 856       }
 857     }
 858
 859     DBRefEntry[] temp = new DBRefEntry[iSize + 1];
 860     System.arraycopy(dbrefs, 0, temp, 0, iSize);
 861     temp[temp.length - 1] = entry;
 862
 863     dbrefs = temp;
 864   }
 865
 866   @Override
 867   public void setDatasetSequence(SequenceI seq)
 868   {
 869     datasetSequence = seq;
 870   }
 871
 872   @Override
 873   public SequenceI getDatasetSequence()
 874   {
 875     return datasetSequence;
 876   }
 877
 878   /**
 879    * Returns a new array containing this sequence's annotations, or null.
 880    */
 881   @Override
 882   public AlignmentAnnotation[] getAnnotation()
 883   {
 884     return annotation == null ? null : annotation
 885             .toArray(new AlignmentAnnotation[annotation.size()]);
 886   }
 887
 888   /**
 889    * Returns true if this sequence has the given annotation (by object
 890    * identity).
 891    */
 892   @Override
 893   public boolean hasAnnotation(AlignmentAnnotation ann)
 894   {
 895     return annotation == null ? false : annotation.contains(ann);
 896   }
 897
 898   /**
 899    * Add the given annotation, if not already added, and set its sequence ref to
 900    * be this sequence. Does nothing if this sequence's annotations already
 901    * include this annotation (by identical object reference).
 902    */
 903   @Override
 904   public void addAlignmentAnnotation(AlignmentAnnotation annotation)
 905   {
 906     if (this.annotation == null)
 907     {
 908       this.annotation = new Vector();
 909     }
 910     if (!this.annotation.contains(annotation))
 911     {
 912       this.annotation.addElement(annotation);
 913     }
 914     annotation.setSequenceRef(this);
 915   }
 916
 917   public void removeAlignmentAnnotation(AlignmentAnnotation annotation)
 918   {
 919     if (this.annotation != null)
 920     {
 921       this.annotation.removeElement(annotation);
 922       if (this.annotation.size() == 0)
 923       {
 924         this.annotation = null;
 925       }
 926     }
 927   }
 928
 929   /**
 930    * test if this is a valid candidate for another sequence's dataset sequence.
 931    *
 932    */
 933   private boolean isValidDatasetSequence()
 934   {
 935     if (datasetSequence != null)
 936     {
 937       return false;
 938     }
 939     for (int i = 0; i < sequence.length; i++)
 940     {
 941       if (jalview.util.Comparison.isGap(sequence[i]))
 942       {
 943         return false;
 944       }
 945     }
 946     return true;
 947   }
 948
 949   /*
 950    * (non-Javadoc)
 951    *
 952    * @see jalview.datamodel.SequenceI#deriveSequence()
 953    */
 954   @Override
 955   public SequenceI deriveSequence()
 956   {
 957     SequenceI seq = new Sequence(this);
 958     if (datasetSequence != null)
 959     {
 960       // duplicate current sequence with same dataset
 961       seq.setDatasetSequence(datasetSequence);
 962     }
 963     else
 964     {
 965       if (isValidDatasetSequence())
 966       {
 967         // Use this as dataset sequence
 968         seq.setDatasetSequence(this);
 969       }
 970       else
 971       {
 972         // Create a new, valid dataset sequence
 973         SequenceI ds = seq;
 974         ds.setSequence(AlignSeq.extractGaps(
 975                 jalview.util.Comparison.GapChars, new String(sequence)));
 976         setDatasetSequence(ds);
 977         ds.setSequenceFeatures(getSequenceFeatures());
 978         seq = this; // and return this sequence as the derived sequence.
 979       }
 980     }
 981     return seq;
 982   }
 983
 984   /*
 985    * (non-Javadoc)
 986    *
 987    * @see jalview.datamodel.SequenceI#createDatasetSequence()
 988    */
 989   @Override
 990   public SequenceI createDatasetSequence()
 991   {
 992     if (datasetSequence == null)
 993     {
 994       datasetSequence = new Sequence(getName(), AlignSeq.extractGaps(
 995               jalview.util.Comparison.GapChars, getSequenceAsString()),
 996               getStart(), getEnd());
 997       datasetSequence.setSequenceFeatures(getSequenceFeatures());
 998       datasetSequence.setDescription(getDescription());
 999       setSequenceFeatures(null);
1000       // move database references onto dataset sequence
1001       datasetSequence.setDBRef(getDBRef());
1002       setDBRef(null);
1003       datasetSequence.setPDBId(getPDBId());
1004       setPDBId(null);
1005       datasetSequence.updatePDBIds();
1006       if (annotation != null)
1007       {
1008         for (AlignmentAnnotation aa : annotation)
1009         {
1010           AlignmentAnnotation _aa = new AlignmentAnnotation(aa);
1011           _aa.sequenceRef = datasetSequence;
1012           _aa.adjustForAlignment(); // uses annotation's own record of
1013                                    // sequence-column mapping
1014           datasetSequence.addAlignmentAnnotation(_aa);
1015         }
1016       }
1017     }
1018     return datasetSequence;
1019   }
1020
1021   /*
1022    * (non-Javadoc)
1023    *
1024    * @see
1025    * jalview.datamodel.SequenceI#setAlignmentAnnotation(AlignmmentAnnotation[]
1026    * annotations)
1027    */
1028   public void setAlignmentAnnotation(AlignmentAnnotation[] annotations)
1029   {
1030     if (annotation != null)
1031     {
1032       annotation.removeAllElements();
1033     }
1034     if (annotations != null)
1035     {
1036       for (int i = 0; i < annotations.length; i++)
1037       {
1038         if (annotations[i] != null)
1039         {
1040           addAlignmentAnnotation(annotations[i]);
1041         }
1042       }
1043     }
1044   }
1045
1046   /*
1047    * (non-Javadoc)
1048    *
1049    * @see jalview.datamodel.SequenceI#getAnnotation(java.lang.String)
1050    */
1051   @Override
1052   public AlignmentAnnotation[] getAnnotation(String label)
1053   {
1054     if (annotation == null || annotation.size() == 0)
1055     {
1056       return null;
1057     }
1058
1059     Vector subset = new Vector();
1060     Enumeration e = annotation.elements();
1061     while (e.hasMoreElements())
1062     {
1063       AlignmentAnnotation ann = (AlignmentAnnotation) e.nextElement();
1064       if (ann.label != null && ann.label.equals(label))
1065       {
1066         subset.addElement(ann);
1067       }
1068     }
1069     if (subset.size() == 0)
1070     {
1071       return null;
1072     }
1073     AlignmentAnnotation[] anns = new AlignmentAnnotation[subset.size()];
1074     int i = 0;
1075     e = subset.elements();
1076     while (e.hasMoreElements())
1077     {
1078       anns[i++] = (AlignmentAnnotation) e.nextElement();
1079     }
1080     subset.removeAllElements();
1081     return anns;
1082   }
1083
1084   @Override
1085   public boolean updatePDBIds()
1086   {
1087     if (datasetSequence != null)
1088     {
1089       // TODO: could merge DBRefs
1090       return datasetSequence.updatePDBIds();
1091     }
1092     if (dbrefs == null || dbrefs.length == 0)
1093     {
1094       return false;
1095     }
1096     Vector newpdb = new Vector();
1097     for (int i = 0; i < dbrefs.length; i++)
1098     {
1099       if (DBRefSource.PDB.equals(dbrefs[i].getSource()))
1100       {
1101         PDBEntry pdbe = new PDBEntry();
1102         pdbe.setId(dbrefs[i].getAccessionId());
1103         if (pdbIds == null || pdbIds.size() == 0)
1104         {
1105           newpdb.addElement(pdbe);
1106         }
1107         else
1108         {
1109           Enumeration en = pdbIds.elements();
1110           boolean matched = false;
1111           while (!matched && en.hasMoreElements())
1112           {
1113             PDBEntry anentry = (PDBEntry) en.nextElement();
1114             if (anentry.getId().equals(pdbe.getId()))
1115             {
1116               matched = true;
1117             }
1118           }
1119           if (!matched)
1120           {
1121             newpdb.addElement(pdbe);
1122           }
1123         }
1124       }
1125     }
1126     if (newpdb.size() > 0)
1127     {
1128       Enumeration en = newpdb.elements();
1129       while (en.hasMoreElements())
1130       {
1131         addPDBId((PDBEntry) en.nextElement());
1132       }
1133       return true;
1134     }
1135     return false;
1136   }
1137
1138   /*
1139    * (non-Javadoc)
1140    *
1141    * @see
1142    * jalview.datamodel.SequenceI#transferAnnotation(jalview.datamodel.SequenceI,
1143    * jalview.datamodel.Mapping)
1144    */
1145   @Override
1146   public void transferAnnotation(SequenceI entry, Mapping mp)
1147   {
1148     if (datasetSequence != null)
1149     {
1150       datasetSequence.transferAnnotation(entry, mp);
1151       return;
1152     }
1153     if (entry.getDatasetSequence() != null)
1154     {
1155       transferAnnotation(entry.getDatasetSequence(), mp);
1156       return;
1157     }
1158     // transfer any new features from entry onto sequence
1159     if (entry.getSequenceFeatures() != null)
1160     {
1161
1162       SequenceFeature[] sfs = entry.getSequenceFeatures();
1163       for (int si = 0; si < sfs.length; si++)
1164       {
1165         SequenceFeature sf[] = (mp != null) ? mp.locateFeature(sfs[si])
1166                 : new SequenceFeature[]
1167                 { new SequenceFeature(sfs[si]) };
1168         if (sf != null && sf.length > 0)
1169         {
1170           for (int sfi = 0; sfi < sf.length; sfi++)
1171           {
1172             addSequenceFeature(sf[sfi]);
1173           }
1174         }
1175       }
1176     }
1177
1178     // transfer PDB entries
1179     if (entry.getPDBId() != null)
1180     {
1181       Enumeration e = entry.getPDBId().elements();
1182       while (e.hasMoreElements())
1183       {
1184         PDBEntry pdb = (PDBEntry) e.nextElement();
1185         addPDBId(pdb);
1186       }
1187     }
1188     // transfer database references
1189     DBRefEntry[] entryRefs = entry.getDBRef();
1190     if (entryRefs != null)
1191     {
1192       for (int r = 0; r < entryRefs.length; r++)
1193       {
1194         DBRefEntry newref = new DBRefEntry(entryRefs[r]);
1195         if (newref.getMap() != null && mp != null)
1196         {
1197           // remap ref using our local mapping
1198         }
1199         // we also assume all version string setting is done by dbSourceProxy
1200         /*
1201          * if (!newref.getSource().equalsIgnoreCase(dbSource)) {
1202          * newref.setSource(dbSource); }
1203          */
1204         addDBRef(newref);
1205       }
1206     }
1207   }
1208
1209   /**
1210    * @return The index (zero-based) on this sequence in the MSA. It returns
1211    *         {@code -1} if this information is not available.
1212    */
1213   public int getIndex()
1214   {
1215     return index;
1216   }
1217
1218   /**
1219    * Defines the position of this sequence in the MSA. Use the value {@code -1}
1220    * if this information is undefined.
1221    *
1222    * @param The
1223    *          position for this sequence. This value is zero-based (zero for
1224    *          this first sequence)
1225    */
1226   public void setIndex(int value)
1227   {
1228     index = value;
1229   }
1230
1231   public void setRNA(RNA r)
1232   {
1233     rna = r;
1234   }
1235
1236   public RNA getRNA()
1237   {
1238     return rna;
1239   }
1240
1241   /**
1242    * Returns a (possibly empty) list of any annotations that match on given
1243    * calcId (source) and label (type). Null values do not match.
1244    *
1245    * @param calcId
1246    * @param label
1247    */
1248   @Override
1249   public List<AlignmentAnnotation> getAlignmentAnnotations(String calcId,
1250           String label)
1251   {
1252     List<AlignmentAnnotation> result = new ArrayList<AlignmentAnnotation>();
1253     if (this.annotation != null) {
1254       for (AlignmentAnnotation ann : annotation) {
1255         if (ann.calcId != null && ann.calcId.equals(calcId)
1256                 && ann.label != null && ann.label.equals(label))
1257         {
1258           result.add(ann);
1259         }
1260       }
1261     }
1262     return result;
1263   }
1264
1265 }