2 * Jalview - A Sequence Alignment Editor and Viewer
3 * Copyright (C) 2005 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License
7 * as published by the Free Software Foundation; either version 2
8 * of the License, or (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
19 package jalview.datamodel;
21 import jalview.analysis.*;
23 import jalview.util.*;
27 /** Data structure to hold and manipulate a multiple sequence alignment
29 public class Alignment implements AlignmentI
31 protected Alignment dataset;
32 protected Vector sequences;
33 protected Vector groups = new Vector();
34 protected char gapCharacter = '-';
35 protected int type = NUCLEOTIDE;
36 public static final int PROTEIN = 0;
37 public static final int NUCLEOTIDE = 1;
40 public AlignmentAnnotation[] annotations;
42 HiddenSequences hiddenSequences = new HiddenSequences(this);
// Shared initialisation helper: inspects the supplied sequences to classify
// the alignment and rebuilds the sequence vector from the array.
44 private void initAlignment(SequenceI[] seqs) {
// Ask Comparison whether the sequences look like nucleotide data.
// NOTE(review): the statements guarded by this test are not visible in this
// extract; presumably they set `type` to NUCLEOTIDE or PROTEIN - confirm.
47 if( jalview.util.Comparison.isNucleotide(seqs))
// Start from a fresh, empty vector; any previous contents are dropped.
52 sequences = new Vector();
// Append every array element to the vector, preserving the array order.
54 for (i = 0; i < seqs.length; i++)
56 sequences.addElement(seqs[i]);
60 /** Make an alignment from an array of Sequences.
// Constructor taking the sequences that make up the alignment.
// NOTE(review): the constructor body is not visible in this extract;
// presumably it delegates to initAlignment(seqs) - confirm.
64 public Alignment(SequenceI[] seqs)
69 * Make a new alignment from an array of SeqCigars
70 * @param alseqs SeqCigar[]
// Constructor building the alignment from an array of SeqCigar objects.
72 public Alignment(SeqCigar[] alseqs) {
// Expand the cigars into alignment sequences, using this alignment's current
// gap character. A throwaway ColumnSelection and a null final argument are
// passed to the factory.
// NOTE(review): the rest of the body is not visible in this extract;
// presumably the resulting array is handed to initAlignment - confirm.
73 SequenceI[] seqs = SeqCigar.createAlignmentSequences(alseqs, gapCharacter, new ColumnSelection(), null);
77 * Make a new alignment from an CigarArray
78 * JBPNote - can only do this when compactAlignment does not contain hidden regions.
79 * JBPNote - must also check that compactAlignment resolves to a set of SeqCigars - or construct them appropriately.
80 * @param compactAlignment CigarArray
82 public static AlignmentI createAlignment(CigarArray compactAlignment) {
83 throw new Error("Alignment(CigarArray) not yet implemented");
84 // this(compactAlignment.refCigars);
90 * @return DOCUMENT ME!
// Accessor for the alignment's sequences as a Vector.
// NOTE(review): body not visible in this extract; presumably it returns the
// `sequences` vector itself rather than a copy - confirm.
92 public Vector getSequences()
97 public SequenceI [] getSequencesArray()
99 SequenceI [] reply = new SequenceI[sequences.size()];
100 for(int i=0; i<sequences.size(); i++)
102 reply[i] = (SequenceI)sequences.elementAt(i);
110 * @param i DOCUMENT ME!
112 * @return DOCUMENT ME!
114 public SequenceI getSequenceAt(int i)
116 if (i < sequences.size())
118 return (SequenceI) sequences.elementAt(i);
124 /** Adds a sequence to the alignment. Recalculates maxLength and size.
128 public void addSequence(SequenceI snew)
132 if(snew.getDatasetSequence()!=null)
134 System.out.println(snew.getName());
135 getDataset().addSequence(snew.getDatasetSequence());
139 Sequence ds = new Sequence(snew.getName(),
140 AlignSeq.extractGaps("-. ",
145 snew.setDatasetSequence(ds);
146 getDataset().addSequence(ds);
150 sequences.addElement(snew);
154 /** Adds a sequence to the alignment. Recalculates maxLength and size.
158 public void setSequenceAt(int i, SequenceI snew)
160 SequenceI oldseq = getSequenceAt(i);
161 deleteSequence(oldseq);
163 sequences.setElementAt(snew, i);
169 * @return DOCUMENT ME!
// Accessor for the sequence groups as a Vector.
// NOTE(review): body not visible in this extract; presumably it returns the
// `groups` vector itself rather than a copy - confirm.
171 public Vector getGroups()
176 /** Takes out columns consisting entirely of gaps (-,.," ")
// Scans the alignment column by column and deletes runs of columns in which
// no sequence holds a non-gap character.
178 public void removeGaps()
// Operate on the array from getVisibleAndRepresentedSeqs(), not only on the
// contents of the `sequences` vector.
180 SequenceI[] seqs = getVisibleAndRepresentedSeqs();
181 int j, jSize = seqs.length;
// First pass: the longest sequence length bounds the column scan.
// NOTE(review): the declaration of `width` is not visible in this extract.
184 for (int i = 0; i < jSize; i++)
186 if (seqs[i].getLength() > width)
188 width = seqs[i].getLength();
// startCol/endCol bracket the run of columns pending deletion; -1 means
// that no run is currently open.
192 int startCol = -1, endCol = -1;
193 boolean delete = true;
// Second pass: visit each column from left to right.
194 for (int i = 0; i < width; i++)
198 for (j = 0; j < jSize; j++)
// A sequence that ends before column i contributes nothing to this column.
200 if (seqs[j].getLength() > i)
// NOTE(review): the statements executed for a non-gap character are not
// visible here; presumably they mark the column as not deletable and record
// endCol - confirm.
202 if (!jalview.util.Comparison.isGap(seqs[j].getCharAt(i)))
// Deletable column while no run is open.
// NOTE(review): guarded body not visible; presumably it records startCol.
213 if(delete && startCol==-1)
// A non-deletable column ends the open run: delete the run, then pull both
// the scan position and the width back by the run length because the
// remaining columns have shifted left.
219 if (!delete && startCol > -1)
221 deleteColumns(seqs, startCol, endCol);
222 width -= (endCol - startCol);
223 i -= (endCol - startCol);
// A run that is still open after the last column is deleted here.
229 if (delete && startCol > -1)
231 deleteColumns(seqs, startCol, endCol);
235 /** Removes a range of columns (start to end inclusive).
237 * @param seqs Sequences to remove columns from
238 * @param start Start column in the alignment
239 * @param end End column in the alignment
241 public void deleteColumns(SequenceI [] seqs, int start, int end)
243 for(int i=0; i<seqs.length; i++)
244 seqs[i].deleteChars(start, end);
251 * @param i DOCUMENT ME!
// Cuts the alignment on the left so that column i becomes the first column.
253 public void trimLeft(int i)
// `seqs` is an array built by getVisibleAndRepresentedSeqs(); removing an
// element from the `sequences` vector below does not change this array.
255 SequenceI[] seqs = getVisibleAndRepresentedSeqs();
256 int j, jSize = seqs.length;
257 for (j = 0; j < jSize; j++)
// Look up the position for column i before the sequence text is changed.
259 int newstart = seqs[j].findPosition(i);
// A sequence shorter than the cut column is dropped from the alignment.
// NOTE(review): two statements following the removal are not visible in this
// extract; if they adjust j or jSize, check that against the fact that
// `seqs` itself is never shortened.
261 if(i>seqs[j].getLength())
263 sequences.removeElement(seqs[j]);
// Otherwise move the start position and keep only the text from column i on.
269 seqs[j].setStart(newstart);
270 seqs[j].setSequence(seqs[j].getSequence().substring(i));
278 * @param i DOCUMENT ME!
280 public void trimRight(int i)
282 SequenceI[] seqs = getVisibleAndRepresentedSeqs();
283 int j, jSize = seqs.length;
284 for (j = 0; j < jSize; j++)
286 int newend = seqs[j].findPosition(i);
288 seqs[j].setEnd(newend);
289 if(seqs[j].getLength()>i)
290 seqs[j].setSequence(seqs[j].getSequence().substring(0, i + 1));
297 * @param s DOCUMENT ME!
// Looks for the given sequence object in the alignment, comparing by
// reference (==) rather than equals().
299 public void deleteSequence(SequenceI s)
// getHeight() is re-read on every pass, so the bound follows any change in
// the number of sequences made inside the loop.
301 for (int i = 0; i < getHeight(); i++)
// NOTE(review): the statement executed on a match is not visible in this
// extract; presumably it calls deleteSequence(i) - confirm.
303 if (getSequenceAt(i) == s)
313 * @param i DOCUMENT ME!
315 public void deleteSequence(int i)
317 sequences.removeElementAt(i);
322 public SequenceGroup findGroup(SequenceI s)
324 for (int i = 0; i < this.groups.size(); i++)
326 SequenceGroup sg = (SequenceGroup) groups.elementAt(i);
328 if (sg.getSequences(false).contains(s))
340 * @param s DOCUMENT ME!
342 * @return DOCUMENT ME!
344 public SequenceGroup[] findAllGroups(SequenceI s)
346 Vector temp = new Vector();
348 int gSize = groups.size();
349 for (int i = 0; i < gSize; i++)
351 SequenceGroup sg = (SequenceGroup) groups.elementAt(i);
352 if(sg==null || sg.getSequences(false)==null)
354 this.deleteGroup(sg);
359 if (sg.getSequences(false).contains(s))
365 SequenceGroup[] ret = new SequenceGroup[temp.size()];
367 for (int i = 0; i < temp.size(); i++)
369 ret[i] = (SequenceGroup) temp.elementAt(i);
378 public void addGroup(SequenceGroup sg)
380 if (!groups.contains(sg))
382 groups.addElement(sg);
389 public void deleteAllGroups()
391 groups.removeAllElements();
395 while (i < sequences.size())
397 SequenceI s = getSequenceAt(i);
398 s.setColor(java.awt.Color.white);
404 public void deleteGroup(SequenceGroup g)
406 if (groups.contains(g))
408 groups.removeElement(g);
413 public SequenceI findName(String name)
417 while (i < sequences.size())
419 if (getSequenceAt(i).getName().equals(name))
421 return getSequenceAt(i);
// Searches the alignment for the given sequence object.
432 public int findIndex(SequenceI s)
// Walk the rows in order.
// NOTE(review): the counter's declaration and increment are not visible in
// this extract.
436 while (i < sequences.size())
// Match by reference (==), not by equals().
// NOTE(review): the values returned on a match and on failure are not
// visible; presumably the row index and a negative sentinel - confirm.
438 if (s == getSequenceAt(i))
452 * @return DOCUMENT ME!
454 public int getHeight()
456 return sequences.size();
462 * @return DOCUMENT ME!
// Computes the alignment width as the length of its longest sequence.
// NOTE(review): the initial value of `maxLength` (and hence the result for an
// alignment without sequences) is not visible in this extract.
464 public int getWidth()
468 for (int i = 0; i < sequences.size(); i++)
// Keep the largest length seen so far.
470 if (getSequenceAt(i).getLength() > maxLength)
472 maxLength = getSequenceAt(i).getLength();
482 * @return DOCUMENT ME!
// Measures the display identifiers of the sequences and tracks the longest.
// NOTE(review): the initial values of `i` and `max` and the returned value
// are not visible in this extract.
484 public int getMaxIdLength()
489 while (i < sequences.size())
491 SequenceI seq = getSequenceAt(i);
// The identifier starts as name + "/" + start + "-"; the final operand of
// this concatenation is on a line not visible here (presumably the end
// position - confirm).
492 String tmp = seq.getName() + "/" + seq.getStart() + "-" +
// Compare the identifier's character count against the running maximum.
495 if (tmp.length() > max)
509 * @param gc DOCUMENT ME!
511 public void setGapCharacter(char gc)
515 for (int i = 0; i < sequences.size(); i++)
517 Sequence seq = (Sequence) sequences.elementAt(i);
518 seq.setSequence( seq.getSequence().replace('.', gc) );
519 seq.setSequence( seq.getSequence().replace('-', gc) );
520 seq.setSequence( seq.getSequence().replace(' ', gc) );
527 * @return DOCUMENT ME!
// Accessor for the gap character.
// NOTE(review): body not visible in this extract; presumably it returns the
// `gapCharacter` field - confirm.
529 public char getGapCharacter()
537 * @return DOCUMENT ME!
539 public Vector getAAFrequency()
541 return AAFrequency.calculate(sequences, 0, getWidth());
547 * @return DOCUMENT ME!
549 public boolean isAligned()
551 int width = getWidth();
553 for (int i = 0; i < sequences.size(); i++)
555 if (getSequenceAt(i).getLength() != width)
567 * @param aa DOCUMENT ME!
569 public void deleteAnnotation(AlignmentAnnotation aa)
573 if (annotations != null)
575 aSize = annotations.length;
578 AlignmentAnnotation[] temp = new AlignmentAnnotation[aSize - 1];
582 for (int i = 0; i < aSize; i++)
584 if (annotations[i] == aa)
589 temp[tIndex] = annotations[i];
597 public void adjustSequenceAnnotations()
599 if(annotations!=null)
601 for (int a = 0; a < annotations.length; a++)
603 if (annotations[a].sequenceRef != null)
605 annotations[a].adjustForAlignment();
614 * @param aa DOCUMENT ME!
616 public void addAnnotation(AlignmentAnnotation aa)
619 if (annotations != null)
621 aSize = annotations.length + 1;
624 AlignmentAnnotation[] temp = new AlignmentAnnotation[aSize];
632 for (i = 0; i < (aSize-1); i++)
634 temp[i] = annotations[i];
// Rebuilds the annotation array around a requested position for `aa`.
641 public void setAnnotationIndex(AlignmentAnnotation aa, int index)
// Guard: nothing can be done without an annotation, without an existing
// array, or when index lies beyond the last existing slot.
// NOTE(review): the guarded statement is not visible in this extract;
// presumably an early return - confirm.
643 if(aa==null || annotations==null || annotations.length-1<index)
// The replacement array has the same length as the current one.
646 int aSize = annotations.length;
647 AlignmentAnnotation[] temp = new AlignmentAnnotation[aSize];
651 for (int i = 0; i < aSize; i++)
// NOTE(review): the conditions selecting between the two copies below are
// not visible here. One copy keeps an element at its index, the other takes
// the element from one slot earlier (a shift to the right).
657 temp[i] = annotations[i];
659 temp[i] = annotations[i-1];
668 * @return DOCUMENT ME!
// Accessor for the annotation rows.
// NOTE(review): body not visible in this extract; presumably it returns the
// `annotations` array itself (which other methods allow to be null) - confirm.
670 public AlignmentAnnotation[] getAlignmentAnnotation()
// Setter taking a nucleotide flag.
// NOTE(review): body not visible in this extract; presumably it sets `type`
// to NUCLEOTIDE when b is true and to PROTEIN otherwise - confirm.
675 public void setNucleotide(boolean b)
// Query for the nucleotide flag.
// NOTE(review): body not visible in this extract; presumably it reports
// whether `type` equals NUCLEOTIDE - confirm.
683 public boolean isNucleotide()
691 public void setDataset(Alignment data)
693 if(dataset==null && data==null)
695 // Create a new dataset for this alignment.
696 // Can only be done once, if dataset is not null
697 // This will not be performed
698 Sequence[] seqs = new Sequence[getHeight()];
699 for (int i = 0; i < getHeight(); i++)
701 if(getSequenceAt(i).getDatasetSequence()!=null)
703 seqs[i] = (Sequence)getSequenceAt(i).getDatasetSequence();
707 seqs[i] = new Sequence(getSequenceAt(i).getName(),
708 AlignSeq.extractGaps(
709 jalview.util.Comparison.GapChars,
710 getSequenceAt(i).getSequence()
712 getSequenceAt(i).getStart(),
713 getSequenceAt(i).getEnd());
715 getSequenceAt(i).setDatasetSequence(seqs[i]);
719 dataset = new Alignment(seqs);
721 else if(dataset==null && data!=null)
// Accessor for the dataset alignment.
// NOTE(review): body not visible in this extract; presumably it returns the
// `dataset` field, which stays null until setDataset has run - confirm.
727 public Alignment getDataset()
// Brings all sequences to a common length: finds the extent of the alignment
// that holds non-gap characters, then pads shorter sequences with the gap
// character and trims longer ones.
732 public boolean padGaps() {
// Tracks whether any sequence was changed.
// NOTE(review): the return statement is not visible in this extract.
733 boolean modified=false;
735 //Remove excess gaps from the end of alignment
// Pass 1: scan each sequence backwards for a non-gap character beyond the
// current maximum.
// NOTE(review): the declarations of `maxLength` and `current`, the body of
// the test below and any adjustment of maxLength between the two passes are
// not visible here.
739 for (int i = 0; i < sequences.size(); i++)
741 current = getSequenceAt(i);
// NOTE(review): j starts at getLength(), one past the last valid index, so
// the first getCharAt(j) relies on how SequenceI handles an out-of-range
// index. The `j > maxLength` test inside the if repeats the loop condition.
742 for (int j = current.getLength(); j > maxLength; j--)
744 if (j > maxLength && !jalview.util.Comparison.isGap(
745 current.getCharAt(j)))
// Pass 2: adjust every sequence to the target length.
755 for (int i = 0; i < sequences.size();
758 current = getSequenceAt(i);
// Too short: insert the gap character at the last target column.
760 if (current.getLength() < maxLength)
762 current.insertCharAt(maxLength - 1, gapCharacter);
// Too long: cut everything from the target length to the current end.
765 else if(current.getLength() > maxLength)
767 current.deleteChars(maxLength, current.getLength());
773 public HiddenSequences getHiddenSequences()
775 return hiddenSequences;
777 SequenceI [] getVisibleAndRepresentedSeqs()
779 if(hiddenSequences==null || hiddenSequences.getSize()<1)
780 return getSequencesArray();
782 Vector seqs = new Vector();
784 SequenceGroup hidden;
785 for (int i = 0; i < sequences.size(); i++)
787 seq = (SequenceI) sequences.elementAt(i);
788 seqs.addElement(seq);
789 hidden = seq.getHiddenSequences();
792 for(int j=0; j<hidden.getSize(false); j++)
794 seqs.addElement(hidden.getSequenceAt(j));
798 SequenceI [] result = new SequenceI[seqs.size()];
799 for(int i=0; i<seqs.size(); i++)
800 result[i] = (SequenceI)seqs.elementAt(i);
806 public CigarArray getCompactAlignment()
808 SeqCigar alseqs[] = new SeqCigar[sequences.size()];
809 for (int i=0; i<sequences.size(); i++) {
810 alseqs[i] = new SeqCigar((SequenceI) sequences.elementAt(i));
812 return new CigarArray(alseqs);