2 * Jalview - A Sequence Alignment Editor and Viewer
3 * Copyright (C) 2005 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License
7 * as published by the Free Software Foundation; either version 2
8 * of the License, or (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
19 package jalview.datamodel;
21 import jalview.analysis.*;
23 import jalview.util.*;
27 /** Data structure to hold and manipulate a multiple sequence alignment
// Concrete AlignmentI implementation; the fields below hold its sequences, groups and annotations.
29 public class Alignment implements AlignmentI
// Alignment holding the dataset sequences; assigned in setDataset().
31 protected Alignment dataset;
// The sequences of this alignment; elements are cast to SequenceI when read.
32 protected Vector sequences;
// SequenceGroup objects registered through addGroup().
33 protected Vector groups = new Vector();
// Gap symbol; '-' unless changed.
34 protected char gapCharacter = '-';
// Either PROTEIN or NUCLEOTIDE (constants below).
35 protected int type = NUCLEOTIDE;
36 public static final int PROTEIN = 0;
37 public static final int NUCLEOTIDE = 1;
// Annotation rows; may be null (the annotation methods null-check it before use).
40 public AlignmentAnnotation[] annotations;
// Hidden-sequence bookkeeping object, constructed with a reference to this alignment.
42 HiddenSequences hiddenSequences = new HiddenSequences(this);
// Shared set-up helper: consults Comparison.isNucleotide(seqs), then fills a
// fresh sequences Vector with the elements of seqs in array order.
// NOTE(review): the declaration of i and the statements controlled by the
// isNucleotide test are not visible in this excerpt - presumably they set
// the type field; confirm against the full file.
44 private void initAlignment(SequenceI[] seqs) {
47 if( jalview.util.Comparison.isNucleotide(seqs))
// Any previous contents of the sequences field are discarded here.
52 sequences = new Vector();
54 for (i = 0; i < seqs.length; i++)
56 sequences.addElement(seqs[i]);
60 /** Make an alignment from an array of Sequences.
// Constructor taking the sequences directly.
// NOTE(review): the constructor body is not visible in this excerpt -
// presumably it hands seqs to initAlignment(); confirm.
64 public Alignment(SequenceI[] seqs)
69 * Make a new alignment from an array of SeqCigars
70 * @param alseqs SeqCigar[]
// Builds one SequenceI per SeqCigar by calling getSeq() on each element.
// NOTE(review): what happens to seqs after the loop is not visible in this
// excerpt - presumably it is passed to initAlignment(); confirm.
72 public Alignment(SeqCigar[] alseqs) {
74 SequenceI[] seqs = new SequenceI[alseqs.length];
75 for (int i=0; i<alseqs.length; i++) {
// gapCharacter still has its field-initializer value ('-') at this point.
76 seqs[i] = alseqs[i].getSeq(this.gapCharacter);
81 * Make a new alignment from an CigarArray
82 * JBPNote - can only do this when compactAlignment does not contain hidden regions.
83 * JBPNote - must also check that compactAlignment resolves to a set of SeqCigars - or construct them appropriately.
84 * @param compactAlignment CigarArray
86 public Alignment(CigarArray compactAlignment) {
87 throw new Error("Alignment(CigarArray) not yet implemented");
88 // this(compactAlignment.refCigars);
94 * @return DOCUMENT ME!
// Accessor for the alignment's sequences as a Vector.
// NOTE(review): the body is not visible in this excerpt - presumably it
// returns the sequences field itself rather than a copy; confirm.
96 public Vector getSequences()
// Copies the contents of the sequences Vector, in order, into a new
// SequenceI[] of the same size.
// NOTE(review): the return statement is not visible in this excerpt -
// presumably reply is returned.
101 public SequenceI [] getSequencesArray()
103 SequenceI [] reply = new SequenceI[sequences.size()];
104 for(int i=0; i<sequences.size(); i++)
106 reply[i] = (SequenceI)sequences.elementAt(i);
114 * @param i DOCUMENT ME!
116 * @return DOCUMENT ME!
// Returns the element of the sequences Vector at index i, cast to SequenceI,
// when i is below the Vector's size. Negative i is not guarded here.
// NOTE(review): the result for i >= size is not visible in this excerpt.
118 public SequenceI getSequenceAt(int i)
120 if (i < sequences.size())
122 return (SequenceI) sequences.elementAt(i);
128 /** Adds a sequence to the alignment. Recalculates maxLength and size.
// Appends snew to the sequences Vector and makes sure the dataset alignment
// holds a dataset sequence for it.
// NOTE(review): any guard around the dataset handling, and the remaining
// arguments of the Sequence constructor call, are not visible in this excerpt.
132 public void addSequence(SequenceI snew)
// snew already has a dataset sequence: add that one to the dataset.
136 if(snew.getDatasetSequence()!=null)
// NOTE(review): prints the sequence name to stdout - looks like leftover debug output.
138 System.out.println(snew.getName());
139 getDataset().addSequence(snew.getDatasetSequence());
// Otherwise build a new Sequence from snew's name, with the characters "-. " extracted.
// NOTE(review): setDataset() uses jalview.util.Comparison.GapChars for the
// same purpose - confirm the two character sets agree.
143 Sequence ds = new Sequence(snew.getName(),
144 AlignSeq.extractGaps("-. ",
149 snew.setDatasetSequence(ds);
150 getDataset().addSequence(ds);
154 sequences.addElement(snew);
158 /** Adds a sequence to the alignment. Recalculates maxLength and size.
162 public void setSequenceAt(int i, SequenceI snew)
164 SequenceI oldseq = getSequenceAt(i);
165 deleteSequence(oldseq);
167 sequences.setElementAt(snew, i);
173 * @return DOCUMENT ME!
// Accessor for the alignment's groups as a Vector.
// NOTE(review): the body is not visible in this excerpt - presumably it
// returns the groups field; confirm.
175 public Vector getGroups()
180 /** Takes out columns consisting entirely of gaps (-,.," ")
// Scans the alignment column by column and removes runs of columns via
// deleteColumns().
// NOTE(review): the declaration of width and every assignment to delete,
// startCol and endCol are not visible in this excerpt, so the exact run
// detection cannot be confirmed from here.
182 public void removeGaps()
// Works on every sequence plus the members of any hidden group attached to it.
184 SequenceI[] seqs = getVisibleAndRepresentedSeqs();
185 int j, jSize = seqs.length;
// First pass: width becomes the length of the longest sequence.
188 for (int i = 0; i < jSize; i++)
190 if (seqs[i].getLength() > width)
192 width = seqs[i].getLength();
// startCol/endCol delimit the pending run of columns; -1 means no run is open.
196 int startCol = -1, endCol = -1;
197 boolean delete = true;
// Second pass: visit columns left to right.
198 for (int i = 0; i < width; i++)
202 for (j = 0; j < jSize; j++)
// Sequences that end before column i are not inspected.
204 if (seqs[j].getLength() > i)
206 if (!jalview.util.Comparison.isGap(seqs[j].getCharAt(i)))
217 if(delete && startCol==-1)
// An open run has ended: remove it, then move width and the column index
// back by (endCol - startCol) to account for the removed columns.
223 if (!delete && startCol > -1)
225 deleteColumns(seqs, startCol, endCol);
226 width -= (endCol - startCol);
227 i -= (endCol - startCol);
// A run still open once the column scan is over is removed here.
233 if (delete && startCol > -1)
235 deleteColumns(seqs, startCol, endCol);
239 /** Removes a range of columns (start to end inclusive).
241 * @param seqs Sequences to remove columns from
242 * @param start Start column in the alignment
243 * @param end End column in the alignment
245 public void deleteColumns(SequenceI [] seqs, int start, int end)
247 for(int i=0; i<seqs.length; i++)
248 seqs[i].deleteChars(start, end);
255 * @param i DOCUMENT ME!
// Cuts the first i columns off every sequence returned by
// getVisibleAndRepresentedSeqs().
// NOTE(review): the statements between the removal and the setStart call
// (branch structure, any index adjustment) are not visible in this excerpt.
257 public void trimLeft(int i)
259 SequenceI[] seqs = getVisibleAndRepresentedSeqs();
260 int j, jSize = seqs.length;
261 for (j = 0; j < jSize; j++)
// Residue position corresponding to column i, looked up before the sequence is cut.
263 int newstart = seqs[j].findPosition(i);
// A sequence shorter than i columns is removed from the alignment.
265 if(i>seqs[j].getLength())
267 sequences.removeElement(seqs[j]);
// Keep the part of the sequence from column i onwards and record its new start.
273 seqs[j].setStart(newstart);
274 seqs[j].setSequence(seqs[j].getSequence().substring(i));
282 * @param i DOCUMENT ME!
284 public void trimRight(int i)
286 SequenceI[] seqs = getVisibleAndRepresentedSeqs();
287 int j, jSize = seqs.length;
288 for (j = 0; j < jSize; j++)
290 int newend = seqs[j].findPosition(i);
292 seqs[j].setEnd(newend);
293 if(seqs[j].getLength()>i)
294 seqs[j].setSequence(seqs[j].getSequence().substring(0, i + 1));
301 * @param s DOCUMENT ME!
// Looks for s among the sequences by reference identity (==), scanning
// indices 0..getHeight()-1.
// NOTE(review): the action taken on a match is not visible in this excerpt -
// presumably deleteSequence(i); confirm.
303 public void deleteSequence(SequenceI s)
305 for (int i = 0; i < getHeight(); i++)
307 if (getSequenceAt(i) == s)
317 * @param i DOCUMENT ME!
319 public void deleteSequence(int i)
321 sequences.removeElementAt(i);
// Walks the groups Vector in order and tests whether each group's
// getSequences(false) list contains s.
// NOTE(review): the return statements are not visible in this excerpt -
// presumably the first matching group is returned; confirm.
326 public SequenceGroup findGroup(SequenceI s)
328 for (int i = 0; i < this.groups.size(); i++)
330 SequenceGroup sg = (SequenceGroup) groups.elementAt(i);
332 if (sg.getSequences(false).contains(s))
344 * @param s DOCUMENT ME!
346 * @return DOCUMENT ME!
// Collects groups whose getSequences(false) list contains s and returns them
// as an array; null groups or groups with a null sequence list are passed to
// deleteGroup() along the way.
// NOTE(review): what is added to temp and the final return are not visible
// in this excerpt.
348 public SequenceGroup[] findAllGroups(SequenceI s)
350 Vector temp = new Vector();
// The group count is read once, before the loop.
352 int gSize = groups.size();
353 for (int i = 0; i < gSize; i++)
355 SequenceGroup sg = (SequenceGroup) groups.elementAt(i);
356 if(sg==null || sg.getSequences(false)==null)
// NOTE(review): deleting from groups while indexing it with the cached gSize
// can skip the next entry or run past the end unless the lines not visible
// here adjust gSize/i - confirm.
358 this.deleteGroup(sg);
363 if (sg.getSequences(false).contains(s))
// Copy the collected groups into a typed array.
369 SequenceGroup[] ret = new SequenceGroup[temp.size()];
371 for (int i = 0; i < temp.size(); i++)
373 ret[i] = (SequenceGroup) temp.elementAt(i);
382 public void addGroup(SequenceGroup sg)
384 if (!groups.contains(sg))
386 groups.addElement(sg);
// Empties the groups Vector, then walks the sequences and sets each one's
// colour to white.
// NOTE(review): the declaration and increment of i are not visible in this
// excerpt.
393 public void deleteAllGroups()
395 groups.removeAllElements();
399 while (i < sequences.size())
401 SequenceI s = getSequenceAt(i);
402 s.setColor(java.awt.Color.white);
408 public void deleteGroup(SequenceGroup g)
410 if (groups.contains(g))
412 groups.removeElement(g);
// Returns the first sequence whose getName() equals name, scanning the
// sequences in index order.
// NOTE(review): the declaration/increment of i and the no-match result are
// not visible in this excerpt.
417 public SequenceI findName(String name)
421 while (i < sequences.size())
423 if (getSequenceAt(i).getName().equals(name))
425 return getSequenceAt(i);
// Searches the sequences for s using reference identity (==), scanning in
// index order.
// NOTE(review): the values returned on match and on no match are not visible
// in this excerpt.
436 public int findIndex(SequenceI s)
440 while (i < sequences.size())
442 if (s == getSequenceAt(i))
456 * @return DOCUMENT ME!
458 public int getHeight()
460 return sequences.size();
466 * @return DOCUMENT ME!
// Tracks the largest getLength() over all sequences in maxLength.
// NOTE(review): the initial value of maxLength and the return statement are
// not visible in this excerpt.
468 public int getWidth()
472 for (int i = 0; i < sequences.size(); i++)
474 if (getSequenceAt(i).getLength() > maxLength)
476 maxLength = getSequenceAt(i).getLength();
486 * @return DOCUMENT ME!
// For each sequence builds a display id starting "name/start-" and compares
// its length with the running maximum.
// NOTE(review): the rest of the id expression, the updates to max and i, and
// the return statement are not visible in this excerpt.
488 public int getMaxIdLength()
493 while (i < sequences.size())
495 SequenceI seq = getSequenceAt(i);
496 String tmp = seq.getName() + "/" + seq.getStart() + "-" +
499 if (tmp.length() > max)
513 * @param gc DOCUMENT ME!
// Rewrites every sequence so that the characters '.', '-' and ' ' are all
// replaced by gc.
// NOTE(review): the assignment to the gapCharacter field is not visible in
// this excerpt - presumably it happens before the loop; confirm.
515 public void setGapCharacter(char gc)
519 for (int i = 0; i < sequences.size(); i++)
// NOTE(review): casting to the concrete Sequence class would throw
// ClassCastException for any other SequenceI implementation; trimLeft() calls
// getSequence()/setSequence() through SequenceI, so the cast looks unnecessary.
521 Sequence seq = (Sequence) sequences.elementAt(i);
522 seq.setSequence( seq.getSequence().replace('.', gc) );
523 seq.setSequence( seq.getSequence().replace('-', gc) );
524 seq.setSequence( seq.getSequence().replace(' ', gc) );
531 * @return DOCUMENT ME!
// Accessor for the gap symbol.
// NOTE(review): the body is not visible in this excerpt - presumably it
// returns the gapCharacter field; confirm.
533 public char getGapCharacter()
541 * @return DOCUMENT ME!
543 public Vector getAAFrequency()
545 return AAFrequency.calculate(sequences, 0, getWidth());
551 * @return DOCUMENT ME!
// Compares every sequence's length with the alignment width from getWidth().
// NOTE(review): the return statements are not visible in this excerpt -
// presumably false on the first mismatch and true otherwise; confirm.
553 public boolean isAligned()
555 int width = getWidth();
557 for (int i = 0; i < sequences.size(); i++)
559 if (getSequenceAt(i).getLength() != width)
571 * @param aa DOCUMENT ME!
// Builds a copy of the annotations array without aa; entries are compared by
// reference identity (==).
// NOTE(review): the declarations of aSize and tIndex, the handling of a
// matching entry, and the assignment of temp are not visible in this excerpt.
573 public void deleteAnnotation(AlignmentAnnotation aa)
577 if (annotations != null)
579 aSize = annotations.length;
// NOTE(review): temp is sized aSize - 1 before it is known that aa is
// present; if aa is absent, copying all aSize entries would overrun temp, and
// if annotations is null the loop below would dereference it - confirm that
// the lines not visible here guard against both.
582 AlignmentAnnotation[] temp = new AlignmentAnnotation[aSize - 1];
586 for (int i = 0; i < aSize; i++)
588 if (annotations[i] == aa)
593 temp[tIndex] = annotations[i];
601 public void adjustSequenceAnnotations()
603 if(annotations!=null)
605 for (int a = 0; a < annotations.length; a++)
607 if (annotations[a].sequenceRef != null)
609 annotations[a].adjustForAlignment();
618 * @param aa DOCUMENT ME!
// Grows the annotations array to make room for aa.
// NOTE(review): the declarations of aSize and i, the slot aa is written to,
// and the assignment of temp back to annotations are not visible in this
// excerpt.
620 public void addAnnotation(AlignmentAnnotation aa)
// With existing annotations the new array is one slot larger than the old one.
623 if (annotations != null)
625 aSize = annotations.length + 1;
628 AlignmentAnnotation[] temp = new AlignmentAnnotation[aSize];
// Existing rows are copied into the first aSize-1 slots in their old order.
636 for (i = 0; i < (aSize-1); i++)
638 temp[i] = annotations[i];
// Rebuilds the annotations array so that aa sits at the requested index.
// NOTE(review): the statement controlled by the guard, the placement of aa,
// the conditions choosing between the two copies below, and the final
// assignment are not visible in this excerpt.
645 public void setAnnotationIndex(AlignmentAnnotation aa, int index)
// Guard: a null aa, a null annotations array, or an index beyond the last slot.
647 if(aa==null || annotations==null || annotations.length-1<index)
650 int aSize = annotations.length;
// temp has the same length as the current array.
651 AlignmentAnnotation[] temp = new AlignmentAnnotation[aSize];
655 for (int i = 0; i < aSize; i++)
// Entries are taken either from the same position or from one position earlier.
// NOTE(review): nothing visible skips aa's previous slot while copying, so aa
// may end up duplicated and another row dropped - confirm against the full file.
661 temp[i] = annotations[i];
663 temp[i] = annotations[i-1];
672 * @return DOCUMENT ME!
// Accessor for the annotation rows.
// NOTE(review): the body is not visible in this excerpt - presumably it
// returns the annotations field, which may be null; confirm.
674 public AlignmentAnnotation[] getAlignmentAnnotation()
// Setter taking a nucleotide flag.
// NOTE(review): the body is not visible in this excerpt - presumably it sets
// the type field to NUCLEOTIDE or PROTEIN according to b; confirm.
679 public void setNucleotide(boolean b)
// Query for the nucleotide flag.
// NOTE(review): the body is not visible in this excerpt - presumably it
// reports whether the type field equals NUCLEOTIDE; confirm.
687 public boolean isNucleotide()
// Establishes the dataset alignment. Both visible branches require that no
// dataset is set yet; nothing visible here acts when dataset is already non-null.
695 public void setDataset(Alignment data)
// No dataset yet and none supplied: derive one from the current sequences.
697 if(dataset==null && data==null)
699 // Create a new dataset for this alignment.
700 // Can only be done once, if dataset is not null
701 // This will not be performed
702 Sequence[] seqs = new Sequence[getHeight()];
703 for (int i = 0; i < getHeight(); i++)
// Reuse the dataset sequence an aligned sequence already carries.
705 if(getSequenceAt(i).getDatasetSequence()!=null)
707 seqs[i] = (Sequence)getSequenceAt(i).getDatasetSequence();
// Otherwise build a new Sequence with the same name, start and end, with the
// characters in Comparison.GapChars extracted from the sequence string.
711 seqs[i] = new Sequence(getSequenceAt(i).getName(),
712 AlignSeq.extractGaps(
713 jalview.util.Comparison.GapChars,
714 getSequenceAt(i).getSequence()
716 getSequenceAt(i).getStart(),
717 getSequenceAt(i).getEnd());
719 getSequenceAt(i).setDatasetSequence(seqs[i]);
// The collected dataset sequences become a new Alignment.
723 dataset = new Alignment(seqs);
// No dataset yet but one supplied.
// NOTE(review): this branch's body is not visible in this excerpt -
// presumably it stores data in the dataset field; confirm.
725 else if(dataset==null && data!=null)
// Accessor for the dataset alignment.
// NOTE(review): the body is not visible in this excerpt - presumably it
// returns the dataset field, which is null until setDataset() has run; confirm.
731 public Alignment getDataset()
// Brings every sequence to a common length (maxLength) and returns a boolean flag.
// NOTE(review): the declarations of maxLength and current, the updates to
// maxLength and modified, and the return statement are not visible in this
// excerpt.
736 public boolean padGaps() {
737 boolean modified=false;
739 //Remove excess gaps from the end of alignment
// First pass: scan each sequence from its end back towards maxLength,
// testing for a non-gap character.
743 for (int i = 0; i < sequences.size(); i++)
745 current = getSequenceAt(i);
746 for (int j = current.getLength(); j > maxLength; j--)
// NOTE(review): j starts at getLength(), one past the last column that
// removeGaps() reads (it only calls getCharAt(i) for i < getLength()) -
// confirm getCharAt tolerates that index. The j > maxLength test repeats the
// loop condition.
748 if (j > maxLength && !jalview.util.Comparison.isGap(
749 current.getCharAt(j)))
// Second pass: pad or cut each sequence.
759 for (int i = 0; i < sequences.size();
762 current = getSequenceAt(i);
// Shorter than maxLength: insertCharAt is called with position maxLength - 1
// and the gap character.
764 if (current.getLength() < maxLength)
766 current.insertCharAt(maxLength - 1, gapCharacter);
// Longer than maxLength: delete from maxLength up to the current length.
769 else if(current.getLength() > maxLength)
771 current.deleteChars(maxLength, current.getLength());
777 public HiddenSequences getHiddenSequences()
779 return hiddenSequences;
// Returns the alignment's sequences together with the members of any hidden
// group attached to them, as a SequenceI array.
// NOTE(review): the declaration of seq, any null check on hidden, and the
// return statement are not visible in this excerpt.
781 SequenceI [] getVisibleAndRepresentedSeqs()
// Shortcut: with no hidden-sequence information the plain sequence array is returned.
783 if(hiddenSequences==null || hiddenSequences.getSize()<1)
784 return getSequencesArray();
786 Vector seqs = new Vector();
788 SequenceGroup hidden;
789 for (int i = 0; i < sequences.size(); i++)
// Each sequence is added first, followed by the members of its hidden group.
791 seq = (SequenceI) sequences.elementAt(i);
792 seqs.addElement(seq);
793 hidden = seq.getHiddenSequences();
796 for(int j=0; j<hidden.getSize(false); j++)
798 seqs.addElement(hidden.getSequenceAt(j));
// Copy the collected sequences into a typed array.
802 SequenceI [] result = new SequenceI[seqs.size()];
803 for(int i=0; i<seqs.size(); i++)
804 result[i] = (SequenceI)seqs.elementAt(i);
810 public CigarArray getCompactAlignment()
812 SeqCigar alseqs[] = new SeqCigar[sequences.size()];
813 for (int i=0; i<sequences.size(); i++) {
814 alseqs[i] = new SeqCigar((SequenceI) sequences.get(i));
816 return new CigarArray(alseqs);