2 * Jalview - A Sequence Alignment Editor and Viewer
\r
3 * Copyright (C) 2005 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle
\r
5 * This program is free software; you can redistribute it and/or
\r
6 * modify it under the terms of the GNU General Public License
\r
7 * as published by the Free Software Foundation; either version 2
\r
8 * of the License, or (at your option) any later version.
\r
10 * This program is distributed in the hope that it will be useful,
\r
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
\r
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
\r
13 * GNU General Public License for more details.
\r
15 * You should have received a copy of the GNU General Public License
\r
16 * along with this program; if not, write to the Free Software
\r
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
\r
19 package jalview.datamodel;
\r
21 import jalview.analysis.*;
\r
23 import jalview.util.*;
\r
27 /** Data structure to hold and manipulate a multiple sequence alignment
\r
29 public class Alignment implements AlignmentI
\r
31 protected Alignment dataset;
\r
32 protected Vector sequences;
\r
33 protected Vector groups = new Vector();
\r
34 protected char gapCharacter = '-';
\r
35 protected int type = NUCLEOTIDE;
\r
36 public static final int PROTEIN = 0;
\r
37 public static final int NUCLEOTIDE = 1;
\r
39 /** Annotation rows attached to this alignment; may be null. */
\r
40 public AlignmentAnnotation[] annotations;
\r
42 HiddenSequences hiddenSequences = new HiddenSequences(this);
\r
45 /** Make an alignment from an array of Sequences.
\r
49 public Alignment(SequenceI[] seqs)
\r
53 if( jalview.util.Comparison.isNucleotide(seqs))
\r
58 sequences = new Vector();
\r
60 for (i = 0; i < seqs.length; i++)
\r
62 sequences.addElement(seqs[i]);
\r
69 * @return DOCUMENT ME!
\r
71 public Vector getSequences()
\r
76 public SequenceI [] getSequencesArray()
\r
78 SequenceI [] reply = new SequenceI[sequences.size()];
\r
79 for(int i=0; i<sequences.size(); i++)
\r
81 reply[i] = (SequenceI)sequences.elementAt(i);
\r
89 * @param i index of the sequence to fetch
\r
91 * @return the sequence stored at index i (only looked up when i is less than the number of sequences)
\r
93 public SequenceI getSequenceAt(int i)
\r
95 if (i < sequences.size())
\r
97 return (SequenceI) sequences.elementAt(i);
\r
103 /** Adds a sequence to the alignment. Recalculates maxLength and size.
\r
107 public void addSequence(SequenceI snew)
\r
111 if(snew.getDatasetSequence()!=null)
\r
113 System.out.println(snew.getName());
\r
114 getDataset().addSequence(snew.getDatasetSequence());
\r
118 Sequence ds = new Sequence(snew.getName(),
\r
119 AlignSeq.extractGaps("-. ",
\r
120 snew.getSequence()),
\r
124 snew.setDatasetSequence(ds);
\r
125 getDataset().addSequence(ds);
\r
129 sequences.addElement(snew);
\r
133 /** Deletes the sequence currently at index i, then stores snew at index i.
\r
137 public void setSequenceAt(int i, SequenceI snew)
\r
139 SequenceI oldseq = getSequenceAt(i);
\r
140 deleteSequence(oldseq);
\r
142 sequences.setElementAt(snew, i);
\r
148 * @return DOCUMENT ME!
\r
150 public Vector getGroups()
\r
155 /** Takes out columns consisting entirely of gaps (-,.," ")
\r
157 public void removeGaps()
\r
159 SequenceI[] seqs = getVisibleAndRepresentedSeqs();
\r
160 int j, jSize = seqs.length;
\r
164 for (int i = 0; i < jSize; i++)
\r
166 if (seqs[i].getLength() > width)
\r
168 width = seqs[i].getLength();
\r
172 int startCol = -1, endCol = -1;
\r
173 boolean delete = true;
\r
174 for (int i = 0; i < width; i++)
\r
178 for (j = 0; j < jSize; j++)
\r
180 current = getSequenceAt(j);
\r
182 if (current.getLength() > i)
\r
184 if (!jalview.util.Comparison.isGap(current.getCharAt(i)))
\r
196 if(delete && startCol==-1)
\r
202 if (!delete && startCol > -1)
\r
204 deleteColumns(seqs, startCol, endCol);
\r
205 width -= (endCol - startCol);
\r
206 i -= (endCol - startCol);
\r
212 if (delete && startCol > -1)
\r
214 deleteColumns(seqs, startCol, endCol);
\r
219 /** Removes a range of columns (start to end inclusive).
\r
221 * @param seqs Sequences to remove columns from
\r
222 * @param start Start column in the alignment
\r
223 * @param end End column in the alignment
\r
225 public void deleteColumns(SequenceI [] seqs, int start, int end)
\r
227 for(int i=0; i<seqs.length; i++)
\r
228 seqs[i].deleteChars(start, end);
\r
235 * @param i first column to keep; each retained sequence is cut down to getSequence().substring(i)
\r
237 public void trimLeft(int i)
\r
239 SequenceI[] seqs = getVisibleAndRepresentedSeqs();
\r
240 int j, jSize = seqs.length;
\r
241 for (j = 0; j < jSize; j++)
\r
243 int newstart = seqs[j].findPosition(i);
\r
245 if(i>seqs[j].getLength())
\r
247 sequences.removeElement(seqs[j]);
\r
253 seqs[j].setStart(newstart);
\r
254 seqs[j].setSequence(seqs[j].getSequence().substring(i));
\r
262 * @param i last column to keep; sequences longer than i are truncated to columns 0..i inclusive
\r
264 public void trimRight(int i)
\r
266 SequenceI[] seqs = getVisibleAndRepresentedSeqs();
\r
267 int j, jSize = seqs.length;
\r
268 for (j = 0; j < jSize; j++)
\r
270 int newend = seqs[j].findPosition(i);
\r
272 seqs[j].setEnd(newend);
\r
273 if(seqs[j].getLength()>i)
\r
274 seqs[j].setSequence(seqs[j].getSequence().substring(0, i + 1));
\r
281 * @param s the sequence to delete (matched by object identity)
\r
283 public void deleteSequence(SequenceI s)
\r
285 for (int i = 0; i < getHeight(); i++)
\r
287 if (getSequenceAt(i) == s)
\r
297 * @param i index of the sequence to remove
\r
299 public void deleteSequence(int i)
\r
301 sequences.removeElementAt(i);
\r
306 public SequenceGroup findGroup(SequenceI s)
\r
308 for (int i = 0; i < this.groups.size(); i++)
\r
310 SequenceGroup sg = (SequenceGroup) groups.elementAt(i);
\r
312 if (sg.getSequences(false).contains(s))
\r
324 * @param s the sequence to look for
\r
326 * @return array of the groups whose sequence lists contain s
\r
328 public SequenceGroup[] findAllGroups(SequenceI s)
\r
330 Vector temp = new Vector();
\r
332 int gSize = groups.size();
\r
333 for (int i = 0; i < gSize; i++)
\r
335 SequenceGroup sg = (SequenceGroup) groups.elementAt(i);
\r
336 if(sg==null || sg.getSequences(false)==null)
\r
338 this.deleteGroup(sg);
\r
343 if (sg.getSequences(false).contains(s))
\r
345 temp.addElement(sg);
\r
349 SequenceGroup[] ret = new SequenceGroup[temp.size()];
\r
351 for (int i = 0; i < temp.size(); i++)
\r
353 ret[i] = (SequenceGroup) temp.elementAt(i);
\r
362 public void addGroup(SequenceGroup sg)
\r
364 if (!groups.contains(sg))
\r
366 groups.addElement(sg);
\r
373 public void deleteAllGroups()
\r
375 groups.removeAllElements();
\r
379 while (i < sequences.size())
\r
381 SequenceI s = getSequenceAt(i);
\r
382 s.setColor(java.awt.Color.white);
\r
388 public void deleteGroup(SequenceGroup g)
\r
390 if (groups.contains(g))
\r
392 groups.removeElement(g);
\r
397 public SequenceI findName(String name)
\r
401 while (i < sequences.size())
\r
403 if (getSequenceAt(i).getName().equals(name))
\r
405 return getSequenceAt(i);
\r
416 public int findIndex(SequenceI s)
\r
420 while (i < sequences.size())
\r
422 if (s == getSequenceAt(i))
\r
436 * @return the number of sequences in this alignment
\r
438 public int getHeight()
\r
440 return sequences.size();
\r
446 * @return the length of the longest sequence in this alignment
\r
448 public int getWidth()
\r
450 int maxLength = -1;
\r
452 for (int i = 0; i < sequences.size(); i++)
\r
454 if (getSequenceAt(i).getLength() > maxLength)
\r
456 maxLength = getSequenceAt(i).getLength();
\r
466 * @return DOCUMENT ME!
\r
468 public int getMaxIdLength()
\r
473 while (i < sequences.size())
\r
475 SequenceI seq = getSequenceAt(i);
\r
476 String tmp = seq.getName() + "/" + seq.getStart() + "-" +
\r
479 if (tmp.length() > max)
\r
481 max = tmp.length();
\r
493 * @param gc character that replaces '.', '-' and ' ' in every sequence
\r
495 public void setGapCharacter(char gc)
\r
499 for (int i = 0; i < sequences.size(); i++)
\r
501 Sequence seq = (Sequence) sequences.elementAt(i);
\r
502 seq.setSequence( seq.getSequence().replace('.', gc) );
\r
503 seq.setSequence( seq.getSequence().replace('-', gc) );
\r
504 seq.setSequence( seq.getSequence().replace(' ', gc) );
\r
511 * @return the gap character currently set for this alignment
\r
513 public char getGapCharacter()
\r
515 return gapCharacter;
\r
521 * @return the result of AAFrequency.calculate over all sequences, columns 0 to getWidth()
\r
523 public Vector getAAFrequency()
\r
525 return AAFrequency.calculate(sequences, 0, getWidth());
\r
531 * @return DOCUMENT ME!
\r
533 public boolean isAligned()
\r
535 int width = getWidth();
\r
537 for (int i = 0; i < sequences.size(); i++)
\r
539 if (getSequenceAt(i).getLength() != width)
\r
551 * @param aa the annotation to remove (matched by object identity)
\r
553 public void deleteAnnotation(AlignmentAnnotation aa)
\r
557 if (annotations != null)
\r
559 aSize = annotations.length;
\r
562 AlignmentAnnotation[] temp = new AlignmentAnnotation[aSize - 1];
\r
566 for (int i = 0; i < aSize; i++)
\r
568 if (annotations[i] == aa)
\r
573 temp[tIndex] = annotations[i];
\r
577 annotations = temp;
\r
581 public void adjustSequenceAnnotations()
\r
583 if(annotations!=null)
\r
585 for (int a = 0; a < annotations.length; a++)
\r
587 if (annotations[a].sequenceRef != null)
\r
589 annotations[a].adjustForAlignment();
\r
598 * @param aa the annotation to append as the last element of the annotations array
\r
600 public void addAnnotation(AlignmentAnnotation aa)
\r
603 if (annotations != null)
\r
605 aSize = annotations.length + 1;
\r
608 AlignmentAnnotation[] temp = new AlignmentAnnotation[aSize];
\r
610 temp[aSize-1] = aa;
\r
616 for (i = 0; i < (aSize-1); i++)
\r
618 temp[i] = annotations[i];
\r
622 annotations = temp;
\r
625 public void setAnnotationIndex(AlignmentAnnotation aa, int index)
\r
627 if(aa==null || annotations==null || annotations.length-1<index)
\r
630 int aSize = annotations.length;
\r
631 AlignmentAnnotation[] temp = new AlignmentAnnotation[aSize];
\r
635 for (int i = 0; i < aSize; i++)
\r
641 temp[i] = annotations[i];
\r
643 temp[i] = annotations[i-1];
\r
646 annotations = temp;
\r
652 * @return the annotations array for this alignment (may be null)
\r
654 public AlignmentAnnotation[] getAlignmentAnnotation()
\r
656 return annotations;
\r
659 public void setNucleotide(boolean b)
\r
667 public boolean isNucleotide()
\r
669 if(type==NUCLEOTIDE)
\r
675 public void setDataset(Alignment data)
\r
677 if(dataset==null && data==null)
\r
679 // Create a new dataset for this alignment.
\r
680 // This can only be done once: if dataset is already non-null,
\r
681 // this branch is not performed.
\r
682 Sequence[] seqs = new Sequence[getHeight()];
\r
683 for (int i = 0; i < getHeight(); i++)
\r
685 if(getSequenceAt(i).getDatasetSequence()!=null)
\r
687 seqs[i] = (Sequence)getSequenceAt(i).getDatasetSequence();
\r
691 seqs[i] = new Sequence(getSequenceAt(i).getName(),
\r
692 AlignSeq.extractGaps(
\r
693 jalview.util.Comparison.GapChars,
\r
694 getSequenceAt(i).getSequence()
\r
696 getSequenceAt(i).getStart(),
\r
697 getSequenceAt(i).getEnd());
\r
699 getSequenceAt(i).setDatasetSequence(seqs[i]);
\r
703 dataset = new Alignment(seqs);
\r
705 else if(dataset==null && data!=null)
\r
711 public Alignment getDataset()
\r
716 public boolean padGaps() {
\r
717 boolean modified=false;
\r
719 //Remove excess gaps from the end of alignment
\r
720 int maxLength = -1;
\r
723 for (int i = 0; i < sequences.size(); i++)
\r
725 current = getSequenceAt(i);
\r
726 for (int j = current.getLength(); j > maxLength; j--)
\r
728 if (j > maxLength && !jalview.util.Comparison.isGap(
\r
729 current.getCharAt(j)))
\r
739 for (int i = 0; i < sequences.size();
\r
742 current = getSequenceAt(i);
\r
744 if (current.getLength() < maxLength)
\r
746 current.insertCharAt(maxLength - 1, gapCharacter);
\r
749 else if(current.getLength() > maxLength)
\r
751 current.deleteChars(maxLength, current.getLength());
\r
757 public HiddenSequences getHiddenSequences()
\r
759 return hiddenSequences;
\r
762 SequenceI [] getVisibleAndRepresentedSeqs()
\r
764 if(hiddenSequences==null || hiddenSequences.getSize()<1)
\r
765 return getSequencesArray();
\r
767 Vector seqs = new Vector();
\r
769 SequenceGroup hidden;
\r
770 for (int i = 0; i < sequences.size(); i++)
\r
772 seq = (SequenceI) sequences.elementAt(i);
\r
773 seqs.addElement(seq);
\r
774 hidden = seq.getHiddenSequences();
\r
777 for(int j=0; j<hidden.getSize(false); j++)
\r
779 seqs.addElement(hidden.getSequenceAt(j));
\r
783 SequenceI [] result = new SequenceI[seqs.size()];
\r
784 for(int i=0; i<seqs.size(); i++)
\r
785 result[i] = (SequenceI)seqs.elementAt(i);
\r